# frozen_string_literal: true
require 'roo/excelx/extractor'
module Roo
class Excelx
class SharedStrings < Excelx::Extractor
# Looks up an entry of the shared-string table.
#
# index is handed straight to Array#[] on the memoised result of #to_a, so
# whatever that method accepts is accepted here and a position outside the
# table yields nil.
def [](index)
  strings = to_a
  strings[index]
end
# Returns the plain-text shared strings as an Array.
#
# The table is built by extract_shared_strings on first use and cached in
# @array; later calls return the cached object.
def to_a
  return @array if defined?(@array) && @array

  @array = extract_shared_strings
end
# Returns the shared strings rendered as HTML snippets, as an Array.
#
# The list is built by extract_html on first use and cached in @html; later
# calls return the cached object.
def to_html
  return @html if defined?(@html) && @html

  @html = extract_html
end
# Tells the caller whether the shared string at +index+ should be read from
# #to_html (it carries inline formatting) rather than from #to_a.
#
# Returns false when the :disable_html_wrapper option is set, or when there
# is no HTML entry at +index+ (index outside the table, or no shared-strings
# document). Otherwise returns the first formatting tag found in the HTML
# snippet (<b>, <i>, <u>, <sup> or <sub>), which is truthy, or nil when the
# snippet contains none of them.
def use_html?(index)
  return false if @options[:disable_html_wrapper]

  html = to_html[index]
  # A missing entry cannot carry formatting; without this guard the regex
  # lookup below would be sent to nil and raise NoMethodError.
  return false unless html

  html[/<([biu]|sup|sub)>/]
end
private
# Replaces escape sequences that must not reach the caller verbatim.
#
# The document is serialised with to_s and searched for every key of the
# replacement table (currently only '_x000D_', which becomes "\n"). When
# nothing matches, the very same +doc+ object is returned; otherwise the
# substituted text is parsed again with Nokogiri and that new document is
# returned. +doc+ itself is never modified.
def fix_invalid_shared_strings(doc)
  replacements = { '_x000D_' => "\n" }
  pattern = Regexp.union(replacements.keys)
  serialized = doc.to_s

  if serialized =~ pattern
    ::Nokogiri::XML(serialized.gsub(pattern, replacements))
  else
    doc
  end
end
# Builds the plain-text shared-string table.
#
# Returns [] when doc_exists? is false. Otherwise the document is passed
# through fix_invalid_shared_strings and every /sst/si element becomes one
# String:
# * a direct <t> child replaces whatever text was collected so far;
# * an <r> child appends the content of each of its <t> children;
# * every other child is ignored.
def extract_shared_strings
  return [] unless doc_exists?

  document = fix_invalid_shared_strings(doc)
  document.xpath('/sst/si').map do |si|
    si.children.reduce(+"") do |text, node|
      case node.name
      when 't'
        node.content
      when 'r'
        # each_with_object hands back +text+ itself, so the fold continues
        # with the same (now longer) string.
        node.children.each_with_object(text) do |run_child, buffer|
          buffer << run_child.content if run_child.name == 't'
        end
      else
        text
      end
    end
  end
end
# Builds the HTML rendering of every shared string.
#
# Returns [] when doc_exists? is false. Otherwise each /sst/si element
# becomes one String of the form "<html>...</html>" in which
# * a direct <t> child contributes its content as is, and
# * an <r> child contributes whatever extract_html_r returns for it.
# Other children are ignored.
def extract_html
  return [] unless doc_exists?

  # fix_invalid_shared_strings returns a new document when it has to
  # substitute something, so its return value - not +doc+ - must be
  # traversed. This mirrors extract_shared_strings.
  document = fix_invalid_shared_strings(doc)
  document.xpath('/sst/si').map do |si|
    html_string = +'<html>'
    si.children.each do |elem|
      case elem.name
      when 'r'
        html_string << extract_html_r(elem)
      when 't'
        html_string << elem.content
      end
    end
    html_string << '</html>'
  end
end
# Converts one rich-text run (<r>) of sharedStrings.xml into an HTML
# fragment.
#
# r_elem ::: XML element <r> taken from sharedStrings.xml, e.g.
# {code:xml}
# <r>
#   <rPr>
#     <i/>
#     <b/>
#     <u/>
#     <vertAlign val="subscript"/>
#     <vertAlign val="superscript"/>
#   </rPr>
#   <t>TEXT</t>
# </r>
# {code}
#
# Returns the fragment only, e.g. "<sup><b><i><u>TEXT</u></i></b></sup>";
# the surrounding "<html>...</html>" is added by the caller.
#
# Children are processed in document order: every <rPr> updates the current
# formatting, and every <t> is rendered through create_html with the
# formatting collected up to that point.
#
# Run properties are interpreted as follows:
# * <b> and <i> switch the format on unless val is "0" or "false";
# * <u> switches underline on unless val is "none";
# * <vertAlign> selects sub- or superscript (mutually exclusive); any other
#   or missing val leaves the current setting untouched.
def extract_html_r(r_elem)
  html = +""
  # Insertion order is the nesting order used by create_html.
  formatting = { sub: false, sup: false, b: false, i: false, u: false }

  r_elem.children.each do |elem|
    case elem.name
    when 'rPr'
      elem.children.each do |property|
        case property.name
        when 'b'
          formatting[:b] = !%w[0 false].include?(property['val'])
        when 'i'
          formatting[:i] = !%w[0 false].include?(property['val'])
        when 'u'
          formatting[:u] = property['val'] != 'none'
        when 'vertAlign'
          # property['val'] is nil when the attribute is absent, which
          # simply matches neither branch instead of raising.
          case property['val']
          when 'subscript'
            formatting[:sub] = true
            formatting[:sup] = false
          when 'superscript'
            formatting[:sup] = true
            formatting[:sub] = false
          end
        end
      end
    when 't'
      html << create_html(elem.content, formatting)
    end
  end

  html
end
# Wraps +text+ in one HTML tag per enabled entry of +formatting+.
#
# formatting ::: enumerable of [tag, enabled] pairs (a Hash such as
#                { sup: true, b: true, i: false }); tags whose flag is falsy
#                are skipped.
#
# Opening tags follow the iteration order of +formatting+ and closing tags
# use the reverse order, so the result is properly nested, e.g.
# "<sup><b>TEXT</b></sup>". +text+ is inserted unchanged.
def create_html(text, formatting)
  enabled_tags = formatting.select { |_tag, enabled| enabled }.map(&:first)

  markup = +""
  enabled_tags.each { |tag| markup << "<#{tag}>" }
  markup << text
  enabled_tags.reverse_each { |tag| markup << "</#{tag}>" }
  markup
end
end # class SharedStrings < Excelx::Extractor
end # class Excelx
end # module Roo