Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/creek/shared_strings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def self.parse_shared_string_from_document(xml)
xml.css('si').each_with_index do |si, idx|
text_nodes = si.css('t')
if text_nodes.count == 1 # plain text node
dictionary[idx] = text_nodes.first.content
dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content)
else # rich text nodes with text fragments
dictionary[idx] = text_nodes.map(&:content).join('')
dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
end
end

Expand Down
14 changes: 12 additions & 2 deletions lib/creek/styles/converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ module Creek
class Styles
class Converter
include Creek::Styles::Constants

# Excel escape sequence for non-printable characters: `_x`, exactly four
# hexadecimal digits, then `_` (e.g. `_x000D_` for a carriage return).
# `\h` restricts the payload to [0-9a-fA-F] so that ordinary text such as
# `_xZZZZ_` is not mistaken for an escape.
HEX_ESCAPE_REGEXP = /_x\h{4}_/

##
# The heart of typecasting. The ruby type is determined either explicitly
# from the cell xml or implicitly from the cell style, and this
Expand Down Expand Up @@ -45,9 +49,9 @@ def self.call(value, type, style, options = {})
when 'b'
value.to_i == 1
when 'str'
value
unescape_string(value)
when 'inlineStr'
value
unescape_string(value)

##
# Type can also be determined by a style,
Expand Down Expand Up @@ -112,6 +116,12 @@ def self.convert_bignum(value)
end
end

# Decodes Excel's `_xHHHH_` escape sequences into the characters they encode.
#
# Excel encodes some non-printable characters using a hex code in the format
# _xHHHH_, e.g. Carriage Return (\r) is encoded as _x000D_.
#
# Candidates that cannot be decoded are left in the text unchanged.
#
# @param value [String] text that may contain `_xHHHH_` sequences
# @return [String] a new string with every decodable sequence replaced
def self.unescape_string(value)
  value.gsub(HEX_ESCAPE_REGEXP) do |match|
    hex = match[2, 4]

    # Only decode four genuine hex digits; for anything else String#to_i(16)
    # would yield 0 and silently turn literal text into a NUL character.
    next match unless /\A\h{4}\z/ =~ hex

    codepoint = hex.hex

    # Surrogate code points are not valid in UTF-8 and would make
    # Integer#chr raise RangeError, so keep the escape as literal text.
    next match if codepoint.between?(0xD800, 0xDFFF)

    codepoint.chr(Encoding::UTF_8)
  end
end

private

def self.base_date(options)
Expand Down
3 changes: 3 additions & 0 deletions spec/fixtures/sst.xml
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,7 @@
<t>B2</t>
</r>
</si>
<si>
<t>Cell with_x000D_escaped_x000D_characters</t>
</si>
</sst>
3 changes: 2 additions & 1 deletion spec/shared_string_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
doc = Nokogiri::XML(shared_strings_xml_file)
dictionary = Creek::SharedStrings.parse_shared_string_from_document(doc)

expect(dictionary.keys.size).to eq(5)
expect(dictionary.keys.size).to eq(6)
expect(dictionary[0]).to eq('Cell A1')
expect(dictionary[1]).to eq('Cell B1')
expect(dictionary[2]).to eq('My Cell')
expect(dictionary[3]).to eq('Cell A2')
expect(dictionary[4]).to eq('Cell B2')
expect(dictionary[5]).to eq("Cell with\rescaped\rcharacters")
end

end