Skip to content

Commit f0c683f

Browse files
Add linter (#119)
* added rubocop
* applied lint
* added lint check in CI
1 parent 67b5cd1 commit f0c683f

21 files changed

+288
-252
lines changed

.github/workflows/test.yml

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,27 @@ permissions:
1717
contents: read
1818

1919
jobs:
20+
lint:
21+
runs-on: ubuntu-latest
22+
23+
steps:
24+
- uses: actions/checkout@v3
25+
- name: Set up Ruby
26+
uses: ruby/setup-ruby@v1
27+
with:
28+
ruby-version: "3.1"
29+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
30+
- name: Run lint
31+
run: bundle exec rubocop
32+
2033
test:
2134

2235
runs-on: ubuntu-latest
2336
strategy:
2437
matrix:
2538
ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
39+
needs:
40+
- lint
2641

2742
steps:
2843
- uses: actions/checkout@v3

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,5 @@ tmp
1818

1919
# Mac finder artifacts
2020
.DS_Store
21+
22+
.idea

.rubocop.yml

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
Gemspec/RequiredRubyVersion:
2+
Enabled: false
3+
4+
Layout/LineLength:
5+
Enabled: false
6+
Metrics:
7+
Enabled: false
8+
Naming/ConstantName:
9+
Enabled: false
10+
11+
Style/FrozenStringLiteralComment:
12+
Enabled: false
13+
Style/Documentation:
14+
Enabled: false
15+
Style/AndOr:
16+
Enabled: false
17+
Style/StringConcatenation:
18+
Enabled: false
19+
Style/ClassAndModuleChildren:
20+
Enabled: false
21+
Style/OptionalBooleanParameter:
22+
Enabled: false
23+
Style/TernaryParentheses:
24+
EnforcedStyle: require_parentheses_when_complex
25+
26+
Naming/PredicateName:
27+
Enabled: false

Rakefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
1-
require "bundler/gem_tasks"
1+
require 'bundler/gem_tasks'
22
require 'rspec/core/rake_task'
33

44
RSpec::Core::RakeTask.new('spec')
55

66
# If you want to make this the default task
7-
task :default => :spec
7+
task default: :spec

creek.gemspec

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,29 @@
1-
# coding: utf-8
2-
lib = File.expand_path('../lib', __FILE__)
1+
lib = File.expand_path('lib', __dir__)
32
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
43
require 'creek/version'
54

65
Gem::Specification.new do |spec|
7-
spec.name = "creek"
6+
spec.name = 'creek'
87
spec.version = Creek::VERSION
9-
spec.authors = ["pythonicrubyist"]
10-
spec.email = ["[email protected]"]
11-
spec.description = %q{A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast and efficiently.}
12-
spec.summary = %q{A Ruby gem for parsing large Excel(xlsx and xlsm) files.}
13-
spec.homepage = "https://github.com/pythonicrubyist/creek"
14-
spec.license = "MIT"
8+
spec.authors = ['pythonicrubyist']
9+
spec.email = ['[email protected]']
10+
spec.description = 'A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast and efficiently.'
11+
spec.summary = 'A Ruby gem for parsing large Excel(xlsx and xlsm) files.'
12+
spec.homepage = 'https://github.com/pythonicrubyist/creek'
13+
spec.license = 'MIT'
1514

16-
spec.files = `git ls-files`.split($/)
15+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
1716
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
1817
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19-
spec.require_paths = ["lib"]
18+
spec.require_paths = ['lib']
2019

2120
spec.required_ruby_version = '>= 2.0.0'
2221

23-
spec.add_development_dependency "bundler"
24-
spec.add_development_dependency "rake"
25-
spec.add_development_dependency 'rspec', '~> 3.6.0'
22+
spec.add_development_dependency 'bundler'
2623
spec.add_development_dependency 'pry-byebug'
24+
spec.add_development_dependency 'rake'
25+
spec.add_development_dependency 'rspec', '~> 3.6.0'
26+
spec.add_development_dependency 'rubocop'
2727

2828
spec.add_dependency 'nokogiri', '>= 1.10.0'
2929
spec.add_dependency 'rubyzip', '>= 1.0.0'

lib/creek/book.rb

Lines changed: 28 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,11 @@ class Creek::Book
1414
DATE_1900 = Date.new(1899, 12, 30).freeze
1515
DATE_1904 = Date.new(1904, 1, 1).freeze
1616

17-
def initialize path, options = {}
17+
def initialize(path, options = {})
1818
check_file_extension = options.fetch(:check_file_extension, true)
1919
if check_file_extension
2020
extension = File.extname(options[:original_filename] || path).downcase
21-
raise 'Not a valid file format.' unless (['.xlsx', '.xlsm'].include? extension)
21+
raise 'Not a valid file format.' unless ['.xlsx', '.xlsm'].include? extension
2222
end
2323
path = download_file(path) if options[:remote]
2424
@files = Zip::File.open(path)
@@ -28,28 +28,26 @@ def initialize path, options = {}
2828

2929
def sheets
3030
@sheets ||= begin
31-
doc = @files.file.open "xl/workbook.xml"
31+
doc = @files.file.open 'xl/workbook.xml'
3232
xml = Nokogiri::XML::Document.parse doc
3333
namespaces = xml.namespaces
3434

35-
cssPrefix = ''
35+
css_prefix = ''
3636
namespaces.each do |namespace|
37-
if namespace[1] == 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' && namespace[0] != 'xmlns' then
38-
cssPrefix = namespace[0].split(':')[1]+'|'
39-
end
37+
css_prefix = namespace[0].split(':')[1] + '|' if namespace[1] == 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' && namespace[0] != 'xmlns'
4038
end
4139

42-
rels_doc = @files.file.open "xl/_rels/workbook.xml.rels"
43-
rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship")
44-
xml.css(cssPrefix+'sheet').map do |sheet|
45-
sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target")
40+
rels_doc = @files.file.open 'xl/_rels/workbook.xml.rels'
41+
rels = Nokogiri::XML::Document.parse(rels_doc).css('Relationship')
42+
xml.css(css_prefix + 'sheet').map do |sheet|
43+
sheetfile = rels.find { |el| sheet.attr('r:id') == el.attr('Id') }.attr('Target')
4644
sheet = Sheet.new(
4745
self,
48-
sheet.attr("name"),
49-
sheet.attr("sheetid"),
50-
sheet.attr("state"),
51-
sheet.attr("visible"),
52-
sheet.attr("r:id"),
46+
sheet.attr('name'),
47+
sheet.attr('sheetid'),
48+
sheet.attr('state'),
49+
sheet.attr('visible'),
50+
sheet.attr('r:id'),
5351
sheetfile
5452
)
5553
sheet.with_headers = with_headers
@@ -68,23 +66,23 @@ def close
6866

6967
def base_date
7068
@base_date ||=
71-
begin
72-
# Default to 1900 (minus one day due to excel quirk) but use 1904 if
73-
# it's set in the Workbook's workbookPr
74-
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
75-
result = DATE_1900 # default
69+
begin
70+
# Default to 1900 (minus one day due to excel quirk) but use 1904 if
71+
# it's set in the Workbook's workbookPr
72+
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
73+
result = DATE_1900 # default
7674

77-
doc = @files.file.open "xl/workbook.xml"
78-
xml = Nokogiri::XML::Document.parse doc
79-
xml.css('workbookPr[date1904]').each do |workbookPr|
80-
if workbookPr['date1904'] =~ /true|1/i
81-
result = DATE_1904
82-
break
75+
doc = @files.file.open 'xl/workbook.xml'
76+
xml = Nokogiri::XML::Document.parse doc
77+
xml.css('workbookPr[date1904]').each do |workbook_pr|
78+
if workbook_pr['date1904'] =~ /true|1/i
79+
result = DATE_1904
80+
break
81+
end
8382
end
84-
end
8583

86-
result
87-
end
84+
result
85+
end
8886
end
8987

9088
private

lib/creek/drawing.rb

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,10 @@ def initialize(book, drawing_filepath)
1515
@drawings_rels = []
1616
@images_pathnames = Hash.new { |hash, key| hash[key] = [] }
1717

18-
if file_exist?(@drawing_filepath)
19-
load_drawings_and_rels
20-
load_images_pathnames_by_cells if has_images?
21-
end
18+
return unless file_exist?(@drawing_filepath)
19+
20+
load_drawings_and_rels
21+
load_images_pathnames_by_cells if has_images?
2222
end
2323

2424
##
@@ -36,13 +36,11 @@ def images_at(cell_name)
3636
return if pathnames_at_coordinate.empty?
3737

3838
pathnames_at_coordinate.map do |image_pathname|
39-
if image_pathname.exist?
40-
image_pathname
41-
else
39+
unless image_pathname.exist?
4240
excel_image_path = "xl/media#{image_pathname.to_path.split(tmpdir).last}"
4341
IO.copy_stream(@book.files.file.open(excel_image_path), image_pathname.to_path)
44-
image_pathname
45-
end
42+
end
43+
image_pathname
4644
end
4745
end
4846

@@ -52,8 +50,8 @@ def images_at(cell_name)
5250
# Transforms cell name to [row, col], e.g. A1 => [0, 0], B3 => [1, 2]
5351
# Rows and cols start with 0.
5452
def calc_coordinate(cell_name)
55-
col = COLUMNS.index(cell_name.slice /[A-Z]+/)
56-
row = (cell_name.slice /\d+/).to_i - 1 # rows in drawings start with 0
53+
col = COLUMNS.index(cell_name.slice(/[A-Z]+/))
54+
row = cell_name.slice(/\d+/).to_i - 1 # rows in drawings start with 0
5755
[row, col]
5856
end
5957

@@ -68,7 +66,7 @@ def tmpdir
6866
# Drawing xml contains relationships ID's and coordinates (row, col).
6967
# Drawing relationships xml contains images' locations.
7068
def load_drawings_and_rels
71-
@drawings = parse_xml(@drawing_filepath).css('xdr|twoCellAnchor', 'xdr|oneCellAnchor' )
69+
@drawings = parse_xml(@drawing_filepath).css('xdr|twoCellAnchor', 'xdr|oneCellAnchor')
7270
drawing_rels_filepath = expand_to_rels_path(@drawing_filepath)
7371
@drawings_rels = parse_xml(drawing_rels_filepath).css('Relationships')
7472
end
@@ -78,11 +76,11 @@ def load_drawings_and_rels
7876
# As multiple images can be located in a single cell, hash values are array of Pathname objects.
7977
# One image can be spread across multiple cells (defined with from-row/to-row/from-col/to-col attributes) - same Pathname object is associated to each row-col combination for the range.
8078
def load_images_pathnames_by_cells
81-
image_selector = 'xdr:pic/xdr:blipFill/a:blip'.freeze
82-
row_from_selector = 'xdr:from/xdr:row'.freeze
83-
row_to_selector = 'xdr:to/xdr:row'.freeze
84-
col_from_selector = 'xdr:from/xdr:col'.freeze
85-
col_to_selector = 'xdr:to/xdr:col'.freeze
79+
image_selector = 'xdr:pic/xdr:blipFill/a:blip'
80+
row_from_selector = 'xdr:from/xdr:row'
81+
row_to_selector = 'xdr:to/xdr:row'
82+
col_from_selector = 'xdr:from/xdr:col'
83+
col_to_selector = 'xdr:to/xdr:col'
8684

8785
@drawings.xpath('//xdr:twoCellAnchor', '//xdr:oneCellAnchor').each do |drawing|
8886
# embed = drawing.xpath(image_selector).first.attributes['embed']
@@ -91,13 +89,13 @@ def load_images_pathnames_by_cells
9189
next if embed.nil?
9290

9391
rid = embed.value
94-
path = Pathname.new("#{tmpdir}/#{extract_drawing_path(rid).slice(/[^\/]*$/)}")
92+
path = Pathname.new("#{tmpdir}/#{extract_drawing_path(rid).slice(%r{[^/]*$})}")
9593

9694
row_from = drawing.xpath(row_from_selector).text.to_i
9795
col_from = drawing.xpath(col_from_selector).text.to_i
9896

9997
if drawing.name == 'oneCellAnchor'
100-
@images_pathnames[[row_from , col_from ]].push(path)
98+
@images_pathnames[[row_from, col_from]].push(path)
10199
else
102100
row_to = drawing.xpath(row_to_selector).text.to_i
103101
col_to = drawing.xpath(col_to_selector).text.to_i

lib/creek/shared_strings.rb

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4,34 +4,32 @@
44
require 'nokogiri'
55

66
module Creek
7-
87
class Creek::SharedStrings
9-
108
SPREADSHEETML_URI = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
119

1210
attr_reader :book, :dictionary
1311

14-
def initialize book
12+
def initialize(book)
1513
@book = book
1614
parse_shared_shared_strings
1715
end
1816

1917
def parse_shared_shared_strings
20-
path = "xl/sharedStrings.xml"
21-
if @book.files.file.exist?(path)
22-
doc = @book.files.file.open path
23-
xml = Nokogiri::XML::Document.parse doc
24-
parse_shared_string_from_document(xml)
25-
end
18+
path = 'xl/sharedStrings.xml'
19+
return unless @book.files.file.exist?(path)
20+
21+
doc = @book.files.file.open path
22+
xml = Nokogiri::XML::Document.parse doc
23+
parse_shared_string_from_document(xml)
2624
end
2725

2826
def parse_shared_string_from_document(xml)
2927
@dictionary = self.class.parse_shared_string_from_document(xml)
3028
end
3129

3230
def self.parse_shared_string_from_document(xml)
33-
dictionary = Hash.new
34-
namespace = xml.namespaces.detect{|_key, uri| uri == SPREADSHEETML_URI }
31+
dictionary = {}
32+
namespace = xml.namespaces.detect { |_key, uri| uri == SPREADSHEETML_URI }
3533
prefix = if namespace && namespace[0].start_with?('xmlns:')
3634
namespace[0].delete_prefix('xmlns:') + '|'
3735
else
@@ -42,15 +40,14 @@ def self.parse_shared_string_from_document(xml)
4240

4341
xml.css(node_selector).each_with_index do |si, idx|
4442
text_nodes = si.css(text_selector)
45-
if text_nodes.count == 1 # plain text node
46-
dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content)
47-
else # rich text nodes with text fragments
48-
dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
49-
end
43+
dictionary[idx] = if text_nodes.count == 1 # plain text node
44+
Creek::Styles::Converter.unescape_string(text_nodes.first.content)
45+
else # rich text nodes with text fragments
46+
text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('')
47+
end
5048
end
5149

5250
dictionary
5351
end
54-
5552
end
5653
end

0 commit comments

Comments
 (0)