diff --git a/lib/foodsoft_article_import.rb b/lib/foodsoft_article_import.rb index e70748b..4ea4d55 100644 --- a/lib/foodsoft_article_import.rb +++ b/lib/foodsoft_article_import.rb @@ -5,6 +5,7 @@ require 'yaml' require 'active_support/core_ext/hash/keys' require_relative 'foodsoft_article_import/bioromeo' require_relative 'foodsoft_article_import/bnn' +require_relative 'foodsoft_article_import/utf8_encoder' require_relative 'foodsoft_article_import/borkenstein' require_relative 'foodsoft_article_import/dnb_xml' require_relative 'foodsoft_article_import/foodsoft' @@ -34,11 +35,13 @@ module FoodsoftArticleImport # @param file [File, Tempfile] # @option opts [String] type file format (required) (see {.file_formats}) # @return [File, Roo::Spreadsheet] file with encoding set if needed - def self.parse(file, custom_file_path=nil, type='bnn', **opts, &blk) + def self.parse(file, custom_file_path: nil, type: nil, **opts, &blk) # @todo handle wrong or undetected type - type = opts[:type] || 'bnn' - puts type + custom_file_path ||= nil + type ||= 'bnn' + parser = file_formats[type] + puts parser if block_given? parser.parse(file, custom_file_path, **opts, &blk) else @@ -71,6 +74,10 @@ module FoodsoftArticleImport opts[:csv_options][:encoding] = encoding if encoding opts[:csv_options][:col_sep] = col_sep if col_sep opts[:extension] = File.extname(filename) if filename - Roo::Spreadsheet.open(file, **opts) + begin + Roo::Spreadsheet.open(file, **opts) + rescue => e + raise "Failed to parse foodsoft file. make sure file format is correct: #{e.message}" + end end end \ No newline at end of file diff --git a/lib/foodsoft_article_import/bioromeo.rb b/lib/foodsoft_article_import/bioromeo.rb index e32f34c..a7f73ac 100644 --- a/lib/foodsoft_article_import/bioromeo.rb +++ b/lib/foodsoft_article_import/bioromeo.rb @@ -27,7 +27,8 @@ module FoodsoftArticleImport RES_PARSE_UNIT_LIST.map {|r| /#{r}\s*$/} + RES_PARSE_UNIT_LIST.map {|r| /-#{r}/} - def self.parse(file, custom_file_path=nil, **opts) + def self.parse(file, custom_file_path: nil, **opts) + custom_file_path ||= nil opts = OPTIONS.merge(opts) ss = FoodsoftArticleImport.open_spreadsheet(file, **opts) diff --git a/lib/foodsoft_article_import/bnn.rb b/lib/foodsoft_article_import/bnn.rb index fbf2ed4..da221d1 100644 --- a/lib/foodsoft_article_import/bnn.rb +++ b/lib/foodsoft_article_import/bnn.rb @@ -52,7 +52,8 @@ module FoodsoftArticleImport }.freeze # parses a bnn-file - def self.parse(file, custom_file_path=nil, **opts) + def self.parse(file, custom_file_path: nil, **opts) + custom_file_path ||= nil encoding = opts[:encoding] || OPTIONS[:encoding] col_sep = opts[:col_sep] || OPTIONS[:col_sep] self.load_codes(custom_file_path) @@ -60,9 +61,9 @@ module FoodsoftArticleImport # check if the line is empty unless row[0] == "" || row[0].nil? article = { - :name => row[6], + :name => UTF8Encoder.clean(row[6]), :order_number => row[0], - :note => row[7], + :note => UTF8Encoder.clean(row[7]), :manufacturer => self.translate(:manufacturer, row[10]), :origin => row[12], :article_category => self.translate(:category, row[16]), diff --git a/lib/foodsoft_article_import/borkenstein.rb b/lib/foodsoft_article_import/borkenstein.rb index 72be944..820aee3 100644 --- a/lib/foodsoft_article_import/borkenstein.rb +++ b/lib/foodsoft_article_import/borkenstein.rb @@ -19,7 +19,8 @@ module FoodsoftArticleImport encoding: "UTF-8" # @todo check this }.freeze - def self.parse(file, custom_file_path=nil, **opts) + def self.parse(file, custom_file_path: nil, **opts) + custom_file_path ||= nil global_manufacturer = nil file.set_encoding(opts[:encoding] || OPTIONS[:encoding]) diff --git a/lib/foodsoft_article_import/dnb_xml.rb b/lib/foodsoft_article_import/dnb_xml.rb index 4f7ac3d..377881f 100644 --- a/lib/foodsoft_article_import/dnb_xml.rb +++ b/lib/foodsoft_article_import/dnb_xml.rb @@ -14,26 +14,31 @@ module FoodsoftArticleImport OPTIONS = {}.freeze # parses a string or file - def self.parse(file, custom_file_path=nil, opts={}) - doc = Nokogiri.XML(file, nil, nil, + def self.parse(file, custom_file_path: nil, **opts) + custom_file_path ||= nil + xml = File.open(file) + doc = Nokogiri.XML(xml, nil, nil, Nokogiri::XML::ParseOptions::RECOVER + Nokogiri::XML::ParseOptions::NONET + Nokogiri::XML::ParseOptions::COMPACT # do not modify doc! ) + doc.search('product').each.with_index(1) do |row, i| # create a new article unit = row.search('eenheid').text - unit = case(unit) - when blank? then 'st' + unit = case(unit.strip) + when '' then 'st' when 'stuk' then 'st' when 'g' then 'gr' # need at least 2 chars when 'l' then 'ltr' else unit end + return if i==3 + puts unit, i inhoud = row.search('inhoud').text - inhoud.blank? or (inhoud.to_f-1).abs > 1e-3 and unit = inhoud.gsub(/\.0+\s*$/,'') + unit + inhoud.to_s.strip.empty? or (inhoud.to_f-1).abs > 1e-3 and unit = inhoud.gsub(/\.0+\s*$/,'') + unit deposit = row.search('statiegeld').text - deposit.blank? and deposit = 0 + deposit.to_s.strip.empty? and deposit = 0 category = [ @@codes[:indeling][row.search('indeling').text.to_i], @@codes[:indeling][row.search('subindeling').text.to_i] @@ -69,7 +74,6 @@ module FoodsoftArticleImport raise "Failed to load dnb_codes: #{dir}/dnb_codes.yml: #{e.message}" end end - end FoodsoftArticleImport::DnbXml.load_codes diff --git a/lib/foodsoft_article_import/foodsoft.rb b/lib/foodsoft_article_import/foodsoft.rb index 6916a16..ada6de1 100644 --- a/lib/foodsoft_article_import/foodsoft.rb +++ b/lib/foodsoft_article_import/foodsoft.rb @@ -16,8 +16,10 @@ module FoodsoftArticleImport::Foodsoft # Parses Foodsoft file # the yielded article is a simple hash - def self.parse(file, custom_file_path=nil, **opts) + def self.parse(file, custom_file_path: nil, **opts) + custom_file_path ||= nil opts = OPTIONS.merge(opts) + ss = FoodsoftArticleImport.open_spreadsheet(file, **opts) header_row = true diff --git a/lib/foodsoft_article_import/utf8_encoder.rb b/lib/foodsoft_article_import/utf8_encoder.rb new file mode 100644 index 0000000..0715a7e --- /dev/null +++ b/lib/foodsoft_article_import/utf8_encoder.rb @@ -0,0 +1,9 @@ +module UTF8Encoder + def self.clean(string) + if string.nil? + string + else + string.encode('UTF-8') + end + end +end