update gemspec, finetune parser classes

This commit is contained in:
viehlieb 2023-01-31 12:08:01 +01:00
parent 4ed1764b75
commit 77474c0811
9 changed files with 74 additions and 144 deletions

View file

@ -6,7 +6,6 @@ require 'active_support/core_ext/hash/keys'
require_relative 'foodsoft_article_import/bioromeo'
require_relative 'foodsoft_article_import/bnn'
require_relative 'foodsoft_article_import/utf8_encoder'
require_relative 'foodsoft_article_import/borkenstein'
require_relative 'foodsoft_article_import/dnb_xml'
require_relative 'foodsoft_article_import/foodsoft'
module FoodsoftArticleImport
@ -23,9 +22,9 @@ module FoodsoftArticleImport
def self.file_formats
  # Registry of supported import formats: maps the identifier accepted as
  # `type:` by .parse onto the parser that handles it. The hash is built on
  # first access, cached in @@file_formats and frozen.
  #
  # There is no 'borkenstein' entry: that parser has been removed from the
  # gem, and referencing its constant here would raise NameError.
  # 'odin' and 'dnb_xml' both map to the same DnbXml parser.
  #
  # @return [Hash{String => Module}] format identifier => parser
  @@file_formats ||= {
    'bnn' => FoodsoftArticleImport::Bnn,
    'foodsoft' => FoodsoftArticleImport::Foodsoft,
    'dnb_xml' => FoodsoftArticleImport::DnbXml,
    'odin' => FoodsoftArticleImport::DnbXml,
    'bioromeo' => FoodsoftArticleImport::Bioromeo
  }.freeze
end
@ -36,17 +35,15 @@ module FoodsoftArticleImport
# @option opts [String] type file format (required) (see {.file_formats})
# @return [File, Roo::Spreadsheet] file with encoding set if needed
def self.parse(file, custom_file_path: nil, type: nil, **opts, &blk)
  # Fall back to the BNN format when the caller does not name one.
  type ||= 'bnn'
  parser = file_formats[type]

  # An unknown type used to surface as NoMethodError on nil; fail with a
  # message that names the offending value and the accepted ones instead.
  unless parser
    raise ArgumentError, "unknown file format #{type.inspect} (known: #{file_formats.keys.join(', ')})"
  end

  # Streaming mode: every parsed entry goes straight to the caller's block.
  # The parser is invoked exactly once, with custom_file_path as a keyword
  # (the parsers declare it as `custom_file_path:`).
  return parser.parse(file, custom_file_path: custom_file_path, **opts, &blk) if block_given?

  # Collecting mode: gather the first value yielded for each entry.
  data = []
  parser.parse(file, custom_file_path: custom_file_path, **opts) { |a| data << a }
  data
end
@ -69,10 +66,11 @@ module FoodsoftArticleImport
# @param encoding [String, NilClass] optional CSV encoding
# @param col_sep [String, NilClass] optional column separator
# @return [Roo::Spreadsheet]
def self.open_spreadsheet(file, filename: nil, encoding: nil, col_sep: nil)
def self.open_spreadsheet(file, filename: nil, encoding: nil, col_sep: nil, liberal_parsing: nil)
opts = {csv_options: {}}
opts[:csv_options][:encoding] = encoding if encoding
opts[:csv_options][:col_sep] = col_sep if col_sep
opts[:csv_options][:liberal_parsing] = true if liberal_parsing
opts[:extension] = File.extname(filename) if filename
begin
Roo::Spreadsheet.open(file, **opts)

View file

@ -30,42 +30,42 @@ module FoodsoftArticleImport
def self.parse(file, custom_file_path: nil, **opts)
custom_file_path ||= nil
opts = OPTIONS.merge(opts)
opts[:liberal_parsing]=true
opts[:col_sep]=","
ss = FoodsoftArticleImport.open_spreadsheet(file, **opts)
header_row = true
sheet = ss.sheet(0).parse(clean: true,
number: /^artnr/i,
name: /^product/i,
skal: /^skal$/i,
demeter: /^demeter$/i,
order_number: /Artnr./,
name: /Product/,
skal: /Skal$/,
demeter: /Demeter$/,
unit_price: /prijs\b.*\beenh/i,
pack_price: /prijs\b.*\bcolli/i,
comment: /^opm(erking)?/i,
comment: /opm(erking)?/i,
)
linenum = 0
category = nil
sheet.each do |row|
puts("[ROW] #{row.inspect}")
linenum += 1
row[:name].blank? and next
row[:name].to_s.strip.empty? and next
# (sub)categories are in first two content cells - assume if there's a price it's a product
if row[:order_number].blank? && row[:unit_price].blank?
if row[:order_number].to_s.strip.empty? && row[:unit_price].to_s.strip.empty?
category = row[:name]
yield nil, nil, linenum
next
end
# skip products without a number
if row[:order_number].blank?
if row[:order_number].to_s.strip.empty?
yield nil, nil, linenum
next
end
# extract name and unit
errors = []
notes = []
unit_price = row[:unit_price]
pack_price = row[:pack_price]
unit_price = row[:unit_price].gsub("","").to_s.strip.to_f
pack_price = row[:pack_price].gsub("","").to_s.strip.to_f
number = row[:order_number]
name = row[:name]
unit = nil
@ -75,6 +75,7 @@ module FoodsoftArticleImport
m=name.match(re)
unless m
yield nil, nil, linenum
next
end
unit = self.normalize_unit(m[3])
name = name.sub(re, '').sub(/\(\s*\)\s*$/,'').sub(/\s+/, ' ').sub(/\.\s*$/, '').strip
@ -120,10 +121,10 @@ module FoodsoftArticleImport
end
end
# note from various fields
notes.append("Skal #{row[:skal]}") if row[:skal].present?
notes.append(row[:demeter]) if row[:demeter].present? && row[:demeter].is_a?(String)
notes.append("Demeter #{row[:demeter]}") if row[:demeter].present? && row[:demeter].is_a?(Fixnum)
notes.append "(#{row[:comment]})" unless row[:comment].blank?
notes.append("Skal #{row[:skal]}") unless row[:skal].to_s.strip.empty?
notes.append(row[:demeter]) unless row[:skal].to_s.strip.empty?
notes.append("Demeter #{row[:demeter]}") unless row[:skal].to_s.strip.empty? && row[:demeter].is_a?(Fixnum)
notes.append "(#{row[:comment]})" unless row[:comment].to_s.strip.empty?
name.sub!(/(,\.?\s*)?\bDemeter\b/i, '') and notes.prepend("Demeter")
name.sub!(/(,\.?\s*)?\bBIO\b/i, '') and notes.prepend "BIO"
# unit check
@ -172,7 +173,7 @@ module FoodsoftArticleImport
elsif what =~ /^gr/
pack_price.to_f / amount.to_f * 1000
end
if kgprice.present? && (kgprice - unit_price.to_f).abs < 1e-2
unless kgprice.to_s.strip.empty? && (kgprice - unit_price.to_f).abs < 1e-2
return
end

View file

@ -1,97 +0,0 @@
# -*- coding: utf-8 -*-
# Module for Borkenstein csv import
require 'csv'
module FoodsoftArticleImport
  # Parser for Borkenstein article lists delivered as CSV.
  #
  # Only the second and third cell of each row are read: the second holds
  # the order number ("-" on manufacturer heading rows), the third holds a
  # text that is taken apart with the patterns in REGEX.
  class Borkenstein
    REGEX = {
      # "<name> [<units>] <price>(<discounted price>)"
      :main => /^(.+)\s+\[([^\[\]]+)\]\s+(\d+\.\d+)\((\d+\.\d+)\)$/,
      # "<manufacturer>    []    ()" -- heading row naming a manufacturer
      :manufacturer => /^(.+)\s{4}\[\]\s{4}\(\)$/,
      # "<name> <origin>/<...>" inside the name part of :main
      :origin => /(.+)\s+(\w+)\/\w+[\/[\w\-]+]?/
    }.freeze
    NAME = "Borkenstein (CSV)"
    OUTLIST = false
    OPTIONS = {
      col_sep: ",",
      encoding: "UTF-8" # @todo check this
    }.freeze

    # Reads +file+ row by row and yields what was found.
    #
    # @param file [IO] readable stream responding to #set_encoding
    # @param custom_file_path [String, nil] not used by this parser
    # @param opts [Hash] :encoding and :col_sep override OPTIONS
    # @yield [article, status, line] article Hash (or nil), always-nil
    #   status, and the 1-based row number
    # @raise [RuntimeError] when a parsed article lacks unit, price or
    #   unit quantity
    def self.parse(file, custom_file_path: nil, **opts)
      global_manufacturer = nil
      file.set_encoding(opts[:encoding] || OPTIONS[:encoding])
      col_sep = opts[:col_sep] || OPTIONS[:col_sep]

      # CSV options must be passed as keywords; a positional Hash is
      # rejected with ArgumentError on Ruby 3.
      CSV.new(file, col_sep: col_sep, headers: false).each.with_index(1) do |row, i|
        number = row[1]
        # to_s guards against rows whose text cell is missing (nil).
        text = row[2].to_s

        # A "-" in the number column marks a manufacturer heading; remember
        # the manufacturer for the articles that follow.
        if number == "-"
          heading = text.match(REGEX[:manufacturer])
          global_manufacturer = heading.captures.first unless heading.nil?
        end

        # Rows without an order number, and heading rows, carry no article.
        # Plain-Ruby emptiness check, so ActiveSupport's blank? is not needed.
        unless number.to_s.strip.empty? || number == "-"
          # Strip a leading/trailing double quote, then split the text.
          matched = text.gsub(/^\"/, "").gsub(/\"$/, "").match(REGEX[:main])
          if matched.nil?
            puts "No regular article data for #{number}: #{row[2]}"
            yield nil, nil, nil
          else
            name, units, _price_high, price_low = matched.captures

            # Try to get origin
            matched_name = name.match(REGEX[:origin])
            if matched_name
              name, origin = matched_name.captures
            else
              name = name.gsub(/\s{2,}/, "")
              origin = nil
            end

            # A 2-3 letter prefix followed by one whitespace is dropped from
            # the name and the article gets the current heading's manufacturer.
            manufacturer = nil
            if name.match(/^[A-Za-z]{2,3}\s{1}/)
              name.gsub!(/^[A-Za-z]{2,3}\s{1}/, "")
              manufacturer = global_manufacturer
            end

            # "<quantity>x<unit>" or just "<unit>"
            units = units.split("x")
            if units.size == 2
              unit_quantity = units.first
              unit = units.last
            else
              unit_quantity = 1
              unit = units.first
            end

            article = {
              :order_number => number,
              :name => name,
              :origin => origin,
              :manufacturer => manufacturer,
              :unit_quantity => unit_quantity,
              :unit => unit,
              :price => price_low, # including the 10% discount tier
              :tax => 0.0 # Tax is included
            }
            # make sure the necessary attributes exist
            if article[:unit].nil? || article[:price].nil? || article[:unit_quantity].nil?
              raise "Fehler: Einheit, Preis und MwSt. müssen gegeben sein: #{article.inspect}"
            end

            yield article, nil, i
          end
        end

        # NOTE(review): this runs for every row, so a row that already
        # yielded above yields a second time with a nil article. Kept as-is
        # because callers may rely on it -- confirm whether it is intended.
        yield nil, nil, i
      end
    end
  end
end

View file

@ -22,7 +22,7 @@ module FoodsoftArticleImport
Nokogiri::XML::ParseOptions::NONET +
Nokogiri::XML::ParseOptions::COMPACT # do not modify doc!
)
self.load_codes(custom_file_path)
doc.search('product').each.with_index(1) do |row, i|
# create a new article
unit = row.search('eenheid').text
@ -33,8 +33,6 @@ module FoodsoftArticleImport
when 'l' then 'ltr'
else unit
end
return if i==3
puts unit, i
inhoud = row.search('inhoud').text
inhoud.to_s.strip.empty? or (inhoud.to_f-1).abs > 1e-3 and unit = inhoud.gsub(/\.0+\s*$/,'') + unit
deposit = row.search('statiegeld').text
@ -44,20 +42,23 @@ module FoodsoftArticleImport
@@codes[:indeling][row.search('subindeling').text.to_i]
].compact.join(' - ')
article = {:order_number => row.search('bestelnummer').text,
#:ean => row.search('eancode').text,
:name => row.search('omschrijving').text,
:note => row.search('kwaliteit').text,
:manufacturer => row.search('merk').text,
:origin => row.search('herkomst').text,
:unit => unit,
:price => row.search('prijs inkoopprijs').text,
:unit_quantity => row.search('sve').text,
:tax => row.search('btw').text,
:deposit => deposit,
:article_category => category}
yield article, (row.search('status') == 'Actief' ? :outlisted : nil), i
status = row.search('status').text == "Actief" ? nil : :outlisted
article = {}
unless row.search('bestelnummer').text == ""
article = {:order_number => row.search('bestelnummer').text,
#:ean => row.search('eancode').text,
:name => row.search('omschrijving').text,
:note => row.search('kwaliteit').text,
:manufacturer => row.search('merk').text,
:origin => row.search('herkomst').text,
:unit => unit,
:price => row.search('prijs inkoopprijs').text,
:unit_quantity => row.search('sve').text,
:tax => row.search('btw').text,
:deposit => deposit,
:article_category => category}
end
yield article, status, i
end
end
@ -65,16 +66,24 @@ module FoodsoftArticleImport
@@codes = Hash.new
# Loads the code tables into the class variable @@codes.
#
# The bundled "dnb_codes.yml" (located in the foodsoft_article_import
# directory derived from this file's path) is always read. When
# +custom_file_path+ is given, that YAML file is read as well and combined
# with the bundled tables, one top-level key at a time.
#
# @param custom_file_path [String, nil] optional YAML file with extra codes
# @return [Hash] the combined tables, keyed by symbol at the top level
# @raise [RuntimeError] when reading, parsing or merging fails
def self.load_codes(custom_file_path=nil)
  @gem_lib = File.expand_path "../../", __FILE__
  dir = File.join @gem_lib, 'foodsoft_article_import'
  begin
    # load_file opens and closes the file itself (no dangling File handle).
    @@codes = YAML.load_file(File.join(dir, "dnb_codes.yml")).symbolize_keys
    if custom_file_path
      custom_codes = YAML.load_file(custom_file_path).symbolize_keys
      # Merge once, after both files are read. Merging inside a per-key loop
      # replaced every table after the first with the custom one wholesale,
      # dropping the bundled entries for those tables.
      # NOTE(review): on a clash inside a table the bundled entry wins
      # (custom.merge(bundled)), as the original expression did -- confirm
      # that custom files are meant to extend rather than override.
      @@codes = @@codes.merge(custom_codes) do |_key, bundled, custom|
        custom.merge(bundled)
      end
    end
    @@codes
  rescue => e
    raise "Failed to load dnb_codes: #{dir}/dnb_codes.yml: #{e.message}"
  end
end
end
FoodsoftArticleImport::DnbXml.load_codes
end

View file

@ -27,11 +27,11 @@ module FoodsoftArticleImport::Foodsoft
# skip first header row
if header_row
header_row = false
yield nil, nil, i
next
end
# skip empty lines
if row[2].blank?
if row[2].to_s.strip.empty?
# raise no order number given
yield nil, nil, i
next
end
@ -49,7 +49,7 @@ module FoodsoftArticleImport::Foodsoft
:scale_price => row[12],
:article_category => row[13]}
article.merge!(:deposit => row[9]) unless row[9].nil?
article[:order_number].blank? and ArticleImport.generate_number(article)
FoodsoftArticleImport.generate_number(article) if article[:order_number].to_s.strip.empty?
if row[6].nil? || row[7].nil? or row[8].nil?
yield article, "Error: unit, price and tax must be entered", i
else