Reviewed-on: #351
This commit is contained in:
commit
b44d8a9d70
5 changed files with 513 additions and 2 deletions
295
lib/mv/membership/import/csv_parser.ex
Normal file
295
lib/mv/membership/import/csv_parser.ex
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
# Two NimbleCSV parser modules, one per supported delimiter. Both use the
# standard double-quote escape character.
NimbleCSV.define(Mv.Membership.Import.CsvParserSemicolon, escape: "\"", separator: ";")

NimbleCSV.define(Mv.Membership.Import.CsvParserComma, escape: "\"", separator: ",")
|
||||
|
||||
defmodule Mv.Membership.Import.CsvParser do
  @moduledoc """
  Parses CSV uploads with BOM stripping, delimiter auto-detection, and
  physical line numbering.

  Guarantees:

  - A leading UTF-8 BOM (as written by Excel) is removed
  - The delimiter (semicolon or comma) is auto-detected via quote-aware
    NimbleCSV parsing of the header
  - Every returned row is tagged with the *physical* start line number in
    the CSV file (1-based)
  - Completely empty rows are skipped, without disturbing the physical
    numbering
  - `\\r\\n`, `\\n`, and `\\r` line endings are all accepted
  - Fields containing newlines inside quotes are handled correctly: the
    row is tagged with the line on which its record starts
  """

  # UTF-8 byte-order mark that Excel prepends to exported CSV files.
  @utf8_bom <<0xEF, 0xBB, 0xBF>>
  # The CSV quote character (a double quote).
  @quote ?"
  # Maximum number of characters of a bad record echoed in error messages.
  @max_error_snippet_length 50

  @typedoc "1-based physical line number in the CSV file."
  @type line_number :: pos_integer()
  @typedoc "A single parsed CSV record: a list of field strings."
  @type row :: [String.t()]
  @typedoc "A parsed record tagged with its physical start line."
  @type numbered_row :: {line_number(), row()}
|
||||
|
||||
@doc """
Parses a CSV binary into `{:ok, headers, numbered_rows}`.

Returns `{:error, reason}` for non-binary input, invalid UTF-8, empty
content, a blank header row, or an unparsable record.
"""
@spec parse(binary()) :: {:ok, row(), [numbered_row()]} | {:error, String.t()}
def parse(file_content) when is_binary(file_content) do
  with :ok <- validate_utf8(file_content),
       content = file_content |> strip_bom() |> normalize_line_endings(),
       :ok <- validate_content_not_empty(content),
       {:ok, header_record, data_records} <- extract_header_and_data(content),
       :ok <- validate_header_not_empty(header_record) do
    parse_csv_records(header_record, data_records)
  end
end

def parse(_), do: {:error, "Invalid CSV content"}
|
||||
|
||||
# Ensures the raw upload is valid UTF-8 before any further processing.
@spec validate_utf8(binary()) :: :ok | {:error, String.t()}
defp validate_utf8(content) do
  case String.valid?(content) do
    true -> :ok
    false -> {:error, "CSV must be valid UTF-8"}
  end
end
|
||||
|
||||
# Rejects files that contain nothing but whitespace.
@spec validate_content_not_empty(binary()) :: :ok | {:error, String.t()}
defp validate_content_not_empty(content) do
  if String.trim(content) != "", do: :ok, else: {:error, "CSV file is empty"}
end
|
||||
|
||||
# Splits the normalized content into the raw header record and the
# remaining data records, each tagged with its physical start line.
@spec extract_header_and_data(binary()) ::
        {:ok, binary(), [{line_number(), binary()}]} | {:error, String.t()}
defp extract_header_and_data(content) do
  case split_records_with_line_numbers(content) do
    [] ->
      {:error, "CSV file is empty"}

    [{_header_line, header_record} | data_records] ->
      {:ok, header_record, data_records}
  end
end
|
||||
|
||||
# Rejects files whose first record is blank (no usable header row).
@spec validate_header_not_empty(binary()) :: :ok | {:error, String.t()}
defp validate_header_not_empty(header_record) do
  if String.trim(header_record) != "",
    do: :ok,
    else: {:error, "CSV file has no header row"}
end
|
||||
|
||||
# Detects the delimiter from the header, then parses the header and all
# data records with the matching NimbleCSV parser.
@spec parse_csv_records(binary(), [{line_number(), binary()}]) ::
        {:ok, row(), [numbered_row()]} | {:error, String.t()}
defp parse_csv_records(header_record, data_records) do
  parser =
    header_record
    |> detect_delimiter_by_parsing()
    |> get_parser()

  with {:ok, headers} <-
         parse_single_record(parser, header_record, "CSV file has no header row"),
       {:ok, rows} <- parse_data_records(parser, data_records) do
    {:ok, headers, rows}
  end
end
|
||||
|
||||
# Removes a leading UTF-8 BOM if present (Excel exports carry one).
@spec strip_bom(binary()) :: binary()
defp strip_bom(content) do
  case content do
    <<@utf8_bom, rest::binary>> -> rest
    _ -> content
  end
end
|
||||
|
||||
# Collapses \r\n and bare \r to \n so record splitting only needs to
# consider \n. The two-character sequence must be replaced first.
@spec normalize_line_endings(binary()) :: binary()
defp normalize_line_endings(content) do
  without_crlf = String.replace(content, "\r\n", "\n")
  String.replace(without_crlf, "\r", "\n")
end
|
||||
|
||||
# Maps a detected delimiter to its NimbleCSV parser module.
# Anything other than "," (including unknown input) uses the semicolon
# parser, which is also the default.
@spec get_parser(String.t()) :: module()
defp get_parser(delimiter) do
  case delimiter do
    "," -> Mv.Membership.Import.CsvParserComma
    _ -> Mv.Membership.Import.CsvParserSemicolon
  end
end
|
||||
|
||||
# --- Delimiter detection (quote-aware by actually parsing the header) ---

# Parses the header with both candidate parsers and picks the delimiter
# that yields more non-blank fields; ";" wins ties.
@spec detect_delimiter_by_parsing(binary()) :: String.t()
defp detect_delimiter_by_parsing(header_record) do
  semicolon_fields = header_field_count(Mv.Membership.Import.CsvParserSemicolon, header_record)
  comma_fields = header_field_count(Mv.Membership.Import.CsvParserComma, header_record)

  if comma_fields > semicolon_fields, do: ",", else: ";"
end
|
||||
|
||||
# Counts the non-blank header fields produced by `parser`; returns 0 when
# the header cannot be parsed at all.
@spec header_field_count(module(), binary()) :: non_neg_integer()
defp header_field_count(parser, header_record) do
  case parse_single_record(parser, header_record, nil) do
    {:error, _reason} ->
      0

    {:ok, fields} ->
      Enum.count(fields, fn field -> String.trim(field) != "" end)
  end
end
|
||||
|
||||
# Parses exactly one record (a trailing newline is optional).
#
# Returns `{:ok, row}` on success. When the record parses to nothing,
# returns `{:error, error_reason_if_empty}` if a custom reason was given,
# otherwise a generic header-parse error. NimbleCSV raises on malformed
# input (e.g. unclosed quotes); any exception is converted into an error
# tuple here.
@spec parse_single_record(module(), binary(), String.t() | nil) ::
        {:ok, row()} | {:error, String.t()}
defp parse_single_record(parser, record, error_reason_if_empty) do
  # NimbleCSV is happiest when the input ends with a newline.
  record
  |> ensure_trailing_newline()
  |> parser.parse_string(skip_headers: false)
  |> case do
    [row] when is_list(row) and row != [] ->
      {:ok, row}

    _ ->
      # `error_reason_if_empty` is either a caller-supplied binary or nil.
      {:error, error_reason_if_empty || "Failed to parse CSV header"}
  end
rescue
  e -> {:error, "Failed to parse CSV: #{Exception.message(e)}"}
end
|
||||
|
||||
# Appends a newline unless the string already ends with one.
@spec ensure_trailing_newline(binary()) :: binary()
defp ensure_trailing_newline(str) do
  if String.ends_with?(str, "\n") do
    str
  else
    str <> "\n"
  end
end
|
||||
|
||||
# --- Data parsing preserving *physical* line numbers ---
#
# Parses every data record, keeping its physical start line and skipping
# blank rows. Stops at the first unparsable record. NimbleCSV exceptions
# (e.g. unclosed quotes) are converted into an error tuple.
@spec parse_data_records(module(), [{line_number(), binary()}]) ::
        {:ok, [numbered_row()]} | {:error, String.t()}
defp parse_data_records(parser, data_records) do
  result =
    Enum.reduce_while(data_records, [], fn {line_no, record}, acc ->
      process_data_record(parser, line_no, record, acc)
    end)

  case result do
    # A halted reduction hands back the error tuple unchanged.
    {:error, _reason} = error -> error
    # Rows were accumulated newest-first; restore file order.
    reversed_rows -> {:ok, Enum.reverse(reversed_rows)}
  end
rescue
  e -> {:error, "Failed to parse CSV: #{Exception.message(e)}"}
end
|
||||
|
||||
# Skips records that are blank after trimming; everything else is parsed.
@spec process_data_record(module(), line_number(), binary(), [numbered_row()]) ::
        {:cont, [numbered_row()]} | {:halt, {:error, String.t()}}
defp process_data_record(parser, line_no, record, acc) do
  case String.trim(record) do
    "" -> {:cont, acc}
    _non_blank -> process_non_empty_record(parser, line_no, record, acc)
  end
end
|
||||
|
||||
# Parses one non-blank record.
#
# Rows whose fields are all blank are skipped; an unparsable record halts
# the reduction with a line-numbered error that quotes the beginning of
# the offending record.
@spec process_non_empty_record(module(), line_number(), binary(), [numbered_row()]) ::
        {:cont, [numbered_row()]} | {:halt, {:error, String.t()}}
defp process_non_empty_record(parser, line_no, record, acc) do
  parsed = parser.parse_string(ensure_trailing_newline(record), skip_headers: false)

  case parsed do
    [row] when is_list(row) ->
      if empty_row?(row) do
        {:cont, acc}
      else
        {:cont, [{line_no, row} | acc]}
      end

    # unparsable row -> return error with line number
    _ ->
      # String.slice/3 already clamps the length to the string's size, so
      # the previous min(@max_error_snippet_length, String.length(record))
      # was a redundant O(n) scan.
      snippet = String.slice(record, 0, @max_error_snippet_length)

      {:halt, {:error, "Failed to parse CSV data at line #{line_no}: #{inspect(snippet)}"}}
  end
end
|
||||
|
||||
# True when every field of the row is blank after trimming.
@spec empty_row?(row()) :: boolean()
defp empty_row?(row) when is_list(row) do
  not Enum.any?(row, fn field -> String.trim(field) != "" end)
end
|
||||
|
||||
# --- Record splitting with correct line numbers (quote-aware) ---
#
# Splits the CSV into records at newlines occurring *outside quotes*.
# Returns `[{start_line_number, record_string_without_newline}, ...]`.
#
# Line numbers are 1-based physical lines; empty lines keep their slot in
# the numbering (they are merely skipped later).
@spec split_records_with_line_numbers(binary()) :: [{line_number(), binary()}]
defp split_records_with_line_numbers(content) do
  {records, buffer, _in_quotes, _line, start_line} = do_split(content, [], [], false, 1, 1)

  # A non-empty buffer means the file did not end with a newline: flush
  # the final record.
  records =
    if buffer == [] do
      records
    else
      last_record = buffer |> Enum.reverse() |> :erlang.list_to_binary()
      [{start_line, last_record} | records]
    end

  Enum.reverse(records)
end
|
||||
|
||||
# Recursive worker for `split_records_with_line_numbers/1`.
#
# Walks the binary byte by byte, quote-aware:
# - Escaped quotes (`""`) inside a quoted field are kept verbatim
# - Newlines inside quotes stay in the record but bump the line counter
# - Newlines outside quotes terminate the current record
#
# State threaded through the recursion:
# - `content` - remaining binary input
# - `acc` - accumulated records `[{line_number, record}, ...]`, newest first
# - `buf` - current record as a reversed byte list
# - `in_quotes` - whether we are inside a quoted field
# - `line` - current physical line number
# - `start_line` - line on which the current record started
@spec do_split(
        binary(),
        [{line_number(), binary()}],
        [byte()],
        boolean(),
        line_number(),
        line_number()
      ) ::
        {[{line_number(), binary()}], [byte()], boolean(), line_number(), line_number()}
defp do_split(<<>>, acc, buf, in_quotes, line, start_line),
  do: {acc, buf, in_quotes, line, start_line}

# An escaped quote ("") inside a quoted field keeps both quote bytes and
# leaves the quote state untouched.
defp do_split(<<@quote, @quote, rest::binary>>, acc, buf, true, line, start_line) do
  do_split(rest, acc, [@quote, @quote | buf], true, line, start_line)
end

# A lone quote toggles the quote state.
defp do_split(<<@quote, rest::binary>>, acc, buf, in_quotes, line, start_line) do
  do_split(rest, acc, [@quote | buf], not in_quotes, line, start_line)
end

# A newline outside quotes finishes the current record (possibly empty).
defp do_split(<<?\n, rest::binary>>, acc, buf, false, line, start_line) do
  record = buf |> Enum.reverse() |> :erlang.list_to_binary()
  do_split(rest, [{start_line, record} | acc], [], false, line + 1, line + 1)
end

# A newline inside quotes belongs to the record but still advances the
# physical line counter.
defp do_split(<<?\n, rest::binary>>, acc, buf, true, line, start_line) do
  do_split(rest, acc, [?\n | buf], true, line + 1, start_line)
end

# Any other byte is copied into the current record buffer.
defp do_split(<<byte, rest::binary>>, acc, buf, in_quotes, line, start_line) do
  do_split(rest, acc, [byte | buf], in_quotes, line, start_line)
end
|
||||
end
|
||||
3
mix.exs
3
mix.exs
|
|
@ -78,7 +78,8 @@ defmodule Mv.MixProject do
|
|||
{:credo, "~> 1.7", only: [:dev, :test], runtime: false},
|
||||
{:picosat_elixir, "~> 0.1", only: [:dev, :test]},
|
||||
{:ecto_commons, "~> 0.3"},
|
||||
{:slugify, "~> 1.3"}
|
||||
{:slugify, "~> 1.3"},
|
||||
{:nimble_csv, "~> 1.0"}
|
||||
]
|
||||
end
|
||||
|
||||
|
|
|
|||
1
mix.lock
1
mix.lock
|
|
@ -47,6 +47,7 @@
|
|||
"mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"},
|
||||
"mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"},
|
||||
"mix_audit": {:hex, :mix_audit, "2.1.5", "c0f77cee6b4ef9d97e37772359a187a166c7a1e0e08b50edf5bf6959dfe5a016", [:make, :mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.11", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm", "87f9298e21da32f697af535475860dc1d3617a010e0b418d2ec6142bc8b42d69"},
|
||||
"nimble_csv": {:hex, :nimble_csv, "1.3.0", "b7f998dc62b222bce9596e46f028c7a5af04cb5dde6df2ea197c583227c54971", [:mix], [], "hexpm", "41ccdc18f7c8f8bb06e84164fc51635321e80d5a3b450761c4997d620925d619"},
|
||||
"nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
|
||||
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
|
||||
"owl": {:hex, :owl, "0.13.0", "26010e066d5992774268f3163506972ddac0a7e77bfe57fa42a250f24d6b876e", [:mix], [{:ucwidth, "~> 0.2", [hex: :ucwidth, repo: "hexpm", optional: true]}], "hexpm", "59bf9d11ce37a4db98f57cb68fbfd61593bf419ec4ed302852b6683d3d2f7475"},
|
||||
|
|
|
|||
215
test/mv/membership/import/csv_parser_test.exs
Normal file
215
test/mv/membership/import/csv_parser_test.exs
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
defmodule Mv.Membership.Import.CsvParserTest do
  use ExUnit.Case, async: true

  alias Mv.Membership.Import.CsvParser

  # UTF-8 byte-order mark as written by Excel.
  @bom <<0xEF, 0xBB, 0xBF>>

  describe "parse/1" do
    test "returns {:ok, headers, rows} for valid CSV with semicolon delimiter" do
      result = CsvParser.parse("email;first_name\njohn@example.com;John")

      assert {:ok, ["email", "first_name"], [{2, ["john@example.com", "John"]}]} = result
    end

    test "returns {:ok, headers, rows} for valid CSV with comma delimiter" do
      result = CsvParser.parse("email,first_name\njohn@example.com,John")

      assert {:ok, ["email", "first_name"], [{2, ["john@example.com", "John"]}]} = result
    end

    test "detects semicolon delimiter when both delimiters present" do
      input = "email;first_name,last_name\njohn@example.com;John,Doe"

      assert {:ok, headers, rows} = CsvParser.parse(input)

      # Semicolon should win as the primary delimiter.
      assert length(headers) >= 2
      assert length(rows) == 1
    end

    test "prefers semicolon delimiter when recognition is tied" do
      # Both delimiters would yield the same number of fields here.
      input = "email;name\njohn@example.com;John"

      assert {:ok, headers, rows} = CsvParser.parse(input)

      assert headers == ["email", "name"]
      assert rows == [{2, ["john@example.com", "John"]}]
    end

    test "defaults to semicolon delimiter when no headers recognized" do
      assert {:ok, headers, rows} = CsvParser.parse("unknown1;unknown2\nvalue1;value2")

      assert headers == ["unknown1", "unknown2"]
      assert rows == [{2, ["value1", "value2"]}]
    end
  end

  describe "BOM handling" do
    test "strips UTF-8 BOM from file content" do
      input = @bom <> "email;first_name\njohn@example.com;John"

      assert {:ok, ["email", "first_name"], [{2, ["john@example.com", "John"]}]} =
               CsvParser.parse(input)
    end

    test "parses CSV with BOM correctly (Excel export compatibility)" do
      input =
        @bom <>
          "email;first_name;last_name\njohn@example.com;John;Doe\njane@example.com;Jane;Smith"

      assert {:ok, headers, rows} = CsvParser.parse(input)

      assert headers == ["email", "first_name", "last_name"]

      assert rows == [
               {2, ["john@example.com", "John", "Doe"]},
               {3, ["jane@example.com", "Jane", "Smith"]}
             ]
    end
  end

  describe "line number handling" do
    test "header row is line 1, first data row is line 2" do
      assert {:ok, ["email"], [{2, ["john@example.com"]}]} =
               CsvParser.parse("email\njohn@example.com")
    end

    test "preserves correct line numbers when empty lines are skipped" do
      input = "email;first_name\n\njohn@example.com;John\n\njane@example.com;Jane"

      assert {:ok, headers, rows} = CsvParser.parse(input)

      assert headers == ["email", "first_name"]
      # Lines 2 and 4 are empty and skipped; data sits on lines 3 and 5.
      assert rows == [{3, ["john@example.com", "John"]}, {5, ["jane@example.com", "Jane"]}]
    end

    test "skips completely empty rows but preserves line numbers" do
      assert {:ok, ["email"], rows} = CsvParser.parse("email\n\n\njohn@example.com")

      # Lines 2 and 3 are empty (skipped); the data record is on line 4.
      assert rows == [{4, ["john@example.com"]}]
    end
  end

  describe "line ending handling" do
    test "handles \\r\\n line endings correctly" do
      input = "email;first_name\r\njohn@example.com;John\r\njane@example.com;Jane"

      assert {:ok, headers, rows} = CsvParser.parse(input)

      assert headers == ["email", "first_name"]
      assert rows == [{2, ["john@example.com", "John"]}, {3, ["jane@example.com", "Jane"]}]
    end

    test "handles \\n line endings correctly" do
      input = "email;first_name\njohn@example.com;John\njane@example.com;Jane"

      assert {:ok, headers, rows} = CsvParser.parse(input)

      assert headers == ["email", "first_name"]
      assert rows == [{2, ["john@example.com", "John"]}, {3, ["jane@example.com", "Jane"]}]
    end
  end

  describe "quoted fields" do
    test "parses quoted fields correctly" do
      assert {:ok, ["email", "name"], [{2, ["john@example.com", "John Doe"]}]} =
               CsvParser.parse("email;name\njohn@example.com;\"John Doe\"")
    end

    test "handles escaped quotes (\"\") inside quoted fields" do
      input = "email;name\njohn@example.com;\"John \"\"Johnny\"\" Doe\""

      assert {:ok, headers, rows} = CsvParser.parse(input)

      assert headers == ["email", "name"]
      assert rows == [{2, ["john@example.com", "John \"Johnny\" Doe"]}]
    end

    test "handles multiline quoted fields with correct line numbering" do
      # Header is line 1; the data record starts on line 2 and its quoted
      # field spans into line 3. The row must be tagged with line 2.
      input = "email;description\njohn@example.com;\"foo\nbar\""

      assert {:ok, headers, rows} = CsvParser.parse(input)

      assert headers == ["email", "description"]
      assert rows == [{2, ["john@example.com", "foo\nbar"]}]
    end
  end

  describe "error handling" do
    test "returns {:error, reason} for empty file" do
      assert {:error, reason} = CsvParser.parse("")
      assert reason =~ "empty"
    end

    test "returns {:error, reason} when no header row found" do
      # Only whitespace remains after BOM stripping.
      assert {:error, reason} = CsvParser.parse(" \n ")
      assert reason =~ "CSV file is empty"
    end

    test "returns {:error, reason} for invalid UTF-8 content" do
      assert {:error, reason} = CsvParser.parse(<<0xFF, 0xFE, 0xFD>>)
      assert reason =~ "UTF-8"
    end

    test "returns {:error, reason} for unparsable data row" do
      # NimbleCSV raises on the unclosed quote; parse/1 converts that
      # into an error tuple.
      assert {:error, reason} = CsvParser.parse("email;name\njohn@example.com;\"unclosed quote")

      assert is_binary(reason)
      assert reason =~ "parse" or reason =~ "CSV"
    end
  end

  describe "module documentation" do
    test "module has @moduledoc" do
      assert Code.ensure_loaded?(CsvParser)

      {:docs_v1, _, _, _, %{"en" => moduledoc}, _, _} = Code.fetch_docs(CsvParser)
      assert is_binary(moduledoc)
      assert String.length(moduledoc) > 0
    end
  end
end
|
||||
|
|
@ -44,7 +44,6 @@ defmodule Mv.Membership.Import.MemberCSVTest do
|
|||
assert match?({:ok, _}, result) or match?({:error, _}, result)
|
||||
end
|
||||
|
||||
# Skip until Issue #3 is implemented
|
||||
@tag :skip
|
||||
test "returns {:ok, import_state} on success" do
|
||||
file_content = "email\njohn@example.com"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue