191 lines
6.3 KiB
Elixir
191 lines
6.3 KiB
Elixir
defmodule Mv.Membership.Import.CsvParserTest do
  @moduledoc """
  Unit tests for `Mv.Membership.Import.CsvParser.parse/1`.

  Covers delimiter detection, UTF-8 BOM stripping, line numbering, line
  endings, quoted fields, and error results. On success the tests expect
  `{:ok, headers, rows}`, where each row is a `{line_number, fields}` tuple
  and the header row counts as line 1.
  """

  use ExUnit.Case, async: true

  alias Mv.Membership.Import.CsvParser

  describe "parse/1" do
    test "returns {:ok, headers, rows} for valid CSV with semicolon delimiter" do
      csv_content = "email;first_name\njohn@example.com;John"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "first_name"]
      assert rows == [{2, ["john@example.com", "John"]}]
    end

    test "returns {:ok, headers, rows} for valid CSV with comma delimiter" do
      csv_content = "email,first_name\njohn@example.com,John"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "first_name"]
      assert rows == [{2, ["john@example.com", "John"]}]
    end

    test "detects semicolon delimiter when both delimiters present" do
      csv_content = "email;first_name,last_name\njohn@example.com;John,Doe"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      # Should detect semicolon as primary delimiter.
      # NOTE(review): splitting this header on either ";" or "," yields two
      # fields, so the assertions below hold for both delimiter choices and do
      # not pin down that the semicolon was selected. Consider asserting the
      # exact headers once the expected parser output is confirmed.
      assert length(headers) >= 2
      assert [_single_row] = rows
    end

    test "prefers semicolon delimiter when recognition is tied" do
      # NOTE(review): this input contains no comma, so a comma split would give
      # a single field per line. Whether this really exercises the tie-break
      # depends on how CsvParser scores delimiters - confirm against the parser.
      csv_content = "email;name\njohn@example.com;John"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      # Should prefer semicolon
      assert headers == ["email", "name"]
      assert rows == [{2, ["john@example.com", "John"]}]
    end

    test "defaults to semicolon delimiter when no headers recognized" do
      csv_content = "unknown1;unknown2\nvalue1;value2"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      # Should default to semicolon
      assert headers == ["unknown1", "unknown2"]
      assert rows == [{2, ["value1", "value2"]}]
    end
  end

  describe "BOM handling" do
    test "strips UTF-8 BOM from file content" do
      bom = <<0xEF, 0xBB, 0xBF>>
      csv_content = bom <> "email;first_name\njohn@example.com;John"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "first_name"]
      assert rows == [{2, ["john@example.com", "John"]}]
    end

    test "parses CSV with BOM correctly (Excel export compatibility)" do
      bom = <<0xEF, 0xBB, 0xBF>>

      csv_content =
        bom <>
          "email;first_name;last_name\njohn@example.com;John;Doe\njane@example.com;Jane;Smith"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "first_name", "last_name"]

      # A single list comparison checks row count, order, and content at once.
      assert rows == [
               {2, ["john@example.com", "John", "Doe"]},
               {3, ["jane@example.com", "Jane", "Smith"]}
             ]
    end
  end

  describe "line number handling" do
    test "header row is line 1, first data row is line 2" do
      csv_content = "email\njohn@example.com"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email"]
      assert rows == [{2, ["john@example.com"]}]
    end

    test "preserves correct line numbers when empty lines are skipped" do
      csv_content = "email;first_name\n\njohn@example.com;John\n\njane@example.com;Jane"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "first_name"]

      # Lines 2 and 4 are empty (skipped); the data sits on lines 3 and 5.
      assert rows == [
               {3, ["john@example.com", "John"]},
               {5, ["jane@example.com", "Jane"]}
             ]
    end

    test "skips completely empty rows but preserves line numbers" do
      csv_content = "email\n\n\njohn@example.com"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email"]
      # Lines 2 and 3 are empty (skipped), line 4 has data
      assert rows == [{4, ["john@example.com"]}]
    end
  end

  describe "line ending handling" do
    test "handles \\r\\n line endings correctly" do
      csv_content = "email;first_name\r\njohn@example.com;John\r\njane@example.com;Jane"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "first_name"]

      assert rows == [
               {2, ["john@example.com", "John"]},
               {3, ["jane@example.com", "Jane"]}
             ]
    end

    test "handles \\n line endings correctly" do
      csv_content = "email;first_name\njohn@example.com;John\njane@example.com;Jane"

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "first_name"]

      assert rows == [
               {2, ["john@example.com", "John"]},
               {3, ["jane@example.com", "Jane"]}
             ]
    end
  end

  describe "quoted fields" do
    test "parses quoted fields correctly" do
      csv_content = "email;name\njohn@example.com;\"John Doe\""

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "name"]
      assert rows == [{2, ["john@example.com", "John Doe"]}]
    end

    test "handles escaped quotes (\"\") inside quoted fields" do
      csv_content = "email;name\njohn@example.com;\"John \"\"Johnny\"\" Doe\""

      assert {:ok, headers, rows} = CsvParser.parse(csv_content)

      assert headers == ["email", "name"]
      assert rows == [{2, ["john@example.com", "John \"Johnny\" Doe"]}]
    end
  end

  describe "error handling" do
    test "returns {:error, reason} for empty file" do
      assert {:error, reason} = CsvParser.parse("")
      assert reason =~ "empty"
    end

    test "returns {:error, reason} when no header row found" do
      # Whitespace-only content: there is no line that could serve as a header
      assert {:error, reason} = CsvParser.parse(" \n ")
      assert reason =~ "CSV file is empty"
    end

    test "returns {:error, reason} for invalid CSV format" do
      # Unbalanced quotes
      csv_content = "email;name\n\"john@example.com;John"

      assert {:error, reason} = CsvParser.parse(csv_content)
      assert is_binary(reason)
    end
  end

  describe "module documentation" do
    test "module has @moduledoc" do
      assert Code.ensure_loaded?(CsvParser)

      # Asserting on the match reports a regular test failure (rather than a
      # MatchError) when the docs chunk is missing or has no English moduledoc.
      assert {:docs_v1, _, _, _, %{"en" => moduledoc}, _, _} = Code.fetch_docs(CsvParser)

      assert is_binary(moduledoc)
      assert moduledoc != ""
    end
  end
end
|