defmodule Mv.Membership.Import.CsvParserTest do
  use ExUnit.Case, async: true

  alias Mv.Membership.Import.CsvParser

  # Exercises CsvParser.parse/1. As the assertions below show, a successful parse is
  # expected to yield {:ok, headers, rows}, where every row is {line_number, fields},
  # and a failed parse is expected to yield {:error, reason} with a textual reason.

  # UTF-8 byte order mark, prepended to the input in the "BOM handling" tests.
  @utf8_bom <<0xEF, 0xBB, 0xBF>>

  describe "parse/1" do
    test "returns {:ok, headers, rows} for valid CSV with semicolon delimiter" do
      input = "email;first_name\njohn@example.com;John"
      expected = {:ok, ["email", "first_name"], [{2, ["john@example.com", "John"]}]}

      assert CsvParser.parse(input) == expected
    end

    test "returns {:ok, headers, rows} for valid CSV with comma delimiter" do
      input = "email,first_name\njohn@example.com,John"
      expected = {:ok, ["email", "first_name"], [{2, ["john@example.com", "John"]}]}

      assert CsvParser.parse(input) == expected
    end

    test "detects semicolon delimiter when both delimiters present" do
      input = "email;first_name,last_name\njohn@example.com;John,Doe"

      # Only the shape is checked: at least two headers and exactly one data row.
      # NOTE(review): splitting this header on "," also produces two fields, so these
      # checks alone do not prove the semicolon was chosen - consider asserting the
      # exact headers once the intended result is confirmed.
      assert {:ok, [_, _ | _], [_]} = CsvParser.parse(input)
    end

    test "prefers semicolon delimiter when recognition is tied" do
      # NOTE(review): this input contains no commas at all - confirm that it really
      # reaches the tie-break between delimiters that the test name describes.
      input = "email;name\njohn@example.com;John"
      expected = {:ok, ["email", "name"], [{2, ["john@example.com", "John"]}]}

      assert CsvParser.parse(input) == expected
    end

    test "defaults to semicolon delimiter when no headers recognized" do
      # Header names are arbitrary; the semicolon split is still the expected outcome.
      input = "unknown1;unknown2\nvalue1;value2"
      expected = {:ok, ["unknown1", "unknown2"], [{2, ["value1", "value2"]}]}

      assert CsvParser.parse(input) == expected
    end
  end

  describe "BOM handling" do
    test "strips UTF-8 BOM from file content" do
      input = @utf8_bom <> "email;first_name\njohn@example.com;John"
      expected = {:ok, ["email", "first_name"], [{2, ["john@example.com", "John"]}]}

      # The BOM bytes must not leak into the first header.
      assert CsvParser.parse(input) == expected
    end

    test "parses CSV with BOM correctly (Excel export compatibility)" do
      input =
        @utf8_bom <>
          "email;first_name;last_name\njohn@example.com;John;Doe\njane@example.com;Jane;Smith"

      expected_rows = [
        {2, ["john@example.com", "John", "Doe"]},
        {3, ["jane@example.com", "Jane", "Smith"]}
      ]

      assert CsvParser.parse(input) == {:ok, ["email", "first_name", "last_name"], expected_rows}
    end
  end

  describe "line number handling" do
    test "header row is line 1, first data row is line 2" do
      input = "email\njohn@example.com"

      assert CsvParser.parse(input) == {:ok, ["email"], [{2, ["john@example.com"]}]}
    end

    test "preserves correct line numbers when empty lines are skipped" do
      input = "email;first_name\n\njohn@example.com;John\n\njane@example.com;Jane"

      assert {:ok, header_fields, data_rows} = CsvParser.parse(input)
      assert header_fields == ["email", "first_name"]

      # Lines 2 and 4 are empty and skipped, so the data sits on lines 3 and 5.
      # Only the first two rows are inspected; the total row count is not asserted.
      assert Enum.take(data_rows, 2) == [
               {3, ["john@example.com", "John"]},
               {5, ["jane@example.com", "Jane"]}
             ]
    end

    test "skips completely empty rows but preserves line numbers" do
      input = "email\n\n\njohn@example.com"

      # Lines 2 and 3 are empty (skipped); the only data row is on line 4.
      assert CsvParser.parse(input) == {:ok, ["email"], [{4, ["john@example.com"]}]}
    end
  end

  describe "line ending handling" do
    test "handles \\r\\n line endings correctly" do
      input = "email;first_name\r\njohn@example.com;John\r\njane@example.com;Jane"

      expected_rows = [
        {2, ["john@example.com", "John"]},
        {3, ["jane@example.com", "Jane"]}
      ]

      assert CsvParser.parse(input) == {:ok, ["email", "first_name"], expected_rows}
    end

    test "handles \\n line endings correctly" do
      input = "email;first_name\njohn@example.com;John\njane@example.com;Jane"

      expected_rows = [
        {2, ["john@example.com", "John"]},
        {3, ["jane@example.com", "Jane"]}
      ]

      assert CsvParser.parse(input) == {:ok, ["email", "first_name"], expected_rows}
    end
  end

  describe "quoted fields" do
    test "parses quoted fields correctly" do
      input = "email;name\njohn@example.com;\"John Doe\""
      expected = {:ok, ["email", "name"], [{2, ["john@example.com", "John Doe"]}]}

      # The surrounding quotes are not part of the field value.
      assert CsvParser.parse(input) == expected
    end

    test "handles escaped quotes (\"\") inside quoted fields" do
      input = "email;name\njohn@example.com;\"John \"\"Johnny\"\" Doe\""
      expected = {:ok, ["email", "name"], [{2, ["john@example.com", "John \"Johnny\" Doe"]}]}

      # A doubled quote inside a quoted field stands for one literal quote.
      assert CsvParser.parse(input) == expected
    end

    test "handles multiline quoted fields with correct line numbering" do
      # Line 1 is the header. The data record starts on line 2 and carries "foo\nbar"
      # in a quoted field, so it physically ends on line 3. The row is expected to be
      # numbered by its start line (2).
      input = "email;description\njohn@example.com;\"foo\nbar\""
      expected = {:ok, ["email", "description"], [{2, ["john@example.com", "foo\nbar"]}]}

      assert CsvParser.parse(input) == expected
    end
  end

  describe "error handling" do
    test "returns {:error, reason} for empty file" do
      assert {:error, message} = CsvParser.parse("")
      assert message =~ "empty"
    end

    test "returns {:error, reason} when no header row found" do
      # Whitespace-only input (no BOM is present in this input).
      assert {:error, message} = CsvParser.parse(" \n ")
      assert message =~ "CSV file is empty"
    end

    test "returns {:error, reason} for invalid UTF-8 content" do
      # These three bytes do not form a valid UTF-8 sequence.
      assert {:error, message} = CsvParser.parse(<<0xFF, 0xFE, 0xFD>>)
      assert message =~ "UTF-8"
    end

    test "returns {:error, reason} for unparsable data row" do
      # Malformed data row: the quote is never closed. Per the original note, NimbleCSV
      # raises for unclosed quotes; the parser is expected to report an error tuple.
      input = "email;name\njohn@example.com;\"unclosed quote"

      assert {:error, message} = CsvParser.parse(input)
      assert is_binary(message)

      # The message should hint at a parsing failure.
      assert String.contains?(message, ["parse", "CSV"])
    end
  end

  describe "module documentation" do
    test "module has @moduledoc" do
      assert Code.ensure_loaded?(CsvParser)

      # The docs chunk must carry a non-empty English module doc.
      assert {:docs_v1, _, _, _, %{"en" => doc_text}, _, _} = Code.fetch_docs(CsvParser)
      assert is_binary(doc_text)
      assert doc_text != ""
    end
  end
end