From cc6d72b6b19ff8bed99f0733d8458a298d5a266d Mon Sep 17 00:00:00 2001 From: carla Date: Tue, 13 Jan 2026 11:44:40 +0100 Subject: [PATCH 01/12] feat: add service skeleton and tests --- lib/mv/membership/import/member_csv.ex | 158 ++++++++++++++++++ test/mv/membership/import/member_csv_test.exs | 128 ++++++++++++++ 2 files changed, 286 insertions(+) create mode 100644 lib/mv/membership/import/member_csv.ex create mode 100644 test/mv/membership/import/member_csv_test.exs diff --git a/lib/mv/membership/import/member_csv.ex b/lib/mv/membership/import/member_csv.ex new file mode 100644 index 0000000..6e4e019 --- /dev/null +++ b/lib/mv/membership/import/member_csv.ex @@ -0,0 +1,158 @@ +defmodule Mv.Membership.Import.MemberCSV do + @moduledoc """ + Service module for importing members from CSV files. + + This module provides the core API for CSV member import functionality: + - `prepare/2` - Parses and validates CSV content, returns import state + - `process_chunk/3` - Processes a chunk of rows and creates members + + ## Error Handling + + Errors are returned as `%Error{}` structs containing: + - `csv_line_number` - The physical line number in the CSV file + - `field` - The field name (atom) or `nil` if not field-specific + - `message` - Human-readable error message + + ## Import State + + The `import_state` returned by `prepare/2` contains: + - `chunks` - List of row chunks ready for processing + - `column_map` - Map of canonical field names to column indices + - `custom_field_map` - Map of custom field names to column indices + - `warnings` - List of warning messages (e.g., unknown custom field columns) + + ## Chunk Results + + The `chunk_result` returned by `process_chunk/3` contains: + - `inserted` - Number of successfully created members + - `failed` - Number of failed member creations + - `errors` - List of `%Error{}` structs (capped at 50 per import) + + ## Examples + + # Prepare CSV for import + {:ok, import_state} = MemberCSV.prepare(csv_content) + + # Process first chunk + chunk = Enum.at(import_state.chunks, 0) + {:ok, result} = MemberCSV.process_chunk(chunk, import_state.column_map) + """ + + defmodule Error do + @moduledoc """ + Error struct for CSV import errors. + + ## Fields + + - `csv_line_number` - The physical line number in the CSV file (1-based, header is line 1) + - `field` - The field name as an atom (e.g., `:email`) or `nil` if not field-specific + - `message` - Human-readable error message + """ + defstruct csv_line_number: nil, field: nil, message: nil + + @type t :: %__MODULE__{ + csv_line_number: integer(), + field: atom() | nil, + message: String.t() + } + end + + @type import_state :: %{ + chunks: list(list({pos_integer(), map()})), + column_map: %{atom() => non_neg_integer()}, + custom_field_map: %{String.t() => non_neg_integer()}, + warnings: list(String.t()) + } + + @type chunk_result :: %{ + inserted: non_neg_integer(), + failed: non_neg_integer(), + errors: list(Error.t()) + } + + @doc """ + Prepares CSV content for import by parsing, mapping headers, and validating limits. + + This function: + 1. Strips UTF-8 BOM if present + 2. Detects CSV delimiter (semicolon or comma) + 3. Parses headers and data rows + 4. Maps headers to canonical member fields + 5. Maps custom field columns by name + 6. Validates row count limits + 7. 
Chunks rows for processing + + ## Parameters + + - `file_content` - The raw CSV file content as a string + - `opts` - Optional keyword list: + - `:max_rows` - Maximum number of data rows allowed (default: 1000) + - `:chunk_size` - Number of rows per chunk (default: 200) + + ## Returns + + - `{:ok, import_state}` - Successfully prepared import state + - `{:error, reason}` - Error reason (string or error struct) + + ## Examples + + iex> MemberCSV.prepare("email\\njohn@example.com") + {:ok, %{chunks: [...], column_map: %{email: 0}, ...}} + + iex> MemberCSV.prepare("") + {:error, "CSV file is empty"} + """ + @spec prepare(String.t(), keyword()) :: {:ok, import_state()} | {:error, String.t()} + def prepare(file_content, opts \\ []) do + # TODO: Implement in Issue #3 (CSV Parsing) + # This is a skeleton implementation that will be filled in later + _ = {file_content, opts} + + # Placeholder return - will be replaced with actual implementation + {:error, "Not yet implemented"} + end + + @doc """ + Processes a chunk of CSV rows and creates members. + + This function: + 1. Validates each row + 2. Creates members via Ash resource + 3. Creates custom field values for each member + 4. Collects errors with correct CSV line numbers + 5. Returns chunk processing results + + ## Parameters + + - `chunk_rows_with_lines` - List of tuples `{csv_line_number, row_map}` where: + - `csv_line_number` - Physical line number in CSV (1-based) + - `row_map` - Map of column names to values + - `column_map` - Map of canonical field names (atoms) to column indices + - `opts` - Optional keyword list for processing options + + ## Returns + + - `{:ok, chunk_result}` - Chunk processing results + - `{:error, reason}` - Error reason (string) + + ## Examples + + iex> chunk = [{2, %{"email" => "john@example.com"}}] + iex> column_map = %{email: 0} + iex> MemberCSV.process_chunk(chunk, column_map) + {:ok, %{inserted: 1, failed: 0, errors: []}} + """ + @spec process_chunk( + list({pos_integer(), map()}), + %{atom() => non_neg_integer()}, + keyword() + ) :: {:ok, chunk_result()} | {:error, String.t()} + def process_chunk(chunk_rows_with_lines, column_map, opts \\ []) do + # TODO: Implement in Issue #6 (Persistence) + # This is a skeleton implementation that will be filled in later + _ = {chunk_rows_with_lines, column_map, opts} + + # Placeholder return - will be replaced with actual implementation + {:ok, %{inserted: 0, failed: 0, errors: []}} + end +end diff --git a/test/mv/membership/import/member_csv_test.exs b/test/mv/membership/import/member_csv_test.exs new file mode 100644 index 0000000..1e51d51 --- /dev/null +++ b/test/mv/membership/import/member_csv_test.exs @@ -0,0 +1,128 @@ +defmodule Mv.Membership.Import.MemberCSVTest do + use Mv.DataCase, async: false + + alias Mv.Membership.Import.MemberCSV + + describe "Error struct" do + test "Error struct exists with required fields" do + # This will fail at runtime if the struct doesn't exist + # We use struct/2 to create the struct at runtime + error = + struct(MemberCSV.Error, %{ + csv_line_number: 5, + field: :email, + message: "is not a valid email" + }) + + assert error.csv_line_number == 5 + assert error.field == :email + assert error.message == "is not a valid email" + end + + test "Error struct allows nil field" do + # This will fail at runtime if the struct doesn't exist + error = + struct(MemberCSV.Error, %{ + csv_line_number: 10, + field: nil, + message: "Row is empty" + }) + + assert error.csv_line_number == 10 + assert error.field == nil + assert error.message == "Row 
is empty" + end + end + + describe "prepare/2" do + test "function exists and accepts file_content and opts" do + file_content = "email\njohn@example.com" + opts = [] + + # This will fail until the function is implemented + result = MemberCSV.prepare(file_content, opts) + assert match?({:ok, _}, result) or match?({:error, _}, result) + end + + test "returns {:ok, import_state} on success" do + file_content = "email\njohn@example.com" + opts = [] + + assert {:ok, import_state} = MemberCSV.prepare(file_content, opts) + + # Check that import_state contains expected fields + assert Map.has_key?(import_state, :chunks) + assert Map.has_key?(import_state, :column_map) + assert Map.has_key?(import_state, :custom_field_map) + assert Map.has_key?(import_state, :warnings) + end + + test "returns {:error, reason} on failure" do + file_content = "" + opts = [] + + assert {:error, _reason} = MemberCSV.prepare(file_content, opts) + end + + test "function has documentation" do + # Check that @doc exists by reading the module + assert function_exported?(MemberCSV, :prepare, 2) + end + end + + describe "process_chunk/3" do + test "function exists and accepts chunk_rows_with_lines, column_map, and opts" do + chunk_rows_with_lines = [{2, %{"email" => "john@example.com"}}] + column_map = %{email: 0} + opts = [] + + # This will fail until the function is implemented + result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts) + assert match?({:ok, _}, result) or match?({:error, _}, result) + end + + test "returns {:ok, chunk_result} on success" do + chunk_rows_with_lines = [{2, %{"email" => "john@example.com"}}] + column_map = %{email: 0} + opts = [] + + assert {:ok, chunk_result} = + MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts) + + # Check that chunk_result contains expected fields + assert Map.has_key?(chunk_result, :inserted) + assert Map.has_key?(chunk_result, :failed) + assert Map.has_key?(chunk_result, :errors) + assert is_integer(chunk_result.inserted) + assert is_integer(chunk_result.failed) + assert is_list(chunk_result.errors) + end + + test "returns {:error, reason} on failure" do + chunk_rows_with_lines = [] + column_map = %{} + opts = [] + + # This might return {:ok, _} with zero counts or {:error, _} + result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts) + assert match?({:ok, _}, result) or match?({:error, _}, result) + end + + test "function has documentation" do + # Check that @doc exists by reading the module + assert function_exported?(MemberCSV, :process_chunk, 3) + end + end + + describe "module documentation" do + test "module has @moduledoc" do + # Check that the module exists and has documentation + assert Code.ensure_loaded?(MemberCSV) + + # Try to get the module documentation + {:docs_v1, _, _, _, %{"en" => moduledoc}, _, _} = Code.fetch_docs(MemberCSV) + assert is_binary(moduledoc) + assert String.length(moduledoc) > 0 + end + end +end From aa62e0340950498e79cac7cc93718f10cc991d18 Mon Sep 17 00:00:00 2001 From: carla Date: Wed, 14 Jan 2026 09:11:44 +0100 Subject: [PATCH 02/12] skip test for now --- test/mv/membership/import/member_csv_test.exs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/mv/membership/import/member_csv_test.exs b/test/mv/membership/import/member_csv_test.exs index 1e51d51..6893329 100644 --- a/test/mv/membership/import/member_csv_test.exs +++ b/test/mv/membership/import/member_csv_test.exs @@ -44,6 +44,7 @@ defmodule Mv.Membership.Import.MemberCSVTest do assert match?({:ok, _}, result) or match?({:error, 
_}, result) end + @tag :skip #Skip until Issue #3 is implemented test "returns {:ok, import_state} on success" do file_content = "email\njohn@example.com" opts = [] From fb71b7ddb1b3c8258ed6dfbd4942eaaa096d7420 Mon Sep 17 00:00:00 2001 From: carla Date: Wed, 14 Jan 2026 09:49:40 +0100 Subject: [PATCH 03/12] fix struct inconsistencies --- lib/mv/membership/import/member_csv.ex | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/mv/membership/import/member_csv.ex b/lib/mv/membership/import/member_csv.ex index 6e4e019..9e30a20 100644 --- a/lib/mv/membership/import/member_csv.ex +++ b/lib/mv/membership/import/member_csv.ex @@ -8,10 +8,10 @@ defmodule Mv.Membership.Import.MemberCSV do ## Error Handling - Errors are returned as `%Error{}` structs containing: - - `csv_line_number` - The physical line number in the CSV file + Errors are returned as `%MemberCSV.Error{}` structs containing: + - `csv_line_number` - The physical line number in the CSV file (or `nil` for general errors) - `field` - The field name (atom) or `nil` if not field-specific - - `message` - Human-readable error message + - `message` - Human-readable error message (or `nil` for general errors) ## Import State @@ -26,7 +26,7 @@ defmodule Mv.Membership.Import.MemberCSV do The `chunk_result` returned by `process_chunk/3` contains: - `inserted` - Number of successfully created members - `failed` - Number of failed member creations - - `errors` - List of `%Error{}` structs (capped at 50 per import) + - `errors` - List of `%MemberCSV.Error{}` structs (capped at 50 per import) ## Examples @@ -51,9 +51,9 @@ defmodule Mv.Membership.Import.MemberCSV do defstruct csv_line_number: nil, field: nil, message: nil @type t :: %__MODULE__{ - csv_line_number: integer(), + csv_line_number: pos_integer() | nil, field: atom() | nil, - message: String.t() + message: String.t() | nil } end From aa3fb0c49b4fef5f9a785aeac5589e0d69377f13 Mon Sep 17 00:00:00 2001 From: carla Date: Wed, 14 Jan 2026 10:48:36 +0100 Subject: [PATCH 04/12] fix linting --- test/mv/membership/import/member_csv_test.exs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/mv/membership/import/member_csv_test.exs b/test/mv/membership/import/member_csv_test.exs index 6893329..5ac5311 100644 --- a/test/mv/membership/import/member_csv_test.exs +++ b/test/mv/membership/import/member_csv_test.exs @@ -44,7 +44,8 @@ defmodule Mv.Membership.Import.MemberCSVTest do assert match?({:ok, _}, result) or match?({:error, _}, result) end - @tag :skip #Skip until Issue #3 is implemented + # Skip until Issue #3 is implemented + @tag :skip test "returns {:ok, import_state} on success" do file_content = "email\njohn@example.com" opts = [] From 699d4385cba3d1ab1bd115506441931902bbf063 Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 15 Jan 2026 10:09:23 +0100 Subject: [PATCH 05/12] chore: add nimble_csv dependency --- mix.exs | 3 ++- mix.lock | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mix.exs b/mix.exs index 4092b7a..99f4507 100644 --- a/mix.exs +++ b/mix.exs @@ -78,7 +78,8 @@ defmodule Mv.MixProject do {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, {:picosat_elixir, "~> 0.1", only: [:dev, :test]}, {:ecto_commons, "~> 0.3"}, - {:slugify, "~> 1.3"} + {:slugify, "~> 1.3"}, + {:nimble_csv, "~> 1.0"} ] end diff --git a/mix.lock b/mix.lock index 54b0e51..26aa555 100644 --- a/mix.lock +++ b/mix.lock @@ -47,6 +47,7 @@ "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], 
[], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, "mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"}, "mix_audit": {:hex, :mix_audit, "2.1.5", "c0f77cee6b4ef9d97e37772359a187a166c7a1e0e08b50edf5bf6959dfe5a016", [:make, :mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.11", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm", "87f9298e21da32f697af535475860dc1d3617a010e0b418d2ec6142bc8b42d69"}, + "nimble_csv": {:hex, :nimble_csv, "1.3.0", "b7f998dc62b222bce9596e46f028c7a5af04cb5dde6df2ea197c583227c54971", [:mix], [], "hexpm", "41ccdc18f7c8f8bb06e84164fc51635321e80d5a3b450761c4997d620925d619"}, "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "owl": {:hex, :owl, "0.13.0", "26010e066d5992774268f3163506972ddac0a7e77bfe57fa42a250f24d6b876e", [:mix], [{:ucwidth, "~> 0.2", [hex: :ucwidth, repo: "hexpm", optional: true]}], "hexpm", "59bf9d11ce37a4db98f57cb68fbfd61593bf419ec4ed302852b6683d3d2f7475"}, From 68e19bea1836011e3f6cf547261bfaa1fa3f95ee Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 15 Jan 2026 10:10:02 +0100 Subject: [PATCH 06/12] feat: add csv parser --- lib/mv/membership/import/csv_parser.ex | 276 +++++++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 lib/mv/membership/import/csv_parser.ex diff --git a/lib/mv/membership/import/csv_parser.ex b/lib/mv/membership/import/csv_parser.ex new file mode 100644 index 0000000..b9f036d --- /dev/null +++ b/lib/mv/membership/import/csv_parser.ex @@ -0,0 +1,276 @@ +NimbleCSV.define(Mv.Membership.Import.CsvParserSemicolon, separator: ";", escape: "\"") +NimbleCSV.define(Mv.Membership.Import.CsvParserComma, separator: ",", escape: "\"") + +defmodule Mv.Membership.Import.CsvParser do + @moduledoc """ + CSV parser with BOM handling, delimiter auto-detection, and physical line numbering. + + Guarantees: + - UTF-8 BOM is stripped (Excel) + - Delimiter auto-detected (semicolon/comma) using NimbleCSV parsing (quote-aware) + - Returns rows tagged with their *physical start line number* in the CSV file (1-based) + - Skips completely empty rows (but preserves numbering by using physical line numbers) + - Handles `\\r\\n`, `\\n`, `\\r` + - Correct even when fields contain newlines inside quotes: the row gets the start line number + """ + + @utf8_bom <<0xEF, 0xBB, 0xBF>> + @quote ?" 
+ + @type line_number :: pos_integer() + @type row :: [String.t()] + @type numbered_row :: {line_number(), row()} + + @spec parse(binary()) :: {:ok, row(), [numbered_row()]} | {:error, String.t()} + def parse(file_content) when is_binary(file_content) do + content = + file_content + |> strip_bom() + |> normalize_line_endings() + + with :ok <- validate_content_not_empty(content), + :ok <- check_balanced_quotes(content), + {:ok, header_record, data_records} <- extract_header_and_data(content), + :ok <- validate_header_not_empty(header_record), + {:ok, headers, rows} <- parse_csv_records(header_record, data_records) do + {:ok, headers, rows} + else + {:error, reason} -> {:error, reason} + end + end + + def parse(_), do: {:error, "Invalid CSV content"} + + @spec validate_content_not_empty(binary()) :: :ok | {:error, String.t()} + defp validate_content_not_empty(content) do + if String.trim(content) == "" do + {:error, "CSV file is empty"} + else + :ok + end + end + + @spec extract_header_and_data(binary()) :: {:ok, binary(), [{line_number(), binary()}]} | {:error, String.t()} + defp extract_header_and_data(content) do + records = split_records_with_line_numbers(content) + + case records do + [] -> + {:error, "CSV file is empty"} + + [{_line1, header_record} | data_records] -> + {:ok, header_record, data_records} + end + end + + @spec validate_header_not_empty(binary()) :: :ok | {:error, String.t()} + defp validate_header_not_empty(header_record) do + if String.trim(header_record) == "" do + {:error, "CSV file has no header row"} + else + :ok + end + end + + @spec parse_csv_records(binary(), [{line_number(), binary()}]) :: + {:ok, row(), [numbered_row()]} | {:error, String.t()} + defp parse_csv_records(header_record, data_records) do + delimiter = detect_delimiter_by_parsing(header_record) + parser = get_parser(delimiter) + + with {:ok, headers} <- + parse_single_record(parser, header_record, "CSV file has no header row"), + {:ok, rows} <- parse_data_records(parser, data_records) do + {:ok, headers, rows} + end + end + + @spec strip_bom(binary()) :: binary() + defp strip_bom(<<@utf8_bom, rest::binary>>), do: rest + defp strip_bom(content), do: content + + @spec normalize_line_endings(binary()) :: binary() + defp normalize_line_endings(content) do + content + |> String.replace("\r\n", "\n") + |> String.replace("\r", "\n") + end + + @spec get_parser(String.t()) :: module() + defp get_parser(";"), do: Mv.Membership.Import.CsvParserSemicolon + defp get_parser(","), do: Mv.Membership.Import.CsvParserComma + defp get_parser(_), do: Mv.Membership.Import.CsvParserSemicolon + + # --- Delimiter detection (quote-aware by actually parsing the header) --- + + @spec detect_delimiter_by_parsing(binary()) :: String.t() + defp detect_delimiter_by_parsing(header_record) do + semicolon_score = header_field_count(Mv.Membership.Import.CsvParserSemicolon, header_record) + comma_score = header_field_count(Mv.Membership.Import.CsvParserComma, header_record) + + # prefer ";" on tie + if semicolon_score >= comma_score, do: ";", else: "," + end + + @spec header_field_count(module(), binary()) :: non_neg_integer() + defp header_field_count(parser, header_record) do + case parse_single_record(parser, header_record, nil) do + {:ok, fields} -> Enum.count(fields, &(String.trim(&1) != "")) + {:error, _} -> 0 + end + end + + # Parses exactly one record (string without trailing newline is fine). + # Returns {:ok, row} or {:error, reason}. 
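+  #
+  # Illustrative call (hypothetical values, using the comma parser defined at
+  # the top of this file):
+  #
+  #     parse_single_record(Mv.Membership.Import.CsvParserComma, ~s(a,"b,c"), nil)
+  #     #=> {:ok, ["a", "b,c"]}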
+ @spec parse_single_record(module(), binary(), String.t() | nil) :: + {:ok, row()} | {:error, String.t()} + defp parse_single_record(parser, record, error_reason_if_empty) do + try do + # NimbleCSV is happiest if there's a newline at the end. + rows = parser.parse_string(ensure_trailing_newline(record), skip_headers: false) + + case rows do + [row] when is_list(row) and row != [] -> + {:ok, row} + + _ -> + if is_binary(error_reason_if_empty), + do: {:error, error_reason_if_empty}, + else: {:error, "Failed to parse CSV header"} + end + rescue + e -> + {:error, "Failed to parse CSV: #{Exception.message(e)}"} + end + end + + @spec ensure_trailing_newline(binary()) :: binary() + defp ensure_trailing_newline(str) do + if String.ends_with?(str, "\n"), do: str, else: str <> "\n" + end + + # --- Data parsing preserving *physical* line numbers --- + + @spec parse_data_records(module(), [{line_number(), binary()}]) :: + {:ok, [numbered_row()]} | {:error, String.t()} + defp parse_data_records(parser, data_records) do + try do + rows = + data_records + |> Enum.reduce([], fn {line_no, record}, acc -> + case String.trim(record) do + "" -> + acc + + _ -> + parsed = parser.parse_string(ensure_trailing_newline(record), skip_headers: false) + + case parsed do + [row] when is_list(row) -> + if empty_row?(row) do + acc + else + [{line_no, row} | acc] + end + + # empty row or unparsable row -> treat as skipped empty row + _ -> + acc + end + end + end) + |> Enum.reverse() + + {:ok, rows} + rescue + e -> + {:error, "Failed to parse CSV: #{Exception.message(e)}"} + end + end + + @spec empty_row?(row()) :: boolean() + defp empty_row?(row) when is_list(row) do + Enum.all?(row, fn field -> String.trim(field) == "" end) + end + + # Check if quotes are balanced in the content. + # + # This is a simple check that counts quote characters. In CSV, escaped quotes + # are represented as "", so an odd number of quotes indicates unbalanced quotes. + @spec check_balanced_quotes(binary()) :: :ok | {:error, String.t()} + defp check_balanced_quotes(content) do + quote_count = content |> :binary.bin_to_list() |> Enum.count(&(&1 == @quote)) + + if rem(quote_count, 2) != 0 do + {:error, "Unbalanced quotes in CSV file"} + else + :ok + end + end + + # --- Record splitting with correct line numbers (quote-aware) --- + # + # This splits the CSV into "records" separated by newline *outside quotes*, + # returning [{start_line_number, record_string_without_newline}, ...] + # + # Line numbers are 1-based and represent the physical line in the CSV file. + # Empty lines are included in the numbering (they're just skipped later). 
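+  #
+  # Worked example (hypothetical input):
+  #
+  #     split_records_with_line_numbers("a;b\n\"x\ny\";z\n")
+  #     #=> [{1, "a;b"}, {2, "\"x\ny\";z"}]
+  #
+  # The second record spans physical lines 2-3 because of the quoted newline,
+  # so it is tagged with its start line, 2.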
+ # + @spec split_records_with_line_numbers(binary()) :: [{line_number(), binary()}] + defp split_records_with_line_numbers(content) do + {acc, buf, _in_quotes, _line, start_line} = + do_split(content, [], [], false, 1, 1) + + # finalize last record only if there is buffered content + acc = + case buf do + [] -> + acc + + _ -> + record = buf |> Enum.reverse() |> :erlang.list_to_binary() + [{start_line, record} | acc] + end + + Enum.reverse(acc) + end + + # EOF + @spec do_split( + binary(), + [{line_number(), binary()}], + [byte()], + boolean(), + line_number(), + line_number() + ) :: + {[{line_number(), binary()}], [byte()], boolean(), line_number(), line_number()} + defp do_split(<<>>, acc, buf, in_quotes, line, start_line), + do: {acc, buf, in_quotes, line, start_line} + + # Escaped quote inside quoted field: "" -> keep both quotes, do NOT toggle in_quotes + defp do_split(<<@quote, @quote, rest::binary>>, acc, buf, true = in_quotes, line, start_line) do + do_split(rest, acc, [@quote, @quote | buf], in_quotes, line, start_line) + end + + # Quote toggles quote state (when not escaped "") + defp do_split(<<@quote, rest::binary>>, acc, buf, in_quotes, line, start_line) do + do_split(rest, acc, [@quote | buf], not in_quotes, line, start_line) + end + + # Newline outside quotes ends a record (even if empty) + defp do_split(<<"\n", rest::binary>>, acc, buf, false, line, start_line) do + record = buf |> Enum.reverse() |> :erlang.list_to_binary() + do_split(rest, [{start_line, record} | acc], [], false, line + 1, line + 1) + end + + # Newline inside quotes is part of the record, but advances physical line counter + defp do_split(<<"\n", rest::binary>>, acc, buf, true = in_quotes, line, start_line) do + do_split(rest, acc, [?\n | buf], in_quotes, line + 1, start_line) + end + + # Any other byte + defp do_split(<>, acc, buf, in_quotes, line, start_line) do + do_split(rest, acc, [ch | buf], in_quotes, line, start_line) + end +end From 31cf07c071f36f72b875a1eabbd985ed804985b2 Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 15 Jan 2026 10:10:14 +0100 Subject: [PATCH 07/12] test: updated tests --- test/mv/membership/import/csv_parser_test.exs | 191 ++++++++++++++++++ test/mv/membership/import/member_csv_test.exs | 1 - 2 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 test/mv/membership/import/csv_parser_test.exs diff --git a/test/mv/membership/import/csv_parser_test.exs b/test/mv/membership/import/csv_parser_test.exs new file mode 100644 index 0000000..539e57e --- /dev/null +++ b/test/mv/membership/import/csv_parser_test.exs @@ -0,0 +1,191 @@ +defmodule Mv.Membership.Import.CsvParserTest do + use ExUnit.Case, async: true + + alias Mv.Membership.Import.CsvParser + + describe "parse/1" do + test "returns {:ok, headers, rows} for valid CSV with semicolon delimiter" do + csv_content = "email;first_name\njohn@example.com;John" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "first_name"] + assert rows == [{2, ["john@example.com", "John"]}] + end + + test "returns {:ok, headers, rows} for valid CSV with comma delimiter" do + csv_content = "email,first_name\njohn@example.com,John" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "first_name"] + assert rows == [{2, ["john@example.com", "John"]}] + end + + test "detects semicolon delimiter when both delimiters present" do + csv_content = "email;first_name,last_name\njohn@example.com;John,Doe" + + assert {:ok, headers, rows} = 
CsvParser.parse(csv_content) + + # Should detect semicolon as primary delimiter + assert length(headers) >= 2 + assert length(rows) == 1 + end + + test "prefers semicolon delimiter when recognition is tied" do + # CSV where both delimiters would yield same number of fields + csv_content = "email;name\njohn@example.com;John" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + # Should prefer semicolon + assert headers == ["email", "name"] + assert rows == [{2, ["john@example.com", "John"]}] + end + + test "defaults to semicolon delimiter when no headers recognized" do + csv_content = "unknown1;unknown2\nvalue1;value2" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + # Should default to semicolon + assert headers == ["unknown1", "unknown2"] + assert rows == [{2, ["value1", "value2"]}] + end + end + + describe "BOM handling" do + test "strips UTF-8 BOM from file content" do + bom = <<0xEF, 0xBB, 0xBF>> + csv_content = bom <> "email;first_name\njohn@example.com;John" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "first_name"] + assert rows == [{2, ["john@example.com", "John"]}] + end + + test "parses CSV with BOM correctly (Excel export compatibility)" do + bom = <<0xEF, 0xBB, 0xBF>> + + csv_content = + bom <> + "email;first_name;last_name\njohn@example.com;John;Doe\njane@example.com;Jane;Smith" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "first_name", "last_name"] + assert length(rows) == 2 + assert Enum.at(rows, 0) == {2, ["john@example.com", "John", "Doe"]} + assert Enum.at(rows, 1) == {3, ["jane@example.com", "Jane", "Smith"]} + end + end + + describe "line number handling" do + test "header row is line 1, first data row is line 2" do + csv_content = "email\njohn@example.com" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email"] + assert rows == [{2, ["john@example.com"]}] + end + + test "preserves correct line numbers when empty lines are skipped" do + csv_content = "email;first_name\n\njohn@example.com;John\n\njane@example.com;Jane" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "first_name"] + # Line 2 is empty (skipped), line 3 has data + assert Enum.at(rows, 0) == {3, ["john@example.com", "John"]} + # Line 4 is empty (skipped), line 5 has data + assert Enum.at(rows, 1) == {5, ["jane@example.com", "Jane"]} + end + + test "skips completely empty rows but preserves line numbers" do + csv_content = "email\n\n\njohn@example.com" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email"] + # Lines 2, 3, 4 are empty (skipped), line 4 has data + assert rows == [{4, ["john@example.com"]}] + end + end + + describe "line ending handling" do + test "handles \\r\\n line endings correctly" do + csv_content = "email;first_name\r\njohn@example.com;John\r\njane@example.com;Jane" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "first_name"] + assert length(rows) == 2 + assert Enum.at(rows, 0) == {2, ["john@example.com", "John"]} + assert Enum.at(rows, 1) == {3, ["jane@example.com", "Jane"]} + end + + test "handles \\n line endings correctly" do + csv_content = "email;first_name\njohn@example.com;John\njane@example.com;Jane" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "first_name"] + assert length(rows) == 2 + assert Enum.at(rows, 0) == {2, 
["john@example.com", "John"]} + assert Enum.at(rows, 1) == {3, ["jane@example.com", "Jane"]} + end + end + + describe "quoted fields" do + test "parses quoted fields correctly" do + csv_content = "email;name\njohn@example.com;\"John Doe\"" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "name"] + assert rows == [{2, ["john@example.com", "John Doe"]}] + end + + test "handles escaped quotes (\"\") inside quoted fields" do + csv_content = "email;name\njohn@example.com;\"John \"\"Johnny\"\" Doe\"" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "name"] + assert rows == [{2, ["john@example.com", "John \"Johnny\" Doe"]}] + end + end + + describe "error handling" do + test "returns {:error, reason} for empty file" do + assert {:error, reason} = CsvParser.parse("") + assert reason =~ "empty" + end + + test "returns {:error, reason} when no header row found" do + # Only whitespace after BOM strip + assert {:error, reason} = CsvParser.parse(" \n ") + assert reason =~ "CSV file is empty" + end + + test "returns {:error, reason} for invalid CSV format" do + # Unbalanced quotes + csv_content = "email;name\n\"john@example.com;John" + + assert {:error, reason} = CsvParser.parse(csv_content) + assert is_binary(reason) + end + end + + describe "module documentation" do + test "module has @moduledoc" do + assert Code.ensure_loaded?(CsvParser) + + {:docs_v1, _, _, _, %{"en" => moduledoc}, _, _} = Code.fetch_docs(CsvParser) + assert is_binary(moduledoc) + assert String.length(moduledoc) > 0 + end + end +end diff --git a/test/mv/membership/import/member_csv_test.exs b/test/mv/membership/import/member_csv_test.exs index 5ac5311..e918afd 100644 --- a/test/mv/membership/import/member_csv_test.exs +++ b/test/mv/membership/import/member_csv_test.exs @@ -44,7 +44,6 @@ defmodule Mv.Membership.Import.MemberCSVTest do assert match?({:ok, _}, result) or match?({:error, _}, result) end - # Skip until Issue #3 is implemented @tag :skip test "returns {:ok, import_state} on success" do file_content = "email\njohn@example.com" From 3bbe9895eebb5d8a7e4ad7ef4b6942412e5b6eba Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 15 Jan 2026 11:08:22 +0100 Subject: [PATCH 08/12] fix: improve CSV parser error handling --- lib/mv/membership/import/csv_parser.ex | 53 ++++++++----------- test/mv/membership/import/csv_parser_test.exs | 32 +++++++++-- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/lib/mv/membership/import/csv_parser.ex b/lib/mv/membership/import/csv_parser.ex index b9f036d..22a7496 100644 --- a/lib/mv/membership/import/csv_parser.ex +++ b/lib/mv/membership/import/csv_parser.ex @@ -23,13 +23,9 @@ defmodule Mv.Membership.Import.CsvParser do @spec parse(binary()) :: {:ok, row(), [numbered_row()]} | {:error, String.t()} def parse(file_content) when is_binary(file_content) do - content = - file_content - |> strip_bom() - |> normalize_line_endings() - - with :ok <- validate_content_not_empty(content), - :ok <- check_balanced_quotes(content), + with :ok <- validate_utf8(file_content), + content <- file_content |> strip_bom() |> normalize_line_endings(), + :ok <- validate_content_not_empty(content), {:ok, header_record, data_records} <- extract_header_and_data(content), :ok <- validate_header_not_empty(header_record), {:ok, headers, rows} <- parse_csv_records(header_record, data_records) do @@ -41,6 +37,15 @@ defmodule Mv.Membership.Import.CsvParser do def parse(_), do: {:error, "Invalid CSV content"} + @spec 
validate_utf8(binary()) :: :ok | {:error, String.t()} + defp validate_utf8(content) do + if String.valid?(content) do + :ok + else + {:error, "CSV must be valid UTF-8"} + end + end + @spec validate_content_not_empty(binary()) :: :ok | {:error, String.t()} defp validate_content_not_empty(content) do if String.trim(content) == "" do @@ -157,10 +162,10 @@ defmodule Mv.Membership.Import.CsvParser do try do rows = data_records - |> Enum.reduce([], fn {line_no, record}, acc -> + |> Enum.reduce_while([], fn {line_no, record}, acc -> case String.trim(record) do "" -> - acc + {:cont, acc} _ -> parsed = parser.parse_string(ensure_trailing_newline(record), skip_headers: false) @@ -168,20 +173,23 @@ defmodule Mv.Membership.Import.CsvParser do case parsed do [row] when is_list(row) -> if empty_row?(row) do - acc + {:cont, acc} else - [{line_no, row} | acc] + {:cont, [{line_no, row} | acc]} end - # empty row or unparsable row -> treat as skipped empty row + # unparsable row -> return error with line number _ -> - acc + snippet = String.slice(record, 0, min(50, String.length(record))) + {:halt, {:error, "Failed to parse CSV data at line #{line_no}: #{inspect(snippet)}"}} end end end) - |> Enum.reverse() - {:ok, rows} + case rows do + {:error, reason} -> {:error, reason} + rows -> {:ok, Enum.reverse(rows)} + end rescue e -> {:error, "Failed to parse CSV: #{Exception.message(e)}"} @@ -193,21 +201,6 @@ defmodule Mv.Membership.Import.CsvParser do Enum.all?(row, fn field -> String.trim(field) == "" end) end - # Check if quotes are balanced in the content. - # - # This is a simple check that counts quote characters. In CSV, escaped quotes - # are represented as "", so an odd number of quotes indicates unbalanced quotes. - @spec check_balanced_quotes(binary()) :: :ok | {:error, String.t()} - defp check_balanced_quotes(content) do - quote_count = content |> :binary.bin_to_list() |> Enum.count(&(&1 == @quote)) - - if rem(quote_count, 2) != 0 do - {:error, "Unbalanced quotes in CSV file"} - else - :ok - end - end - # --- Record splitting with correct line numbers (quote-aware) --- # # This splits the CSV into "records" separated by newline *outside quotes*, diff --git a/test/mv/membership/import/csv_parser_test.exs b/test/mv/membership/import/csv_parser_test.exs index 539e57e..e4a762b 100644 --- a/test/mv/membership/import/csv_parser_test.exs +++ b/test/mv/membership/import/csv_parser_test.exs @@ -109,7 +109,7 @@ defmodule Mv.Membership.Import.CsvParserTest do assert {:ok, headers, rows} = CsvParser.parse(csv_content) assert headers == ["email"] - # Lines 2, 3, 4 are empty (skipped), line 4 has data + # Lines 2 & 3 are empty (skipped), line 4 has data assert rows == [{4, ["john@example.com"]}] end end @@ -156,6 +156,19 @@ defmodule Mv.Membership.Import.CsvParserTest do assert headers == ["email", "name"] assert rows == [{2, ["john@example.com", "John \"Johnny\" Doe"]}] end + + test "handles multiline quoted fields with correct line numbering" do + # Header line 1 + # Data record starts line 2, contains "foo\nbar" in a field + # Record ends physically at line 3 + # Expected: row gets line number 2 (start line) + csv_content = "email;description\njohn@example.com;\"foo\nbar\"" + + assert {:ok, headers, rows} = CsvParser.parse(csv_content) + + assert headers == ["email", "description"] + assert rows == [{2, ["john@example.com", "foo\nbar"]}] + end end describe "error handling" do @@ -170,12 +183,23 @@ defmodule Mv.Membership.Import.CsvParserTest do assert reason =~ "CSV file is empty" end - test "returns {:error, 
reason} for invalid CSV format" do - # Unbalanced quotes - csv_content = "email;name\n\"john@example.com;John" + test "returns {:error, reason} for invalid UTF-8 content" do + # Invalid UTF-8 sequence + invalid_utf8 = <<0xFF, 0xFE, 0xFD>> + + assert {:error, reason} = CsvParser.parse(invalid_utf8) + assert reason =~ "UTF-8" + end + + test "returns {:error, reason} for unparsable data row" do + # Malformed CSV row that cannot be parsed + # NimbleCSV will throw an exception for unclosed quotes + csv_content = "email;name\njohn@example.com;\"unclosed quote" assert {:error, reason} = CsvParser.parse(csv_content) assert is_binary(reason) + # Error message should indicate parsing failure + assert reason =~ "parse" or reason =~ "CSV" end end From 8a5d012895903c458836291a3e85f83c6ccdfc98 Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 15 Jan 2026 12:15:22 +0100 Subject: [PATCH 09/12] refactor parser --- lib/mv/membership/import/csv_parser.ex | 136 +++++++++++++++---------- 1 file changed, 81 insertions(+), 55 deletions(-) diff --git a/lib/mv/membership/import/csv_parser.ex b/lib/mv/membership/import/csv_parser.ex index 22a7496..2de75ee 100644 --- a/lib/mv/membership/import/csv_parser.ex +++ b/lib/mv/membership/import/csv_parser.ex @@ -16,6 +16,7 @@ defmodule Mv.Membership.Import.CsvParser do @utf8_bom <<0xEF, 0xBB, 0xBF>> @quote ?" + @max_error_snippet_length 50 @type line_number :: pos_integer() @type row :: [String.t()] @@ -27,11 +28,8 @@ defmodule Mv.Membership.Import.CsvParser do content <- file_content |> strip_bom() |> normalize_line_endings(), :ok <- validate_content_not_empty(content), {:ok, header_record, data_records} <- extract_header_and_data(content), - :ok <- validate_header_not_empty(header_record), - {:ok, headers, rows} <- parse_csv_records(header_record, data_records) do - {:ok, headers, rows} - else - {:error, reason} -> {:error, reason} + :ok <- validate_header_not_empty(header_record) do + parse_csv_records(header_record, data_records) end end @@ -55,7 +53,8 @@ defmodule Mv.Membership.Import.CsvParser do end end - @spec extract_header_and_data(binary()) :: {:ok, binary(), [{line_number(), binary()}]} | {:error, String.t()} + @spec extract_header_and_data(binary()) :: + {:ok, binary(), [{line_number(), binary()}]} | {:error, String.t()} defp extract_header_and_data(content) do records = split_records_with_line_numbers(content) @@ -126,27 +125,25 @@ defmodule Mv.Membership.Import.CsvParser do end # Parses exactly one record (string without trailing newline is fine). - # Returns {:ok, row} or {:error, reason}. + # Returns `{:ok, row}` or `{:error, reason}`. @spec parse_single_record(module(), binary(), String.t() | nil) :: {:ok, row()} | {:error, String.t()} defp parse_single_record(parser, record, error_reason_if_empty) do - try do - # NimbleCSV is happiest if there's a newline at the end. - rows = parser.parse_string(ensure_trailing_newline(record), skip_headers: false) + # NimbleCSV is happiest if there's a newline at the end. 
+ rows = parser.parse_string(ensure_trailing_newline(record), skip_headers: false) - case rows do - [row] when is_list(row) and row != [] -> - {:ok, row} + case rows do + [row] when is_list(row) and row != [] -> + {:ok, row} - _ -> - if is_binary(error_reason_if_empty), - do: {:error, error_reason_if_empty}, - else: {:error, "Failed to parse CSV header"} - end - rescue - e -> - {:error, "Failed to parse CSV: #{Exception.message(e)}"} + _ -> + if is_binary(error_reason_if_empty), + do: {:error, error_reason_if_empty}, + else: {:error, "Failed to parse CSV header"} end + rescue + e -> + {:error, "Failed to parse CSV: #{Exception.message(e)}"} end @spec ensure_trailing_newline(binary()) :: binary() @@ -155,44 +152,59 @@ defmodule Mv.Membership.Import.CsvParser do end # --- Data parsing preserving *physical* line numbers --- - + # + # Parses data records while preserving physical line numbers. + # Skips empty rows but maintains correct line numbering for error reporting. + # @spec parse_data_records(module(), [{line_number(), binary()}]) :: {:ok, [numbered_row()]} | {:error, String.t()} defp parse_data_records(parser, data_records) do - try do - rows = - data_records - |> Enum.reduce_while([], fn {line_no, record}, acc -> - case String.trim(record) do - "" -> - {:cont, acc} + rows = + data_records + |> Enum.reduce_while([], fn {line_no, record}, acc -> + process_data_record(parser, line_no, record, acc) + end) - _ -> - parsed = parser.parse_string(ensure_trailing_newline(record), skip_headers: false) + case rows do + {:error, reason} -> {:error, reason} + rows -> {:ok, Enum.reverse(rows)} + end + rescue + e -> + {:error, "Failed to parse CSV: #{Exception.message(e)}"} + end - case parsed do - [row] when is_list(row) -> - if empty_row?(row) do - {:cont, acc} - else - {:cont, [{line_no, row} | acc]} - end + @spec process_data_record(module(), line_number(), binary(), [numbered_row()]) :: + {:cont, [numbered_row()]} | {:halt, {:error, String.t()}} + defp process_data_record(parser, line_no, record, acc) do + trimmed = String.trim(record) - # unparsable row -> return error with line number - _ -> - snippet = String.slice(record, 0, min(50, String.length(record))) - {:halt, {:error, "Failed to parse CSV data at line #{line_no}: #{inspect(snippet)}"}} - end - end - end) + if trimmed == "" do + {:cont, acc} + else + process_non_empty_record(parser, line_no, record, acc) + end + end - case rows do - {:error, reason} -> {:error, reason} - rows -> {:ok, Enum.reverse(rows)} - end - rescue - e -> - {:error, "Failed to parse CSV: #{Exception.message(e)}"} + @spec process_non_empty_record(module(), line_number(), binary(), [numbered_row()]) :: + {:cont, [numbered_row()]} | {:halt, {:error, String.t()}} + defp process_non_empty_record(parser, line_no, record, acc) do + parsed = parser.parse_string(ensure_trailing_newline(record), skip_headers: false) + + case parsed do + [row] when is_list(row) -> + if empty_row?(row) do + {:cont, acc} + else + {:cont, [{line_no, row} | acc]} + end + + # unparsable row -> return error with line number + _ -> + snippet = + String.slice(record, 0, min(@max_error_snippet_length, String.length(record))) + + {:halt, {:error, "Failed to parse CSV data at line #{line_no}: #{inspect(snippet)}"}} end end @@ -203,8 +215,8 @@ defmodule Mv.Membership.Import.CsvParser do # --- Record splitting with correct line numbers (quote-aware) --- # - # This splits the CSV into "records" separated by newline *outside quotes*, - # returning [{start_line_number, record_string_without_newline}, ...] 
+ # Splits the CSV into records separated by newline *outside quotes*. + # Returns `[{start_line_number, record_string_without_newline}, ...]`. # # Line numbers are 1-based and represent the physical line in the CSV file. # Empty lines are included in the numbering (they're just skipped later). @@ -228,7 +240,21 @@ defmodule Mv.Membership.Import.CsvParser do Enum.reverse(acc) end - # EOF + # Recursively splits CSV content into records with correct line numbering. + # + # Handles quote-aware parsing: + # - Escaped quotes (`""`) inside quoted fields are preserved + # - Newlines inside quotes are part of the record but advance line counter + # - Newlines outside quotes end a record + # + # Parameters: + # - `content` - Remaining binary content to parse + # - `acc` - Accumulated records `[{line_number, record}, ...]` + # - `buf` - Current record buffer (reversed byte list) + # - `in_quotes` - Whether we're currently inside a quoted field + # - `line` - Current physical line number + # - `start_line` - Line number where current record started + # @spec do_split( binary(), [{line_number(), binary()}], From 0673684cc154d1e283638f2bcd609d2ce2604504 Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 15 Jan 2026 16:11:02 +0100 Subject: [PATCH 10/12] test: adds tests for header normalization --- .../membership/import/header_mapper_test.exs | 244 ++++++++++++++++++ test/mv/membership/import/member_csv_test.exs | 185 ++++++++++++- 2 files changed, 416 insertions(+), 13 deletions(-) create mode 100644 test/mv/membership/import/header_mapper_test.exs diff --git a/test/mv/membership/import/header_mapper_test.exs b/test/mv/membership/import/header_mapper_test.exs new file mode 100644 index 0000000..5e7efbd --- /dev/null +++ b/test/mv/membership/import/header_mapper_test.exs @@ -0,0 +1,244 @@ +defmodule Mv.Membership.Import.HeaderMapperTest do + use ExUnit.Case, async: true + + alias Mv.Membership.Import.HeaderMapper + + describe "normalize_header/1" do + test "trims whitespace" do + assert HeaderMapper.normalize_header(" email ") == "email" + end + + test "converts to lowercase" do + assert HeaderMapper.normalize_header("EMAIL") == "email" + assert HeaderMapper.normalize_header("E-Mail") == "e-mail" + end + + test "normalizes Unicode characters" do + # ß -> ss + assert HeaderMapper.normalize_header("Straße") == "strasse" + # Umlaute transliteration (ä -> ae, ö -> oe, ü -> ue) + assert HeaderMapper.normalize_header("Müller") == "mueller" + assert HeaderMapper.normalize_header("Köln") == "koeln" + assert HeaderMapper.normalize_header("Grün") == "gruen" + end + + test "compresses and removes whitespace" do + # Whitespace is removed entirely to ensure "first name" == "firstname" + assert HeaderMapper.normalize_header("first name") == "firstname" + assert HeaderMapper.normalize_header("email address") == "emailaddress" + end + + test "unifies hyphen variants" do + # Different Unicode hyphen characters should become standard hyphen + # en dash + assert HeaderMapper.normalize_header("E–Mail") == "e-mail" + # minus sign + assert HeaderMapper.normalize_header("E−Mail") == "e-mail" + # standard hyphen + assert HeaderMapper.normalize_header("E-Mail") == "e-mail" + end + + test "removes or unifies punctuation" do + # Parentheses, slashes, etc. 
are removed (whitespace is also removed) + assert HeaderMapper.normalize_header("E-Mail (privat)") == "e-mailprivat" + assert HeaderMapper.normalize_header("Telefon / Mobil") == "telefonmobil" + end + + test "handles empty strings" do + assert HeaderMapper.normalize_header("") == "" + assert HeaderMapper.normalize_header(" ") == "" + end + end + + describe "build_maps/2" do + test "maps English email variant correctly" do + headers = ["Email"] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert custom_map == %{} + assert unknown == [] + end + + test "maps German email variant correctly" do + headers = ["E-Mail"] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert custom_map == %{} + assert unknown == [] + end + + test "maps multiple member fields" do + headers = ["Email", "First Name", "Last Name"] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert member_map[:first_name] == 1 + assert member_map[:last_name] == 2 + assert custom_map == %{} + assert unknown == [] + end + + test "handles Unicode and whitespace in headers" do + headers = [" E-Mail ", "Straße", " Telefon / Mobil "] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert member_map[:street] == 1 + # "Telefon / Mobil" is not a known member field, so it should be unknown + assert length(unknown) == 1 + assert custom_map == %{} + end + + test "returns error when duplicate headers normalize to same field" do + headers = ["Email", "E-Mail"] + + assert {:error, reason} = HeaderMapper.build_maps(headers, []) + assert reason =~ "duplicate" + assert reason =~ "email" + end + + test "returns error when required field email is missing" do + headers = ["First Name", "Last Name"] + + assert {:error, reason} = HeaderMapper.build_maps(headers, []) + assert reason =~ "Missing required header" + assert reason =~ "email" + assert reason =~ "accepted" + end + + test "collects unknown columns" do + headers = ["Email", "FooBar", "UnknownColumn"] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert length(unknown) == 2 + assert "FooBar" in unknown or "foobar" in unknown + assert "UnknownColumn" in unknown or "unknowncolumn" in unknown + assert custom_map == %{} + end + + test "ignores empty headers after normalization" do + headers = ["Email", " ", ""] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert custom_map == %{} + assert unknown == [] + end + + test "maps custom field columns correctly" do + headers = ["Email", "Lieblingsfarbe"] + custom_fields = [%{id: "cf1", name: "Lieblingsfarbe"}] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, custom_fields) + + assert member_map[:email] == 0 + assert custom_map["cf1"] == 1 + assert unknown == [] + end + + test "custom field collision: member field wins" do + headers = ["Email"] + # Custom field with name "Email" should not override member field + custom_fields = [%{id: "cf1", name: 
"Email"}] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, custom_fields) + + assert member_map[:email] == 0 + # Custom field should not be in custom_map because member field has priority + assert custom_map == %{} + assert unknown == [] + end + + test "handles custom field with Unicode normalization" do + headers = ["Email", "Straße"] + custom_fields = [%{id: "cf1", name: "Straße"}] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, custom_fields) + + assert member_map[:email] == 0 + # "Straße" is a member field (street), so it should be in member_map, not custom_map + assert member_map[:street] == 1 + assert custom_map == %{} + assert unknown == [] + end + + test "handles unknown custom field columns" do + headers = ["Email", "UnknownCustomField"] + custom_fields = [%{id: "cf1", name: "KnownField"}] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, custom_fields) + + assert member_map[:email] == 0 + assert custom_map == %{} + # UnknownCustomField should be in unknown list + assert length(unknown) == 1 + end + + test "handles duplicate custom field names after normalization" do + headers = ["Email", "CustomField", "Custom Field"] + custom_fields = [%{id: "cf1", name: "CustomField"}] + + # Both "CustomField" and "Custom Field" normalize to the same, so this should error + assert {:error, reason} = HeaderMapper.build_maps(headers, custom_fields) + assert reason =~ "duplicate" + end + + test "maps all supported member fields" do + headers = [ + "Email", + "First Name", + "Last Name", + "Street", + "Postal Code", + "City" + ] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert member_map[:first_name] == 1 + assert member_map[:last_name] == 2 + assert member_map[:street] == 3 + assert member_map[:postal_code] == 4 + assert member_map[:city] == 5 + assert custom_map == %{} + assert unknown == [] + end + + test "maps German member field variants" do + headers = ["E-Mail", "Vorname", "Nachname", "Straße", "PLZ", "Stadt"] + + assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} = + HeaderMapper.build_maps(headers, []) + + assert member_map[:email] == 0 + assert member_map[:first_name] == 1 + assert member_map[:last_name] == 2 + assert member_map[:street] == 3 + assert member_map[:postal_code] == 4 + assert member_map[:city] == 5 + assert custom_map == %{} + assert unknown == [] + end + end +end diff --git a/test/mv/membership/import/member_csv_test.exs b/test/mv/membership/import/member_csv_test.exs index e918afd..a38315f 100644 --- a/test/mv/membership/import/member_csv_test.exs +++ b/test/mv/membership/import/member_csv_test.exs @@ -44,7 +44,6 @@ defmodule Mv.Membership.Import.MemberCSVTest do assert match?({:ok, _}, result) or match?({:error, _}, result) end - @tag :skip test "returns {:ok, import_state} on success" do file_content = "email\njohn@example.com" opts = [] @@ -56,6 +55,8 @@ defmodule Mv.Membership.Import.MemberCSVTest do assert Map.has_key?(import_state, :column_map) assert Map.has_key?(import_state, :custom_field_map) assert Map.has_key?(import_state, :warnings) + assert import_state.column_map[:email] == 0 + assert import_state.chunks != [] end test "returns {:error, reason} on failure" do @@ -72,23 +73,177 @@ defmodule Mv.Membership.Import.MemberCSVTest do 
end describe "process_chunk/3" do - test "function exists and accepts chunk_rows_with_lines, column_map, and opts" do - chunk_rows_with_lines = [{2, %{"email" => "john@example.com"}}] + test "function exists and accepts chunk_rows_with_lines, column_map, custom_field_map, and opts" do + chunk_rows_with_lines = [{2, %{member: %{email: "john@example.com"}, custom: %{}}}] column_map = %{email: 0} + custom_field_map = %{} opts = [] # This will fail until the function is implemented - result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts) + result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts) assert match?({:ok, _}, result) or match?({:error, _}, result) end - test "returns {:ok, chunk_result} on success" do - chunk_rows_with_lines = [{2, %{"email" => "john@example.com"}}] - column_map = %{email: 0} + test "creates member successfully with valid data" do + chunk_rows_with_lines = [ + {2, %{member: %{email: "john@example.com", first_name: "John"}, custom: %{}}} + ] + + column_map = %{email: 0, first_name: 1} + custom_field_map = %{} opts = [] assert {:ok, chunk_result} = - MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts) + MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts) + + assert chunk_result.inserted == 1 + assert chunk_result.failed == 0 + assert chunk_result.errors == [] + + # Verify member was created + members = Mv.Membership.list_members!() + assert Enum.any?(members, &(&1.email == "john@example.com")) + end + + test "returns error for invalid email" do + chunk_rows_with_lines = [ + {2, %{member: %{email: "invalid-email"}, custom: %{}}} + ] + + column_map = %{email: 0} + custom_field_map = %{} + opts = [] + + assert {:ok, chunk_result} = + MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts) + + assert chunk_result.inserted == 0 + assert chunk_result.failed == 1 + assert length(chunk_result.errors) == 1 + + error = List.first(chunk_result.errors) + assert error.csv_line_number == 2 + assert error.field == :email + assert error.message =~ "email" + end + + test "returns error for duplicate email" do + # Create existing member first + {:ok, _existing} = + Mv.Membership.create_member(%{email: "duplicate@example.com", first_name: "Existing"}) + + chunk_rows_with_lines = [ + {2, %{member: %{email: "duplicate@example.com", first_name: "New"}, custom: %{}}} + ] + + column_map = %{email: 0, first_name: 1} + custom_field_map = %{} + opts = [] + + assert {:ok, chunk_result} = + MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts) + + assert chunk_result.inserted == 0 + assert chunk_result.failed == 1 + assert length(chunk_result.errors) == 1 + + error = List.first(chunk_result.errors) + assert error.csv_line_number == 2 + assert error.field == :email + assert error.message =~ "email" or error.message =~ "duplicate" or error.message =~ "unique" + end + + test "creates member with custom field values" do + # Create custom field first + {:ok, custom_field} = + Mv.Membership.CustomField + |> Ash.Changeset.for_create(:create, %{ + name: "Phone", + value_type: :string + }) + |> Ash.create() + + chunk_rows_with_lines = [ + {2, + %{ + member: %{email: "withcustom@example.com"}, + custom: %{to_string(custom_field.id) => "123-456-7890"} + }} + ] + + column_map = %{email: 0} + custom_field_map = %{to_string(custom_field.id) => 1} + opts = [] + + assert {:ok, chunk_result} = + MemberCSV.process_chunk(chunk_rows_with_lines, column_map, 
custom_field_map, opts)
+
+      assert chunk_result.inserted == 1
+      assert chunk_result.failed == 0
+
+      # Verify member and custom field value were created
+      members = Mv.Membership.list_members!()
+      member = Enum.find(members, &(&1.email == "withcustom@example.com"))
+      assert member != nil
+
+      {:ok, member_with_cf} = Ash.load(member, :custom_field_values)
+      assert length(member_with_cf.custom_field_values) == 1
+      cfv = List.first(member_with_cf.custom_field_values)
+      assert cfv.custom_field_id == custom_field.id
+      assert cfv.value.value == "123-456-7890"
+    end
+
+    test "handles multiple rows with mixed success and failure" do
+      chunk_rows_with_lines = [
+        {2, %{member: %{email: "valid1@example.com"}, custom: %{}}},
+        {3, %{member: %{email: "invalid-email"}, custom: %{}}},
+        {4, %{member: %{email: "valid2@example.com"}, custom: %{}}}
+      ]
+
+      column_map = %{email: 0}
+      custom_field_map = %{}
+      opts = []
+
+      assert {:ok, chunk_result} =
+               MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
+
+      assert chunk_result.inserted == 2
+      assert chunk_result.failed == 1
+      assert length(chunk_result.errors) == 1
+
+      error = List.first(chunk_result.errors)
+      assert error.csv_line_number == 3
+    end
+
+    test "preserves CSV line numbers in errors" do
+      chunk_rows_with_lines = [
+        {5, %{member: %{email: "invalid"}, custom: %{}}},
+        {10, %{member: %{email: "also-invalid"}, custom: %{}}}
+      ]
+
+      column_map = %{email: 0}
+      custom_field_map = %{}
+      opts = []
+
+      assert {:ok, chunk_result} =
+               MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
+
+      assert chunk_result.failed == 2
+      assert length(chunk_result.errors) == 2
+
+      line_numbers = Enum.map(chunk_result.errors, & &1.csv_line_number)
+      assert 5 in line_numbers
+      assert 10 in line_numbers
+    end
+
+    test "returns {:ok, chunk_result} on success" do
+      chunk_rows_with_lines = [{2, %{member: %{email: "test@example.com"}, custom: %{}}}]
+      column_map = %{email: 0}
+      custom_field_map = %{}
+      opts = []
+
+      assert {:ok, chunk_result} =
+               MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
 
       # Check that chunk_result contains expected fields
       assert Map.has_key?(chunk_result, :inserted)
@@ -99,19 +254,23 @@ defmodule Mv.Membership.Import.MemberCSVTest do
       assert is_list(chunk_result.errors)
     end
 
-    test "returns {:error, reason} on failure" do
+    test "returns {:ok, _} with zero counts for empty chunk" do
       chunk_rows_with_lines = []
       column_map = %{}
+      custom_field_map = %{}
       opts = []
 
-      # This might return {:ok, _} with zero counts or {:error, _}
-      result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts)
-      assert match?({:ok, _}, result) or match?({:error, _}, result)
+      assert {:ok, chunk_result} =
+               MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
+
+      assert chunk_result.inserted == 0
+      assert chunk_result.failed == 0
+      assert chunk_result.errors == []
    end
 
    test "function has documentation" do
      # Check that @doc exists by reading the module
-      assert function_exported?(MemberCSV, :process_chunk, 3)
+      assert function_exported?(MemberCSV, :process_chunk, 4)
    end
  end
 
From 67072f0c526f357f31710e6c4d968922543bc974 Mon Sep 17 00:00:00 2001
From: carla
Date: Thu, 15 Jan 2026 16:11:09 +0100
Subject: [PATCH 11/12] feat: add header normalization
---
 lib/mv/membership/import/header_mapper.ex | 380 ++++++++++++++++++++++
 lib/mv/membership/import/member_csv.ex    | 355 +++++++++++++++++++-
 2 files changed, 720 insertions(+), 15 deletions(-)
 create 
mode 100644 lib/mv/membership/import/header_mapper.ex diff --git a/lib/mv/membership/import/header_mapper.ex b/lib/mv/membership/import/header_mapper.ex new file mode 100644 index 0000000..caf7245 --- /dev/null +++ b/lib/mv/membership/import/header_mapper.ex @@ -0,0 +1,380 @@ +defmodule Mv.Membership.Import.HeaderMapper do + @moduledoc """ + Maps CSV headers to canonical member fields and custom fields. + + Provides header normalization and mapping functionality for CSV imports. + Handles bilingual header variants (English/German) and custom field detection. + + ## Header Normalization + + Headers are normalized using the following rules: + - Trim whitespace + - Convert to lowercase + - Unicode normalization (ß → ss, ä → ae, ö → oe, ü → ue) + - Remove all whitespace (ensures "first name" == "firstname") + - Unify hyphen variants (en dash, minus sign → standard hyphen) + - Remove or unify punctuation (parentheses, slashes → spaces) + + ## Member Field Mapping + + Maps CSV headers to canonical member fields: + - `email` (required) + - `first_name` (optional) + - `last_name` (optional) + - `street` (optional) + - `postal_code` (optional) + - `city` (optional) + + Supports both English and German variants (e.g., "Email" / "E-Mail", "First Name" / "Vorname"). + + ## Custom Field Detection + + Custom fields are detected by matching normalized header names to custom field names. + Member fields have priority over custom fields (member field wins in case of collision). + + ## Examples + + iex> HeaderMapper.normalize_header(" E-Mail ") + "e-mail" + + iex> HeaderMapper.build_maps(["Email", "First Name"], []) + {:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}} + + iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}]) + {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}} + """ + + @type column_map :: %{atom() => non_neg_integer()} + @type custom_field_map :: %{String.t() => non_neg_integer()} + @type unknown_headers :: [String.t()] + + # Required member fields + @required_member_fields [:email] + + # Canonical member fields with their raw variants + # These will be normalized at runtime when building the lookup map + @member_field_variants_raw %{ + email: [ + "email", + "e-mail", + "e_mail", + "e mail", + "e-mail adresse", + "e-mail-adresse", + "mail" + ], + first_name: [ + "first name", + "firstname", + "vorname" + ], + last_name: [ + "last name", + "lastname", + "surname", + "nachname", + "familienname" + ], + street: [ + "street", + "address", + "strasse" + ], + postal_code: [ + "postal code", + "postal_code", + "zip", + "postcode", + "plz", + "postleitzahl" + ], + city: [ + "city", + "town", + "stadt", + "ort" + ] + } + + # Build reverse map: normalized_variant -> canonical_field + # This is computed at runtime on first access and cached + defp normalized_to_canonical do + @member_field_variants_raw + |> Enum.flat_map(fn {canonical, variants} -> + Enum.map(variants, fn variant -> + {normalize_header(variant), canonical} + end) + end) + |> Map.new() + end + + @doc """ + Normalizes a CSV header string for comparison. 
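+  The known member-field variants are normalized with this same function, so a
+  CSV header matches a field whenever both normalize to the same string.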
+ + Applies the following transformations: + - Trim whitespace + - Convert to lowercase + - Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue) + - Unify hyphen variants (en dash U+2013, minus sign U+2212 → standard hyphen) + - Remove or unify punctuation (parentheses, slashes → spaces) + - Remove all whitespace (ensures "first name" == "firstname") + - Final trim + + ## Examples + + iex> normalize_header(" E-Mail ") + "e-mail" + + iex> normalize_header("Straße") + "strasse" + + iex> normalize_header("E-Mail (privat)") + "e-mailprivat" + + iex> normalize_header("First Name") + "firstname" + + """ + @spec normalize_header(String.t()) :: String.t() + def normalize_header(header) when is_binary(header) do + header + |> String.trim() + |> String.downcase() + |> transliterate_unicode() + |> unify_hyphens() + |> normalize_punctuation() + |> compress_whitespace() + |> String.trim() + end + + def normalize_header(_), do: "" + + @doc """ + Builds column maps for member fields and custom fields from CSV headers. + + ## Parameters + + - `headers` - List of CSV header strings (in column order, 0-based indices) + - `custom_fields` - List of custom field maps/structs with at least `:id` and `:name` keys + + ## Returns + + - `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers}}` on success + - `{:error, reason}` on error (missing required field, duplicate headers) + + ## Examples + + iex> build_maps(["Email", "First Name"], []) + {:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}} + + iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}]) + {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}} + + """ + @spec build_maps([String.t()], [map()]) :: + {:ok, %{member: column_map(), custom: custom_field_map(), unknown: unknown_headers()}} + | {:error, String.t()} + def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do + with {:ok, member_map, unknown_after_member} <- build_member_map(headers), + {:ok, custom_map, unknown_after_custom} <- + build_custom_field_map(headers, unknown_after_member, custom_fields, member_map) do + unknown = Enum.map(unknown_after_custom, &Enum.at(headers, &1)) + {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} + end + end + + # --- Private Functions --- + + # Transliterates German umlauts and special characters + defp transliterate_unicode(str) do + str + |> String.replace("ß", "ss") + |> String.replace("ä", "ae") + |> String.replace("ö", "oe") + |> String.replace("ü", "ue") + |> String.replace("Ä", "ae") + |> String.replace("Ö", "oe") + |> String.replace("Ü", "ue") + end + + # Unifies different hyphen variants to standard hyphen + defp unify_hyphens(str) do + str + # en dash + |> String.replace(<<0x2013::utf8>>, "-") + # em dash + |> String.replace(<<0x2014::utf8>>, "-") + # minus sign + |> String.replace(<<0x2212::utf8>>, "-") + end + + # Normalizes punctuation: parentheses, slashes become spaces + defp normalize_punctuation(str) do + str + |> String.replace(~r/[()\[\]{}]/, " ") + |> String.replace(~r/[\/\\]/, " ") + end + + # Compresses multiple whitespace characters to single space, then removes all spaces + # This ensures "first name" and "firstname" normalize to the same value + defp compress_whitespace(str) do + str + |> String.replace(~r/\s+/, " ") + |> String.replace(" ", "") + end + + # Builds member field column map + defp build_member_map(headers) do + result = + headers + |> Enum.with_index() + |> Enum.reduce_while({%{}, [], %{}}, 
fn {header, index}, {acc_map, acc_unknown, acc_seen} -> + normalized = normalize_header(header) + + case process_member_header(header, index, normalized, acc_map, acc_seen) do + {:error, reason} -> + {:halt, {:error, reason}} + + {:ok, new_map, new_seen} -> + {:cont, {new_map, acc_unknown, new_seen}} + + {:unknown} -> + {:cont, {acc_map, [index | acc_unknown], acc_seen}} + end + end) + + case result do + {:error, reason} -> + {:error, reason} + + {member_map, unknown_indices, _normalized_seen} -> + validate_required_fields(member_map, unknown_indices) + end + end + + # Processes a single header for member field mapping + defp process_member_header(_header, _index, normalized, acc_map, acc_seen) + when normalized == "" do + {:ok, acc_map, acc_seen} + end + + defp process_member_header(_header, index, normalized, acc_map, acc_seen) do + if Map.has_key?(normalized_to_canonical(), normalized) do + canonical = normalized_to_canonical()[normalized] + + if Map.has_key?(acc_map, canonical) do + {:error, "duplicate header for #{canonical} (normalized: #{normalized})"} + else + {:ok, Map.put(acc_map, canonical, index), Map.put(acc_seen, normalized, canonical)} + end + else + {:unknown} + end + end + + # Validates that all required member fields are present + defp validate_required_fields(member_map, unknown_indices) do + missing_required = + @required_member_fields + |> Enum.filter(&(not Map.has_key?(member_map, &1))) + + if Enum.empty?(missing_required) do + {:ok, member_map, Enum.reverse(unknown_indices)} + else + missing_field = List.first(missing_required) + variants = Map.get(@member_field_variants_raw, missing_field, []) + accepted = Enum.join(variants, ", ") + + {:error, "Missing required header: #{missing_field} (accepted: #{accepted})"} + end + end + + # Builds custom field column map from unmatched headers + defp build_custom_field_map(headers, unknown_indices, custom_fields, _member_map) do + custom_field_lookup = build_custom_field_lookup(custom_fields) + + result = + unknown_indices + |> Enum.reduce_while({%{}, [], %{}}, fn index, {acc_map, acc_unknown, acc_seen} -> + header = Enum.at(headers, index) + normalized = normalize_header(header) + + case process_custom_field_header( + header, + index, + normalized, + custom_field_lookup, + acc_map, + acc_seen + ) do + {:error, reason} -> + {:halt, {:error, reason}} + + {:ok, new_map, new_seen} -> + {:cont, {new_map, acc_unknown, new_seen}} + + {:unknown} -> + {:cont, {acc_map, [index | acc_unknown], acc_seen}} + end + end) + + case result do + {:error, reason} -> + {:error, reason} + + {custom_map, remaining_unknown, _normalized_seen} -> + {:ok, custom_map, Enum.reverse(remaining_unknown)} + end + end + + # Builds normalized custom field name -> id lookup map + defp build_custom_field_lookup(custom_fields) do + custom_fields + |> Enum.reduce(%{}, fn cf, acc -> + name = Map.get(cf, :name) || Map.get(cf, "name") + id = Map.get(cf, :id) || Map.get(cf, "id") + + if name && id do + normalized_name = normalize_header(name) + Map.put(acc, normalized_name, id) + else + acc + end + end) + end + + # Processes a single header for custom field mapping + defp process_custom_field_header( + _header, + _index, + normalized, + _custom_field_lookup, + acc_map, + acc_seen + ) + when normalized == "" do + {:ok, acc_map, acc_seen} + end + + defp process_custom_field_header( + _header, + index, + normalized, + custom_field_lookup, + acc_map, + acc_seen + ) do + if Map.has_key?(custom_field_lookup, normalized) do + custom_field_id = custom_field_lookup[normalized] 
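+      # Header matches a known custom field; a second column mapping to the
+      # same field would be ambiguous, so it is rejected below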
+
+      if Map.has_key?(acc_map, custom_field_id) do
+        {:error, "duplicate custom field header (normalized: #{normalized})"}
+      else
+        {:ok, Map.put(acc_map, custom_field_id, index),
+         Map.put(acc_seen, normalized, custom_field_id)}
+      end
+    else
+      {:unknown}
+    end
+  end
+end
diff --git a/lib/mv/membership/import/member_csv.ex b/lib/mv/membership/import/member_csv.ex
index 9e30a20..7790bff 100644
--- a/lib/mv/membership/import/member_csv.ex
+++ b/lib/mv/membership/import/member_csv.ex
@@ -2,6 +2,8 @@ defmodule Mv.Membership.Import.MemberCSV do
   @moduledoc """
   Service module for importing members from CSV files.
 
+  Column headers may be in English or German (see `HeaderMapper`).
+
   This module provides the core API for CSV member import functionality:
   - `prepare/2` - Parses and validates CSV content, returns import state
   - `process_chunk/3` - Processes a chunk of rows and creates members
@@ -70,6 +72,9 @@ defmodule Mv.Membership.Import.MemberCSV do
           errors: list(Error.t())
         }
 
+  alias Mv.Membership.Import.CsvParser
+  alias Mv.Membership.Import.HeaderMapper
+
   @doc """
   Prepares CSV content for import by parsing, mapping headers, and validating limits.
 
@@ -104,12 +109,120 @@ defmodule Mv.Membership.Import.MemberCSV do
   """
   @spec prepare(String.t(), keyword()) :: {:ok, import_state()} | {:error, String.t()}
   def prepare(file_content, opts \\ []) do
-    # TODO: Implement in Issue #3 (CSV Parsing)
-    # This is a skeleton implementation that will be filled in later
-    _ = {file_content, opts}
+    max_rows = Keyword.get(opts, :max_rows, 1000)
+    chunk_size = Keyword.get(opts, :chunk_size, 200)
 
-    # Placeholder return - will be replaced with actual implementation
-    {:error, "Not yet implemented"}
+    with {:ok, headers, rows} <- CsvParser.parse(file_content),
+         {:ok, custom_fields} <- load_custom_fields(),
+         {:ok, maps, warnings} <- build_header_maps(headers, custom_fields),
+         :ok <- validate_row_count(rows, max_rows) do
+      chunks = chunk_rows(rows, maps, chunk_size)
+
+      {:ok,
+       %{
+         chunks: chunks,
+         column_map: maps.member,
+         custom_field_map: maps.custom,
+         warnings: warnings
+       }}
+    end
+  end
+
+  # Loads all custom fields from the database
+  defp load_custom_fields do
+    custom_fields =
+      Mv.Membership.CustomField
+      |> Ash.read!()
+
+    {:ok, custom_fields}
+  rescue
+    e ->
+      {:error, "Failed to load custom fields: #{Exception.message(e)}"}
+  end
+
+  # Builds header maps using HeaderMapper and collects warnings for unknown custom fields
+  defp build_header_maps(headers, custom_fields) do
+    # Convert custom fields to maps with id and name
+    custom_field_maps =
+      Enum.map(custom_fields, fn cf ->
+        %{id: to_string(cf.id), name: cf.name}
+      end)
+
+    case HeaderMapper.build_maps(headers, custom_field_maps) do
+      {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} ->
+        # Build warnings for unknown custom field columns
+        warnings =
+          unknown
+          |> Enum.filter(fn header ->
+            # Check if it could be a custom field (not a known member field)
+            normalized = HeaderMapper.normalize_header(header)
+            # If it's not empty and not a member field, it might be a custom field
+            normalized != "" && not member_field?(normalized)
+          end)
+          |> Enum.map(fn header ->
+            "Unknown column '#{header}' will be ignored. " <>
+              "If this is a custom field, create it in Mila before importing."
+          end)
+
+        {:ok, %{member: member_map, custom: custom_map}, warnings}
+
+      {:error, reason} ->
+        {:error, reason}
+    end
+  end
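+
+  # Example: with a "Phone" custom field defined and CSV headers
+  # ["Email", "Phone", "Nickname"], build_header_maps/2 yields
+  # {:ok, %{member: %{email: 0}, custom: %{"<phone-id>" => 1}}, warnings}
+  # where warnings holds one entry for the unmatched "Nickname" column
+  # ("<phone-id>" stands for the custom field's actual UUID).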
+
+  # Checks if a normalized header matches a member field.
+  # build_maps/2 requires an email header, so we probe with "email" plus the
+  # header in question and check whether a second member field was mapped.
+  defp member_field?(normalized) do
+    case HeaderMapper.build_maps(["email", normalized], []) do
+      {:ok, %{member: member_map}} ->
+        # The "email" probe accounts for one entry; a second entry is a match
+        map_size(member_map) > 1
+
+      _ ->
+        false
+    end
+  end
+
+  # Validates that row count doesn't exceed limit
+  defp validate_row_count(rows, max_rows) do
+    if length(rows) > max_rows do
+      {:error, "CSV file exceeds maximum row limit of #{max_rows} rows"}
+    else
+      :ok
+    end
+  end
+
+  # Chunks rows and converts them to row maps using column maps
+  defp chunk_rows(rows, maps, chunk_size) do
+    rows
+    |> Enum.chunk_every(chunk_size)
+    |> Enum.map(fn chunk ->
+      Enum.map(chunk, fn {line_number, row_values} ->
+        row_map = build_row_map(row_values, maps)
+        {line_number, row_map}
+      end)
+    end)
+  end
+
+  # Builds a row map from raw row values using column maps
+  defp build_row_map(row_values, maps) do
+    member_map =
+      maps.member
+      |> Enum.reduce(%{}, fn {field, index}, acc ->
+        value = Enum.at(row_values, index, "")
+        Map.put(acc, field, value)
+      end)
+
+    custom_map =
+      maps.custom
+      |> Enum.reduce(%{}, fn {custom_field_id, index}, acc ->
+        value = Enum.at(row_values, index, "")
+        Map.put(acc, custom_field_id, value)
+      end)
+
+    %{member: member_map, custom: custom_map}
   end
 
   @doc """
@@ -126,8 +239,9 @@ defmodule Mv.Membership.Import.MemberCSV do
 
   - `chunk_rows_with_lines` - List of tuples `{csv_line_number, row_map}` where:
     - `csv_line_number` - Physical line number in CSV (1-based)
-    - `row_map` - Map of column names to values
-  - `column_map` - Map of canonical field names (atoms) to column indices
+    - `row_map` - Map with `:member` and `:custom` keys containing field values
+  - `column_map` - Map of canonical field names (atoms) to column indices (for reference)
+  - `custom_field_map` - Map of custom field IDs (strings) to column indices (for reference)
   - `opts` - Optional keyword list for processing options
 
   ## Returns
@@ -137,22 +251,233 @@ defmodule Mv.Membership.Import.MemberCSV do
 
   ## Examples
 
-      iex> chunk = [{2, %{"email" => "john@example.com"}}]
+      iex> chunk = [{2, %{member: %{email: "john@example.com"}, custom: %{}}}]
       iex> column_map = %{email: 0}
-      iex> MemberCSV.process_chunk(chunk, column_map)
+      iex> custom_field_map = %{}
+      iex> MemberCSV.process_chunk(chunk, column_map, custom_field_map)
       {:ok, %{inserted: 1, failed: 0, errors: []}}
   """
   @spec process_chunk(
           list({pos_integer(), map()}),
           %{atom() => non_neg_integer()},
+          %{String.t() => non_neg_integer()},
           keyword()
         ) :: {:ok, chunk_result()} | {:error, String.t()}
-  def process_chunk(chunk_rows_with_lines, column_map, opts \\ []) do
-    # TODO: Implement in Issue #6 (Persistence)
-    # This is a skeleton implementation that will be filled in later
-    _ = {chunk_rows_with_lines, column_map, opts}
+  def process_chunk(chunk_rows_with_lines, _column_map, _custom_field_map, _opts \\ []) do
+    {inserted, failed, errors} =
+      Enum.reduce(chunk_rows_with_lines, {0, 0, []}, fn {line_number, row_map},
+                                                        {acc_inserted, acc_failed, acc_errors} ->
+        case process_row(row_map, line_number) do
+          {:ok, _member} ->
+            {acc_inserted + 1, acc_failed, acc_errors}
 
-    # Placeholder return - will be replaced with actual implementation
-    {:ok, %{inserted: 0, failed: 0, errors: []}}
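+          # Per-row failures are collected here; the chunk itself still
+          # returns {:ok, chunk_result} so later rows keep processing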
+          {:error, error} ->
+            {acc_inserted, acc_failed + 1, [error | acc_errors]}
+        end
+      end)
+
+    {:ok, %{inserted: inserted, failed: failed, errors: Enum.reverse(errors)}}
   end
+
+  # Processes a single row and creates member with custom field values
+  defp process_row(%{member: member_attrs, custom: custom_attrs}, line_number) do
+    # Prepare custom field values for Ash
+    custom_field_values = prepare_custom_field_values(custom_attrs)
+
+    # Create member with custom field values
+    member_attrs_with_cf =
+      member_attrs
+      |> Map.put(:custom_field_values, custom_field_values)
+      |> trim_string_values()
+
+    # Only include custom_field_values if not empty
+    final_attrs =
+      if Enum.empty?(custom_field_values) do
+        Map.delete(member_attrs_with_cf, :custom_field_values)
+      else
+        member_attrs_with_cf
+      end
+
+    case Mv.Membership.create_member(final_attrs) do
+      {:ok, member} ->
+        {:ok, member}
+
+      {:error, %Ash.Error.Invalid{} = error} ->
+        {:error, format_ash_error(error, line_number)}
+
+      {:error, error} ->
+        {:error, %Error{csv_line_number: line_number, field: nil, message: inspect(error)}}
+    end
+  rescue
+    e ->
+      {:error, %Error{csv_line_number: line_number, field: nil, message: Exception.message(e)}}
+  end
+
+  # Prepares custom field values from row map for Ash
+  defp prepare_custom_field_values(custom_attrs) when is_map(custom_attrs) do
+    custom_attrs
+    |> Enum.filter(fn {_id, value} -> value != nil && value != "" end)
+    |> Enum.map(fn {custom_field_id_str, value} ->
+      # Load custom field to get value_type and ensure ID is correct
+      case load_custom_field_by_id(custom_field_id_str) do
+        {:ok, custom_field} ->
+          # Use the actual custom_field.id (UUID) from the database
+          %{
+            "custom_field_id" => to_string(custom_field.id),
+            "value" => format_custom_field_value(value, custom_field.value_type)
+          }
+
+        {:error, _} ->
+          # Skip if custom field not found
+          nil
+      end
+    end)
+    |> Enum.filter(&(&1 != nil))
+  end
+
+  defp prepare_custom_field_values(_), do: []
+
+  # Loads a custom field by ID (string or UUID)
+  defp load_custom_field_by_id(id) when is_binary(id) do
+    require Ash.Query
+
+    try do
+      # Try to parse as UUID first
+      uuid_id =
+        case Ecto.UUID.cast(id) do
+          {:ok, uuid} -> uuid
+          :error -> id
+        end
+
+      custom_field =
+        Mv.Membership.CustomField
+        |> Ash.Query.filter(id == ^uuid_id)
+        |> Ash.read_one!()
+
+      # Ash.read_one!/1 returns nil when no record matches
+      if custom_field, do: {:ok, custom_field}, else: {:error, :not_found}
+    rescue
+      _ -> {:error, :not_found}
+    end
+  end
+
+  defp load_custom_field_by_id(_), do: {:error, :invalid_id}
+
+  # Formats a custom field value according to its type
+  # Uses _union_type and _union_value format as expected by Ash
+  defp format_custom_field_value(value, :string) when is_binary(value) do
+    %{"_union_type" => "string", "_union_value" => String.trim(value)}
+  end
+
+  defp format_custom_field_value(value, :integer) when is_binary(value) do
+    case Integer.parse(value) do
+      {int_value, _} -> %{"_union_type" => "integer", "_union_value" => int_value}
+      :error -> %{"_union_type" => "string", "_union_value" => String.trim(value)}
+    end
+  end
+
+  defp format_custom_field_value(value, :boolean) when is_binary(value) do
+    bool_value =
+      value
+      |> String.trim()
+      |> String.downcase()
+      |> case do
+        "true" -> true
+        "1" -> true
+        "yes" -> true
+        "ja" -> true
+        _ -> false
+      end
+
+    %{"_union_type" => "boolean", "_union_value" => bool_value}
+  end
+
+  defp format_custom_field_value(value, :date) when is_binary(value) do
+    case Date.from_iso8601(String.trim(value)) do
+      {:ok, date} -> %{"_union_type" => "date", "_union_value" => date}
+      
{:error, _} -> %{"_union_type" => "string", "_union_value" => String.trim(value)} + end + end + + defp format_custom_field_value(value, :email) when is_binary(value) do + %{"_union_type" => "email", "_union_value" => String.trim(value)} + end + + defp format_custom_field_value(value, _type) when is_binary(value) do + # Default to string if type is unknown + %{"_union_type" => "string", "_union_value" => String.trim(value)} + end + + # Trims all string values in member attributes + defp trim_string_values(attrs) do + Enum.reduce(attrs, %{}, fn {key, value}, acc -> + trimmed_value = + if is_binary(value) do + String.trim(value) + else + value + end + + Map.put(acc, key, trimmed_value) + end) + end + + # Formats Ash errors into MemberCSV.Error structs + defp format_ash_error(%Ash.Error.Invalid{errors: errors}, line_number) do + # Try to find email-related errors first (for better error messages) + email_error = + Enum.find(errors, fn error -> + case error do + %{field: :email} -> true + _ -> false + end + end) + + case email_error || List.first(errors) do + %{field: field, message: message} when is_atom(field) -> + %Error{ + csv_line_number: line_number, + field: field, + message: format_error_message(message, field) + } + + %{message: message} -> + %Error{ + csv_line_number: line_number, + field: nil, + message: format_error_message(message, nil) + } + + _ -> + %Error{ + csv_line_number: line_number, + field: nil, + message: "Validation failed" + } + end + end + + # Formats error messages, handling common cases like email uniqueness + defp format_error_message(message, field) when is_binary(message) do + if email_uniqueness_error?(message, field) do + "email has already been taken" + else + message + end + end + + defp format_error_message(message, _field), do: to_string(message) + + # Checks if error message indicates email uniqueness constraint violation + defp email_uniqueness_error?(message, :email) do + message_lower = String.downcase(message) + + String.contains?(message_lower, "unique") or + String.contains?(message_lower, "constraint") or + String.contains?(message_lower, "duplicate") or + String.contains?(message_lower, "already been taken") or + String.contains?(message_lower, "already exists") or + String.contains?(message_lower, "violates unique constraint") + end + + defp email_uniqueness_error?(_message, _field), do: false end From 6dc398fa5ad4e17ca0698cabf539acf29c775450 Mon Sep 17 00:00:00 2001 From: carla Date: Thu, 15 Jan 2026 17:00:17 +0100 Subject: [PATCH 12/12] refactor: reduce complexity --- lib/mv/membership/import/header_mapper.ex | 84 +++++++++++-------- lib/mv/membership/import/member_csv.ex | 77 ++++++++--------- test/mv/membership/import/member_csv_test.exs | 9 +- 3 files changed, 92 insertions(+), 78 deletions(-) diff --git a/lib/mv/membership/import/header_mapper.ex b/lib/mv/membership/import/header_mapper.ex index caf7245..4e4a77d 100644 --- a/lib/mv/membership/import/header_mapper.ex +++ b/lib/mv/membership/import/header_mapper.ex @@ -97,17 +97,33 @@ defmodule Mv.Membership.Import.HeaderMapper do } # Build reverse map: normalized_variant -> canonical_field - # This is computed at runtime on first access and cached + # Cached on first access for performance defp normalized_to_canonical do + cached = Process.get({__MODULE__, :normalized_to_canonical}) + + if cached do + cached + else + map = build_normalized_to_canonical_map() + Process.put({__MODULE__, :normalized_to_canonical}, map) + map + end + end + + # Builds the normalized variant -> canonical field map + 
defp build_normalized_to_canonical_map do @member_field_variants_raw - |> Enum.flat_map(fn {canonical, variants} -> - Enum.map(variants, fn variant -> - {normalize_header(variant), canonical} - end) - end) + |> Enum.flat_map(&map_variants_to_normalized/1) |> Map.new() end + # Maps a canonical field and its variants to normalized tuples + defp map_variants_to_normalized({canonical, variants}) do + Enum.map(variants, fn variant -> + {normalize_header(variant), canonical} + end) + end + @doc """ Normalizes a CSV header string for comparison. @@ -208,9 +224,10 @@ defmodule Mv.Membership.Import.HeaderMapper do |> String.replace(<<0x2212::utf8>>, "-") end - # Normalizes punctuation: parentheses, slashes become spaces + # Normalizes punctuation: parentheses, slashes, underscores become spaces defp normalize_punctuation(str) do str + |> String.replace("_", " ") |> String.replace(~r/[()\[\]{}]/, " ") |> String.replace(~r/[\/\\]/, " ") end @@ -228,18 +245,18 @@ defmodule Mv.Membership.Import.HeaderMapper do result = headers |> Enum.with_index() - |> Enum.reduce_while({%{}, [], %{}}, fn {header, index}, {acc_map, acc_unknown, acc_seen} -> + |> Enum.reduce_while({%{}, []}, fn {header, index}, {acc_map, acc_unknown} -> normalized = normalize_header(header) - case process_member_header(header, index, normalized, acc_map, acc_seen) do + case process_member_header(header, index, normalized, acc_map, %{}) do {:error, reason} -> {:halt, {:error, reason}} - {:ok, new_map, new_seen} -> - {:cont, {new_map, acc_unknown, new_seen}} + {:ok, new_map, _} -> + {:cont, {new_map, acc_unknown}} {:unknown} -> - {:cont, {acc_map, [index | acc_unknown], acc_seen}} + {:cont, {acc_map, [index | acc_unknown]}} end end) @@ -247,7 +264,7 @@ defmodule Mv.Membership.Import.HeaderMapper do {:error, reason} -> {:error, reason} - {member_map, unknown_indices, _normalized_seen} -> + {member_map, unknown_indices} -> validate_required_fields(member_map, unknown_indices) end end @@ -258,17 +275,17 @@ defmodule Mv.Membership.Import.HeaderMapper do {:ok, acc_map, acc_seen} end - defp process_member_header(_header, index, normalized, acc_map, acc_seen) do - if Map.has_key?(normalized_to_canonical(), normalized) do - canonical = normalized_to_canonical()[normalized] + defp process_member_header(_header, index, normalized, acc_map, _acc_seen) do + case Map.get(normalized_to_canonical(), normalized) do + nil -> + {:unknown} - if Map.has_key?(acc_map, canonical) do - {:error, "duplicate header for #{canonical} (normalized: #{normalized})"} - else - {:ok, Map.put(acc_map, canonical, index), Map.put(acc_seen, normalized, canonical)} - end - else - {:unknown} + canonical -> + if Map.has_key?(acc_map, canonical) do + {:error, "duplicate header for #{canonical} (normalized: #{normalized})"} + else + {:ok, Map.put(acc_map, canonical, index), %{}} + end end end @@ -295,7 +312,7 @@ defmodule Mv.Membership.Import.HeaderMapper do result = unknown_indices - |> Enum.reduce_while({%{}, [], %{}}, fn index, {acc_map, acc_unknown, acc_seen} -> + |> Enum.reduce_while({%{}, []}, fn index, {acc_map, acc_unknown} -> header = Enum.at(headers, index) normalized = normalize_header(header) @@ -305,16 +322,16 @@ defmodule Mv.Membership.Import.HeaderMapper do normalized, custom_field_lookup, acc_map, - acc_seen + %{} ) do {:error, reason} -> {:halt, {:error, reason}} - {:ok, new_map, new_seen} -> - {:cont, {new_map, acc_unknown, new_seen}} + {:ok, new_map, _} -> + {:cont, {new_map, acc_unknown}} {:unknown} -> - {:cont, {acc_map, [index | acc_unknown], acc_seen}} + 
{:cont, {acc_map, [index | acc_unknown]}} end end) @@ -322,7 +339,7 @@ defmodule Mv.Membership.Import.HeaderMapper do {:error, reason} -> {:error, reason} - {custom_map, remaining_unknown, _normalized_seen} -> + {custom_map, remaining_unknown} -> {:ok, custom_map, Enum.reverse(remaining_unknown)} end end @@ -350,10 +367,10 @@ defmodule Mv.Membership.Import.HeaderMapper do normalized, _custom_field_lookup, acc_map, - acc_seen + _acc_seen ) when normalized == "" do - {:ok, acc_map, acc_seen} + {:ok, acc_map, %{}} end defp process_custom_field_header( @@ -362,7 +379,7 @@ defmodule Mv.Membership.Import.HeaderMapper do normalized, custom_field_lookup, acc_map, - acc_seen + _acc_seen ) do if Map.has_key?(custom_field_lookup, normalized) do custom_field_id = custom_field_lookup[normalized] @@ -370,8 +387,7 @@ defmodule Mv.Membership.Import.HeaderMapper do if Map.has_key?(acc_map, custom_field_id) do {:error, "duplicate custom field header (normalized: #{normalized})"} else - {:ok, Map.put(acc_map, custom_field_id, index), - Map.put(acc_seen, normalized, custom_field_id)} + {:ok, Map.put(acc_map, custom_field_id, index), %{}} end else {:unknown} diff --git a/lib/mv/membership/import/member_csv.ex b/lib/mv/membership/import/member_csv.ex index 7790bff..26756b4 100644 --- a/lib/mv/membership/import/member_csv.ex +++ b/lib/mv/membership/import/member_csv.ex @@ -63,6 +63,7 @@ defmodule Mv.Membership.Import.MemberCSV do chunks: list(list({pos_integer(), map()})), column_map: %{atom() => non_neg_integer()}, custom_field_map: %{String.t() => non_neg_integer()}, + custom_field_lookup: %{String.t() => %{id: String.t(), value_type: atom()}}, warnings: list(String.t()) } @@ -118,11 +119,15 @@ defmodule Mv.Membership.Import.MemberCSV do :ok <- validate_row_count(rows, max_rows) do chunks = chunk_rows(rows, maps, chunk_size) + # Build custom field lookup for efficient value processing + custom_field_lookup = build_custom_field_lookup(custom_fields) + {:ok, %{ chunks: chunks, column_map: maps.member, custom_field_map: maps.custom, + custom_field_lookup: custom_field_lookup, warnings: warnings }} end @@ -140,6 +145,15 @@ defmodule Mv.Membership.Import.MemberCSV do {:error, "Failed to load custom fields: #{Exception.message(e)}"} end + # Builds custom field lookup map for efficient value processing + defp build_custom_field_lookup(custom_fields) do + custom_fields + |> Enum.reduce(%{}, fn cf, acc -> + id_str = to_string(cf.id) + Map.put(acc, id_str, %{id: cf.id, value_type: cf.value_type}) + end) + end + # Builds header maps using HeaderMapper and collects warnings for unknown custom fields defp build_header_maps(headers, custom_fields) do # Convert custom fields to maps with id and name @@ -263,11 +277,13 @@ defmodule Mv.Membership.Import.MemberCSV do %{String.t() => non_neg_integer()}, keyword() ) :: {:ok, chunk_result()} | {:error, String.t()} - def process_chunk(chunk_rows_with_lines, _column_map, _custom_field_map, _opts \\ []) do + def process_chunk(chunk_rows_with_lines, _column_map, _custom_field_map, opts \\ []) do + custom_field_lookup = Keyword.get(opts, :custom_field_lookup, %{}) + {inserted, failed, errors} = Enum.reduce(chunk_rows_with_lines, {0, 0, []}, fn {line_number, row_map}, {acc_inserted, acc_failed, acc_errors} -> - case process_row(row_map, line_number) do + case process_row(row_map, line_number, custom_field_lookup) do {:ok, _member} -> {acc_inserted + 1, acc_failed, acc_errors} @@ -280,9 +296,13 @@ defmodule Mv.Membership.Import.MemberCSV do end # Processes a single row and creates member 
with custom field values - defp process_row(%{member: member_attrs, custom: custom_attrs}, line_number) do + defp process_row( + %{member: member_attrs, custom: custom_attrs}, + line_number, + custom_field_lookup + ) do # Prepare custom field values for Ash - custom_field_values = prepare_custom_field_values(custom_attrs) + custom_field_values = prepare_custom_field_values(custom_attrs, custom_field_lookup) # Create member with custom field values member_attrs_with_cf = @@ -314,53 +334,26 @@ defmodule Mv.Membership.Import.MemberCSV do end # Prepares custom field values from row map for Ash - defp prepare_custom_field_values(custom_attrs) when is_map(custom_attrs) do + defp prepare_custom_field_values(custom_attrs, custom_field_lookup) when is_map(custom_attrs) do custom_attrs |> Enum.filter(fn {_id, value} -> value != nil && value != "" end) |> Enum.map(fn {custom_field_id_str, value} -> - # Load custom field to get value_type and ensure ID is correct - case load_custom_field_by_id(custom_field_id_str) do - {:ok, custom_field} -> - # Use the actual custom_field.id (UUID) from the database - %{ - "custom_field_id" => to_string(custom_field.id), - "value" => format_custom_field_value(value, custom_field.value_type) - } - - {:error, _} -> - # Skip if custom field not found + case Map.get(custom_field_lookup, custom_field_id_str) do + nil -> + # Custom field not found, skip nil + + %{id: custom_field_id, value_type: value_type} -> + %{ + "custom_field_id" => to_string(custom_field_id), + "value" => format_custom_field_value(value, value_type) + } end end) |> Enum.filter(&(&1 != nil)) end - defp prepare_custom_field_values(_), do: [] - - # Loads a custom field by ID (string or UUID) - defp load_custom_field_by_id(id) when is_binary(id) do - require Ash.Query - - try do - # Try to parse as UUID first - uuid_id = - case Ecto.UUID.cast(id) do - {:ok, uuid} -> uuid - :error -> id - end - - custom_field = - Mv.Membership.CustomField - |> Ash.Query.filter(id == ^uuid_id) - |> Ash.read_one!() - - {:ok, custom_field} - rescue - _ -> {:error, :not_found} - end - end - - defp load_custom_field_by_id(_), do: {:error, :invalid_id} + defp prepare_custom_field_values(_, _), do: [] # Formats a custom field value according to its type # Uses _union_type and _union_value format as expected by Ash diff --git a/test/mv/membership/import/member_csv_test.exs b/test/mv/membership/import/member_csv_test.exs index a38315f..356cc19 100644 --- a/test/mv/membership/import/member_csv_test.exs +++ b/test/mv/membership/import/member_csv_test.exs @@ -72,7 +72,7 @@ defmodule Mv.Membership.Import.MemberCSVTest do end end - describe "process_chunk/3" do + describe "process_chunk/4" do test "function exists and accepts chunk_rows_with_lines, column_map, custom_field_map, and opts" do chunk_rows_with_lines = [{2, %{member: %{email: "john@example.com"}, custom: %{}}}] column_map = %{email: 0} @@ -173,7 +173,12 @@ defmodule Mv.Membership.Import.MemberCSVTest do column_map = %{email: 0} custom_field_map = %{to_string(custom_field.id) => 1} - opts = [] + + custom_field_lookup = %{ + to_string(custom_field.id) => %{id: custom_field.id, value_type: custom_field.value_type} + } + + opts = [custom_field_lookup: custom_field_lookup] assert {:ok, chunk_result} = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)