Merge pull request 'implements header normalization closes #332' (#352) from feature/332_header_normalization into main

Reviewed-on: #352
This commit is contained in:
carla 2026-01-15 17:01:50 +01:00
commit 9be5dc8751
4 changed files with 1151 additions and 29 deletions

View file

@ -0,0 +1,396 @@
defmodule Mv.Membership.Import.HeaderMapper do
@moduledoc """
Maps CSV headers to canonical member fields and custom fields.
Provides header normalization and mapping functionality for CSV imports.
Handles bilingual header variants (English/German) and custom field detection.
## Header Normalization
Headers are normalized using the following rules:
- Trim whitespace
- Convert to lowercase
- Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
- Remove all whitespace (ensures "first name" == "firstname")
- Unify hyphen variants (en dash, em dash, minus sign → standard hyphen)
- Remove or unify punctuation (parentheses, brackets, slashes, underscores → spaces)
## Member Field Mapping
Maps CSV headers to canonical member fields:
- `email` (required)
- `first_name` (optional)
- `last_name` (optional)
- `street` (optional)
- `postal_code` (optional)
- `city` (optional)
Supports both English and German variants (e.g., "Email" / "E-Mail", "First Name" / "Vorname").
## Custom Field Detection
Custom fields are detected by matching normalized header names to custom field names.
Member fields have priority over custom fields (member field wins in case of collision).
## Examples
iex> HeaderMapper.normalize_header(" E-Mail ")
"e-mail"
iex> HeaderMapper.build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}
iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
"""
# Canonical member field (atom) -> 0-based index of the CSV column that supplies it.
@type column_map :: %{atom() => non_neg_integer()}
# Custom field id -> 0-based index of the CSV column that supplies it.
@type custom_field_map :: %{String.t() => non_neg_integer()}
# Original (un-normalized) header strings that matched neither a member field
# nor a custom field.
@type unknown_headers :: [String.t()]
# Required member fields
# A header row that maps none of its columns to one of these is rejected.
@required_member_fields [:email]
# Canonical member fields with their raw variants
# These will be normalized at runtime when building the lookup map
# Variants that normalize to the same string (e.g. "email", "e_mail" and
# "e mail" all become "email") collapse into a single lookup key.
@member_field_variants_raw %{
email: [
"email",
"e-mail",
"e_mail",
"e mail",
"e-mail adresse",
"e-mail-adresse",
"mail"
],
first_name: [
"first name",
"firstname",
"vorname"
],
last_name: [
"last name",
"lastname",
"surname",
"nachname",
"familienname"
],
# "strasse" also covers the header "Straße", because normalization rewrites ß to ss.
street: [
"street",
"address",
"strasse"
],
postal_code: [
"postal code",
"postal_code",
"zip",
"postcode",
"plz",
"postleitzahl"
],
city: [
"city",
"town",
"stadt",
"ort"
]
}
# Reverse lookup: normalized variant -> canonical field.
# Memoized in the calling process's dictionary, so the map is built at most
# once per process and reused on every later call from that process.
defp normalized_to_canonical do
  cache_key = {__MODULE__, :normalized_to_canonical}

  # A truthy cached value falls straight through `with` and is returned as-is.
  with missing when missing in [nil, false] <- Process.get(cache_key) do
    lookup = build_normalized_to_canonical_map()
    Process.put(cache_key, lookup)
    lookup
  end
end
# Flattens every {canonical, variants} entry into {normalized_variant, canonical}
# pairs and collects them into one map.
defp build_normalized_to_canonical_map do
  for entry <- @member_field_variants_raw,
      pair <- map_variants_to_normalized(entry),
      into: %{} do
    pair
  end
end
# Turns one {canonical, variants} entry into a list of
# {normalized_variant, canonical} tuples.
defp map_variants_to_normalized({canonical, variants}) do
  for variant <- variants, do: {normalize_header(variant), canonical}
end
@doc """
Normalizes a CSV header string so that spelling variants compare equal.

Steps, applied in this order:

- trim surrounding whitespace
- convert to lowercase
- transliterate German characters (ß → ss, ä → ae, ö → oe, ü → ue)
- unify dash variants (en dash U+2013, em dash U+2014, minus sign U+2212 → `-`)
- turn underscores, brackets and slashes into spaces
- remove all whitespace (ensures "first name" == "firstname")
- final trim

Any non-binary input yields `""`.

## Examples

    iex> normalize_header(" E-Mail ")
    "e-mail"

    iex> normalize_header("Straße")
    "strasse"

    iex> normalize_header("E-Mail (privat)")
    "e-mailprivat"

    iex> normalize_header("First Name")
    "firstname"
"""
@spec normalize_header(String.t()) :: String.t()
def normalize_header(header) when is_binary(header) do
  lowered = header |> String.trim() |> String.downcase()
  dashed = lowered |> transliterate_unicode() |> unify_hyphens()

  dashed
  |> normalize_punctuation()
  |> compress_whitespace()
  |> String.trim()
end

def normalize_header(_other), do: ""
@doc """
Builds the column maps for member fields and custom fields from CSV headers.

## Parameters

- `headers` - list of CSV header strings in column order (indices are 0-based)
- `custom_fields` - list of maps/structs exposing at least `:id` and `:name`

## Returns

- `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers}}`
  where `unknown` holds the original header strings that matched nothing
- `{:error, reason}` when a required header is missing or two headers map to
  the same field

## Examples

    iex> build_maps(["Email", "First Name"], [])
    {:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}

    iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
    {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
"""
@spec build_maps([String.t()], [map()]) ::
        {:ok, %{member: column_map(), custom: custom_field_map(), unknown: unknown_headers()}}
        | {:error, String.t()}
def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do
  case build_member_map(headers) do
    {:ok, member_map, unknown_after_member} ->
      # Only columns that are not member fields are offered to the custom field pass.
      case build_custom_field_map(headers, unknown_after_member, custom_fields, member_map) do
        {:ok, custom_map, unknown_after_custom} ->
          unknown = for position <- unknown_after_custom, do: Enum.at(headers, position)
          {:ok, %{member: member_map, custom: custom_map, unknown: unknown}}

        custom_failure ->
          custom_failure
      end

    member_failure ->
      member_failure
  end
end
# --- Private Functions ---
# Transliterates German umlauts and sharp s to their ASCII digraphs
# (ß → ss, ä/Ä → ae, ö/Ö → oe, ü/Ü → ue).
#
# The input is composed to NFC first. Without that step a decomposed umlaut
# ("u" followed by the combining diaeresis U+0308) would not match the
# precomposed patterns below, so the NFC and NFD spellings of one header
# would normalize to different strings.
defp transliterate_unicode(str) do
  str
  |> String.normalize(:nfc)
  |> String.replace(["ß", "ä", "ö", "ü", "Ä", "Ö", "Ü"], fn
    "ß" -> "ss"
    umlaut when umlaut in ["ä", "Ä"] -> "ae"
    umlaut when umlaut in ["ö", "Ö"] -> "oe"
    umlaut when umlaut in ["ü", "Ü"] -> "ue"
  end)
end
# Rewrites dash look-alikes to the ASCII hyphen-minus.
defp unify_hyphens(str) do
  dash_variants = [
    # en dash
    <<0x2013::utf8>>,
    # em dash
    <<0x2014::utf8>>,
    # minus sign
    <<0x2212::utf8>>
  ]

  Enum.reduce(dash_variants, str, fn dash, text -> String.replace(text, dash, "-") end)
end
# Replaces underscores, brackets/parentheses/braces and slashes with a space each.
defp normalize_punctuation(str) do
  separators = ["_", ~r/[()\[\]{}]/, ~r/[\/\\]/]

  Enum.reduce(separators, str, fn separator, text ->
    String.replace(text, separator, " ")
  end)
end
# Removes every whitespace character from the string, so that "first name"
# and "firstname" normalize to the same value.
#
# The pattern is Unicode-aware (`u` flag plus the \p{Z} separator class), so
# non-breaking spaces and other Unicode separators are stripped as well.
# Zero-width space (U+200B) and the byte order mark (U+FEFF) are included
# because they are invisible and would otherwise make two headers that look
# identical compare as different.
defp compress_whitespace(str) do
  String.replace(str, ~r/[\s\p{Z}\x{200B}\x{FEFF}]+/u, "")
end
# Walks the headers once, mapping each recognised member field to its column
# index and collecting the indices of unrecognised columns. Stops at the first
# duplicate; afterwards checks that the required fields are present.
defp build_member_map(headers) do
  scan =
    headers
    |> Stream.with_index()
    |> Enum.reduce_while({%{}, []}, fn {raw, position}, {fields, leftovers} ->
      case process_member_header(raw, position, normalize_header(raw), fields, %{}) do
        {:ok, fields_next, _seen} -> {:cont, {fields_next, leftovers}}
        {:unknown} -> {:cont, {fields, [position | leftovers]}}
        {:error, _reason} = failure -> {:halt, failure}
      end
    end)

  # An {:error, reason} tuple does not match (its first element is an atom)
  # and is returned unchanged.
  with {fields, leftovers} when is_map(fields) <- scan do
    validate_required_fields(fields, leftovers)
  end
end
# Classifies one header for member field mapping.
# Returns {:ok, map, seen} when the header is blank or newly mapped,
# {:unknown} when it is not a member field, and {:error, reason} when the
# canonical field is already taken by an earlier column.
defp process_member_header(_header, _index, "", acc_map, acc_seen) do
  {:ok, acc_map, acc_seen}
end

defp process_member_header(_header, index, normalized, acc_map, _acc_seen) do
  case Map.fetch(normalized_to_canonical(), normalized) do
    :error ->
      {:unknown}

    {:ok, canonical} when is_map_key(acc_map, canonical) ->
      {:error, "duplicate header for #{canonical} (normalized: #{normalized})"}

    {:ok, canonical} ->
      {:ok, Map.put(acc_map, canonical, index), %{}}
  end
end
# Succeeds with the member map and the unknown indices (restored to column
# order) when every required field is mapped; otherwise reports the first
# missing field together with the header spellings that would satisfy it.
defp validate_required_fields(member_map, unknown_indices) do
  case Enum.reject(@required_member_fields, &Map.has_key?(member_map, &1)) do
    [] ->
      {:ok, member_map, Enum.reverse(unknown_indices)}

    [missing_field | _rest] ->
      accepted =
        @member_field_variants_raw
        |> Map.get(missing_field, [])
        |> Enum.join(", ")

      {:error, "Missing required header: #{missing_field} (accepted: #{accepted})"}
  end
end
# Matches the columns left over from member mapping against the custom field
# names. Returns {:ok, custom_map, still_unknown_indices}, or the first
# duplicate error encountered.
defp build_custom_field_map(headers, unknown_indices, custom_fields, _member_map) do
  lookup = build_custom_field_lookup(custom_fields)

  outcome =
    Enum.reduce_while(unknown_indices, {%{}, []}, fn position, {matched, leftovers} ->
      raw = Enum.at(headers, position)
      normalized = normalize_header(raw)

      case process_custom_field_header(raw, position, normalized, lookup, matched, %{}) do
        {:ok, matched_next, _seen} -> {:cont, {matched_next, leftovers}}
        {:unknown} -> {:cont, {matched, [position | leftovers]}}
        {:error, _reason} = failure -> {:halt, failure}
      end
    end)

  # An {:error, reason} tuple does not match and is returned unchanged.
  with {matched, leftovers} when is_map(matched) <- outcome do
    {:ok, matched, Enum.reverse(leftovers)}
  end
end
# Builds the lookup: normalized custom field name -> custom field id.
# Accepts atom or string keys; entries lacking a name or an id are skipped.
# When two names normalize identically, the later entry wins.
defp build_custom_field_lookup(custom_fields) do
  for field <- custom_fields,
      name = Map.get(field, :name) || Map.get(field, "name"),
      id = Map.get(field, :id) || Map.get(field, "id"),
      into: %{} do
    {normalize_header(name), id}
  end
end
# Classifies one leftover header for custom field mapping.
# Blank headers are ignored, a header naming a known custom field is mapped
# to its column index, and a second column for the same custom field is an error.
defp process_custom_field_header(_header, _index, "", _custom_field_lookup, acc_map, _acc_seen) do
  {:ok, acc_map, %{}}
end

defp process_custom_field_header(
       _header,
       index,
       normalized,
       custom_field_lookup,
       acc_map,
       _acc_seen
     ) do
  case Map.fetch(custom_field_lookup, normalized) do
    :error ->
      {:unknown}

    {:ok, custom_field_id} when is_map_key(acc_map, custom_field_id) ->
      {:error, "duplicate custom field header (normalized: #{normalized})"}

    {:ok, custom_field_id} ->
      {:ok, Map.put(acc_map, custom_field_id, index), %{}}
  end
end
end

View file

@ -2,6 +2,8 @@ defmodule Mv.Membership.Import.MemberCSV do
@moduledoc """
Service module for importing members from CSV files.
require Ash.Query
This module provides the core API for CSV member import functionality:
- `prepare/2` - Parses and validates CSV content, returns import state
- `process_chunk/3` - Processes a chunk of rows and creates members
@ -61,6 +63,7 @@ defmodule Mv.Membership.Import.MemberCSV do
chunks: list(list({pos_integer(), map()})),
column_map: %{atom() => non_neg_integer()},
custom_field_map: %{String.t() => non_neg_integer()},
custom_field_lookup: %{String.t() => %{id: String.t(), value_type: atom()}},
warnings: list(String.t())
}
@ -70,6 +73,9 @@ defmodule Mv.Membership.Import.MemberCSV do
errors: list(Error.t())
}
alias Mv.Membership.Import.CsvParser
alias Mv.Membership.Import.HeaderMapper
@doc """
Prepares CSV content for import by parsing, mapping headers, and validating limits.
@ -104,12 +110,133 @@ defmodule Mv.Membership.Import.MemberCSV do
"""
@spec prepare(String.t(), keyword()) :: {:ok, import_state()} | {:error, String.t()}
def prepare(file_content, opts \\ []) do
# TODO: Implement in Issue #3 (CSV Parsing)
# This is a skeleton implementation that will be filled in later
_ = {file_content, opts}
max_rows = Keyword.get(opts, :max_rows, 1000)
chunk_size = Keyword.get(opts, :chunk_size, 200)
# Placeholder return - will be replaced with actual implementation
{:error, "Not yet implemented"}
with {:ok, headers, rows} <- CsvParser.parse(file_content),
{:ok, custom_fields} <- load_custom_fields(),
{:ok, maps, warnings} <- build_header_maps(headers, custom_fields),
:ok <- validate_row_count(rows, max_rows) do
chunks = chunk_rows(rows, maps, chunk_size)
# Build custom field lookup for efficient value processing
custom_field_lookup = build_custom_field_lookup(custom_fields)
{:ok,
%{
chunks: chunks,
column_map: maps.member,
custom_field_map: maps.custom,
custom_field_lookup: custom_field_lookup,
warnings: warnings
}}
end
end
# Reads every CustomField record via Ash.
# Any exception raised by the read is converted into an {:error, message} tuple.
defp load_custom_fields do
  {:ok, Ash.read!(Mv.Membership.CustomField)}
rescue
  exception ->
    reason = Exception.message(exception)
    {:error, "Failed to load custom fields: " <> reason}
end
# Indexes custom fields by their stringified id, keeping the original id and
# the value type needed to format imported values.
defp build_custom_field_lookup(custom_fields) do
  Map.new(custom_fields, fn field ->
    {to_string(field.id), %{id: field.id, value_type: field.value_type}}
  end)
end
# Delegates header mapping to HeaderMapper and emits one warning per column
# that will be ignored. Errors from HeaderMapper are passed through unchanged.
defp build_header_maps(headers, custom_fields) do
  # HeaderMapper only needs the id (as a string) and the name of each field.
  named_fields = for field <- custom_fields, do: %{id: to_string(field.id), name: field.name}

  with {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} <-
         HeaderMapper.build_maps(headers, named_fields) do
    warnings =
      for header <- unknown,
          normalized = HeaderMapper.normalize_header(header),
          normalized != "" && not member_field?(normalized) do
        "Unknown column '#{header}' will be ignored. " <>
          "If this is a custom field, create it in Mila before importing."
      end

    {:ok, %{member: member_map, custom: custom_map}, warnings}
  end
end
# Checks whether a normalized header maps to a canonical member field.
#
# HeaderMapper exposes only build_maps/2, which rejects any header list that
# lacks the required email column. Probing with the candidate alone therefore
# failed with "Missing required header" for every non-email field, so this
# function used to return false for first name, city, and so on. The candidate
# is now probed next to a known-good "email" header.
defp member_field?(normalized) do
  case HeaderMapper.build_maps(["email", normalized], []) do
    {:ok, %{member: member_map}} ->
      # More than the email probe itself means the candidate mapped to a member field.
      map_size(member_map) > 1

    {:error, _reason} ->
      # With email present and no custom fields, the only possible failure is
      # a duplicate of the email probe: the candidate is itself an email variant.
      true
  end
end
# Returns :ok while the number of rows stays within max_rows, otherwise an
# error tuple naming the limit.
defp validate_row_count(rows, max_rows) do
  case length(rows) do
    count when count > max_rows ->
      {:error, "CSV file exceeds maximum row limit of #{max_rows} rows"}

    _within_limit ->
      :ok
  end
end
# Converts every {line_number, values} row into {line_number, row_map} and
# groups the converted rows into chunks of at most chunk_size entries.
defp chunk_rows(rows, maps, chunk_size) do
  converted =
    for {line_number, row_values} <- rows do
      {line_number, build_row_map(row_values, maps)}
    end

  Enum.chunk_every(converted, chunk_size)
end
# Picks the mapped columns out of one raw row.
# Columns missing from a short row default to "".
defp build_row_map(row_values, maps) do
  pick = fn column_map ->
    Map.new(column_map, fn {key, position} -> {key, Enum.at(row_values, position, "")} end)
  end

  %{member: pick.(maps.member), custom: pick.(maps.custom)}
end
@doc """
@ -126,8 +253,9 @@ defmodule Mv.Membership.Import.MemberCSV do
- `chunk_rows_with_lines` - List of tuples `{csv_line_number, row_map}` where:
- `csv_line_number` - Physical line number in CSV (1-based)
- `row_map` - Map of column names to values
- `column_map` - Map of canonical field names (atoms) to column indices
- `row_map` - Map with `:member` and `:custom` keys containing field values
- `column_map` - Map of canonical field names (atoms) to column indices (for reference)
- `custom_field_map` - Map of custom field IDs (strings) to column indices (for reference)
- `opts` - Optional keyword list for processing options
## Returns
@ -137,22 +265,212 @@ defmodule Mv.Membership.Import.MemberCSV do
## Examples
iex> chunk = [{2, %{"email" => "john@example.com"}}]
iex> chunk = [{2, %{member: %{email: "john@example.com"}, custom: %{}}}]
iex> column_map = %{email: 0}
iex> MemberCSV.process_chunk(chunk, column_map)
iex> custom_field_map = %{}
iex> MemberCSV.process_chunk(chunk, column_map, custom_field_map)
{:ok, %{inserted: 1, failed: 0, errors: []}}
"""
@spec process_chunk(
list({pos_integer(), map()}),
%{atom() => non_neg_integer()},
%{String.t() => non_neg_integer()},
keyword()
) :: {:ok, chunk_result()} | {:error, String.t()}
def process_chunk(chunk_rows_with_lines, column_map, opts \\ []) do
# TODO: Implement in Issue #6 (Persistence)
# This is a skeleton implementation that will be filled in later
_ = {chunk_rows_with_lines, column_map, opts}
def process_chunk(chunk_rows_with_lines, _column_map, _custom_field_map, opts \\ []) do
custom_field_lookup = Keyword.get(opts, :custom_field_lookup, %{})
# Placeholder return - will be replaced with actual implementation
{:ok, %{inserted: 0, failed: 0, errors: []}}
{inserted, failed, errors} =
Enum.reduce(chunk_rows_with_lines, {0, 0, []}, fn {line_number, row_map},
{acc_inserted, acc_failed, acc_errors} ->
case process_row(row_map, line_number, custom_field_lookup) do
{:ok, _member} ->
{acc_inserted + 1, acc_failed, acc_errors}
{:error, error} ->
{acc_inserted, acc_failed + 1, [error | acc_errors]}
end
end)
{:ok, %{inserted: inserted, failed: failed, errors: Enum.reverse(errors)}}
end
# Creates one member from a prepared row map.
# Member attributes are trimmed; custom field values are attached only when at
# least one is present. Every failure, including a raised exception, is turned
# into an Error struct carrying the CSV line number.
defp process_row(
       %{member: member_attrs, custom: custom_attrs},
       line_number,
       custom_field_lookup
     ) do
  custom_field_values = prepare_custom_field_values(custom_attrs, custom_field_lookup)
  trimmed_attrs = trim_string_values(member_attrs)

  final_attrs =
    case custom_field_values do
      [] -> Map.delete(trimmed_attrs, :custom_field_values)
      values -> Map.put(trimmed_attrs, :custom_field_values, values)
    end

  case Mv.Membership.create_member(final_attrs) do
    {:ok, _member} = created ->
      created

    {:error, %Ash.Error.Invalid{} = invalid} ->
      {:error, format_ash_error(invalid, line_number)}

    {:error, other} ->
      {:error, %Error{csv_line_number: line_number, field: nil, message: inspect(other)}}
  end
rescue
  exception ->
    {:error,
     %Error{csv_line_number: line_number, field: nil, message: Exception.message(exception)}}
end
# Converts the row's custom column values into the list of maps passed to Ash.
# Empty values and ids missing from the lookup are dropped.
# Anything other than a map of values yields an empty list.
defp prepare_custom_field_values(custom_attrs, custom_field_lookup) when is_map(custom_attrs) do
  custom_attrs
  |> Enum.reject(fn {_id, value} -> value in [nil, ""] end)
  |> Enum.flat_map(fn {custom_field_id_str, value} ->
    case Map.get(custom_field_lookup, custom_field_id_str) do
      nil ->
        # Custom field not found, skip
        []

      %{id: custom_field_id, value_type: value_type} ->
        [
          %{
            "custom_field_id" => to_string(custom_field_id),
            "value" => format_custom_field_value(value, value_type)
          }
        ]
    end
  end)
end

defp prepare_custom_field_values(_, _), do: []
# Formats a raw CSV value for a custom field according to the field's type,
# using the "_union_type" / "_union_value" map format expected by Ash.
#
# Values that cannot be converted to the declared type are passed on as a
# trimmed string union instead of being coerced.
defp format_custom_field_value(value, :string) when is_binary(value) do
  %{"_union_type" => "string", "_union_value" => String.trim(value)}
end

defp format_custom_field_value(value, :integer) when is_binary(value) do
  # Trim before parsing: CSV cells often carry padding, and Integer.parse/1
  # rejects leading whitespace. Require the whole cell to be consumed so that
  # "12abc" is not silently imported as 12.
  trimmed = String.trim(value)

  case Integer.parse(trimmed) do
    {int_value, ""} -> %{"_union_type" => "integer", "_union_value" => int_value}
    _not_an_integer -> %{"_union_type" => "string", "_union_value" => trimmed}
  end
end

defp format_custom_field_value(value, :boolean) when is_binary(value) do
  # Accepts English and German truthy spellings; everything else is false.
  bool_value =
    value
    |> String.trim()
    |> String.downcase()
    |> case do
      "true" -> true
      "1" -> true
      "yes" -> true
      "ja" -> true
      _ -> false
    end

  %{"_union_type" => "boolean", "_union_value" => bool_value}
end

defp format_custom_field_value(value, :date) when is_binary(value) do
  trimmed = String.trim(value)

  case Date.from_iso8601(trimmed) do
    {:ok, date} -> %{"_union_type" => "date", "_union_value" => date}
    {:error, _} -> %{"_union_type" => "string", "_union_value" => trimmed}
  end
end

defp format_custom_field_value(value, :email) when is_binary(value) do
  %{"_union_type" => "email", "_union_value" => String.trim(value)}
end

defp format_custom_field_value(value, _type) when is_binary(value) do
  # Default to string if type is unknown
  %{"_union_type" => "string", "_union_value" => String.trim(value)}
end
# Returns the attributes with every binary value trimmed; other values are
# left untouched.
defp trim_string_values(attrs) do
  Map.new(attrs, fn
    {key, value} when is_binary(value) -> {key, String.trim(value)}
    untouched -> untouched
  end)
end
# Condenses an Ash.Error.Invalid into a single MemberCSV.Error.
# An error on the :email field is preferred when present; otherwise the first
# error in the list is reported.
defp format_ash_error(%Ash.Error.Invalid{errors: errors}, line_number) do
  primary = Enum.find(errors, &match?(%{field: :email}, &1)) || List.first(errors)

  {field, message} =
    case primary do
      %{field: field, message: message} when is_atom(field) ->
        {field, format_error_message(message, field)}

      %{message: message} ->
        {nil, format_error_message(message, nil)}

      _ ->
        {nil, "Validation failed"}
    end

  %Error{csv_line_number: line_number, field: field, message: message}
end
# Produces the user-facing message for an error.
# Binary messages recognised as an email uniqueness violation are replaced by
# a fixed phrase; other binaries pass through; anything else is stringified.
defp format_error_message(message, field) when is_binary(message) do
  case email_uniqueness_error?(message, field) do
    true -> "email has already been taken"
    false -> message
  end
end

defp format_error_message(message, _field), do: to_string(message)
# True when an error on the :email field carries wording that indicates a
# uniqueness violation (case-insensitive substring match). Errors on any other
# field never qualify.
defp email_uniqueness_error?(message, :email) do
  markers = [
    "unique",
    "constraint",
    "duplicate",
    "already been taken",
    "already exists",
    "violates unique constraint"
  ]

  message
  |> String.downcase()
  |> String.contains?(markers)
end

defp email_uniqueness_error?(_message, _field), do: false
end

View file

@ -0,0 +1,244 @@
defmodule Mv.Membership.Import.HeaderMapperTest do
use ExUnit.Case, async: true
alias Mv.Membership.Import.HeaderMapper
describe "normalize_header/1" do
  test "trims whitespace" do
    assert HeaderMapper.normalize_header(" email ") == "email"
  end

  test "converts to lowercase" do
    assert HeaderMapper.normalize_header("EMAIL") == "email"
    assert HeaderMapper.normalize_header("E-Mail") == "e-mail"
  end

  test "normalizes Unicode characters" do
    # ß -> ss
    assert HeaderMapper.normalize_header("Straße") == "strasse"
    # Umlaut transliteration (ä -> ae, ö -> oe, ü -> ue)
    assert HeaderMapper.normalize_header("Müller") == "mueller"
    assert HeaderMapper.normalize_header("Köln") == "koeln"
    assert HeaderMapper.normalize_header("Grün") == "gruen"
  end

  test "compresses and removes whitespace" do
    # Whitespace is removed entirely to ensure "first name" == "firstname"
    assert HeaderMapper.normalize_header("first name") == "firstname"
    assert HeaderMapper.normalize_header("email address") == "emailaddress"
  end

  test "unifies hyphen variants" do
    # The dash characters are written as escapes so the exact code points are
    # visible and cannot be lost or replaced by an editor or copy/paste.
    # en dash (U+2013)
    assert HeaderMapper.normalize_header("E\u2013Mail") == "e-mail"
    # minus sign (U+2212)
    assert HeaderMapper.normalize_header("E\u2212Mail") == "e-mail"
    # standard hyphen
    assert HeaderMapper.normalize_header("E-Mail") == "e-mail"
  end

  test "removes or unifies punctuation" do
    # Parentheses, slashes, etc. become spaces, and all whitespace is then removed
    assert HeaderMapper.normalize_header("E-Mail (privat)") == "e-mailprivat"
    assert HeaderMapper.normalize_header("Telefon / Mobil") == "telefonmobil"
  end

  test "handles empty strings" do
    assert HeaderMapper.normalize_header("") == ""
    assert HeaderMapper.normalize_header(" ") == ""
  end
end
# Every success case asserts the complete result, so an unexpected extra
# mapping or a wrong entry in `unknown` fails the test instead of slipping
# past a partial check.
describe "build_maps/2" do
  test "maps English email variant correctly" do
    assert HeaderMapper.build_maps(["Email"], []) ==
             {:ok, %{member: %{email: 0}, custom: %{}, unknown: []}}
  end

  test "maps German email variant correctly" do
    assert HeaderMapper.build_maps(["E-Mail"], []) ==
             {:ok, %{member: %{email: 0}, custom: %{}, unknown: []}}
  end

  test "maps multiple member fields" do
    headers = ["Email", "First Name", "Last Name"]

    assert HeaderMapper.build_maps(headers, []) ==
             {:ok,
              %{member: %{email: 0, first_name: 1, last_name: 2}, custom: %{}, unknown: []}}
  end

  test "handles Unicode and whitespace in headers" do
    headers = [" E-Mail ", "Straße", " Telefon / Mobil "]

    # "Telefon / Mobil" is not a known member field; it is reported with its
    # original spelling, not the normalized one.
    assert HeaderMapper.build_maps(headers, []) ==
             {:ok,
              %{member: %{email: 0, street: 1}, custom: %{}, unknown: [" Telefon / Mobil "]}}
  end

  test "returns error when duplicate headers normalize to same field" do
    assert {:error, reason} = HeaderMapper.build_maps(["Email", "E-Mail"], [])
    assert reason =~ "duplicate"
    assert reason =~ "email"
  end

  test "returns error when required field email is missing" do
    assert {:error, reason} = HeaderMapper.build_maps(["First Name", "Last Name"], [])
    assert reason =~ "Missing required header"
    assert reason =~ "email"
    assert reason =~ "accepted"
  end

  test "collects unknown columns" do
    headers = ["Email", "FooBar", "UnknownColumn"]

    # Unknown headers come back unmodified and in column order.
    assert HeaderMapper.build_maps(headers, []) ==
             {:ok, %{member: %{email: 0}, custom: %{}, unknown: ["FooBar", "UnknownColumn"]}}
  end

  test "ignores empty headers after normalization" do
    assert HeaderMapper.build_maps(["Email", " ", ""], []) ==
             {:ok, %{member: %{email: 0}, custom: %{}, unknown: []}}
  end

  test "maps custom field columns correctly" do
    custom_fields = [%{id: "cf1", name: "Lieblingsfarbe"}]

    assert HeaderMapper.build_maps(["Email", "Lieblingsfarbe"], custom_fields) ==
             {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
  end

  test "custom field collision: member field wins" do
    # Custom field with name "Email" should not override member field
    custom_fields = [%{id: "cf1", name: "Email"}]

    assert HeaderMapper.build_maps(["Email"], custom_fields) ==
             {:ok, %{member: %{email: 0}, custom: %{}, unknown: []}}
  end

  test "handles custom field with Unicode normalization" do
    custom_fields = [%{id: "cf1", name: "Straße"}]

    # "Straße" is a member field (street), so it lands in member, not custom
    assert HeaderMapper.build_maps(["Email", "Straße"], custom_fields) ==
             {:ok, %{member: %{email: 0, street: 1}, custom: %{}, unknown: []}}
  end

  test "handles unknown custom field columns" do
    custom_fields = [%{id: "cf1", name: "KnownField"}]

    assert HeaderMapper.build_maps(["Email", "UnknownCustomField"], custom_fields) ==
             {:ok, %{member: %{email: 0}, custom: %{}, unknown: ["UnknownCustomField"]}}
  end

  test "handles duplicate custom field names after normalization" do
    headers = ["Email", "CustomField", "Custom Field"]
    custom_fields = [%{id: "cf1", name: "CustomField"}]

    # Both "CustomField" and "Custom Field" normalize to the same, so this should error
    assert {:error, reason} = HeaderMapper.build_maps(headers, custom_fields)
    assert reason =~ "duplicate"
  end

  test "maps all supported member fields" do
    headers = ["Email", "First Name", "Last Name", "Street", "Postal Code", "City"]

    assert HeaderMapper.build_maps(headers, []) ==
             {:ok,
              %{
                member: %{
                  email: 0,
                  first_name: 1,
                  last_name: 2,
                  street: 3,
                  postal_code: 4,
                  city: 5
                },
                custom: %{},
                unknown: []
              }}
  end

  test "maps German member field variants" do
    headers = ["E-Mail", "Vorname", "Nachname", "Straße", "PLZ", "Stadt"]

    assert HeaderMapper.build_maps(headers, []) ==
             {:ok,
              %{
                member: %{
                  email: 0,
                  first_name: 1,
                  last_name: 2,
                  street: 3,
                  postal_code: 4,
                  city: 5
                },
                custom: %{},
                unknown: []
              }}
  end
end
end

View file

@ -44,7 +44,6 @@ defmodule Mv.Membership.Import.MemberCSVTest do
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
@tag :skip
test "returns {:ok, import_state} on success" do
file_content = "email\njohn@example.com"
opts = []
@ -56,6 +55,8 @@ defmodule Mv.Membership.Import.MemberCSVTest do
assert Map.has_key?(import_state, :column_map)
assert Map.has_key?(import_state, :custom_field_map)
assert Map.has_key?(import_state, :warnings)
assert import_state.column_map[:email] == 0
assert import_state.chunks != []
end
test "returns {:error, reason} on failure" do
@ -71,24 +72,183 @@ defmodule Mv.Membership.Import.MemberCSVTest do
end
end
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers were lost.
# Both the "process_chunk/3" and "process_chunk/4" describe/test headers are present, as are
# a 3-argument and a 4-argument call. Presumably only the /4 lines belong to the final
# file; confirm before relying on this text as compilable source.
describe "process_chunk/3" do
test "function exists and accepts chunk_rows_with_lines, column_map, and opts" do
chunk_rows_with_lines = [{2, %{"email" => "john@example.com"}}]
describe "process_chunk/4" do
# Smoke test: process_chunk must accept the four arguments and return a tagged tuple.
test "function exists and accepts chunk_rows_with_lines, column_map, custom_field_map, and opts" do
# One row, tagged with CSV line number 2, split into :member and :custom data.
chunk_rows_with_lines = [{2, %{member: %{email: "john@example.com"}, custom: %{}}}]
column_map = %{email: 0}
custom_field_map = %{}
opts = []
# This will fail until the function is implemented
result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts)
result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
# Only the shape of the result is checked; either tagged tuple is accepted.
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
# NOTE(review): the next three lines (old test header, string-keyed row, column_map) and the
# 3-argument process_chunk call further down look like pre-change lines of a diff hunk whose
# +/- markers were lost. Confirm which lines belong to the final file.
test "returns {:ok, chunk_result} on success" do
chunk_rows_with_lines = [{2, %{"email" => "john@example.com"}}]
column_map = %{email: 0}
# Happy path: a single valid row must be counted as inserted, with no failures or errors.
test "creates member successfully with valid data" do
# One row, tagged with CSV line number 2, carrying email and first name.
chunk_rows_with_lines = [
{2, %{member: %{email: "john@example.com", first_name: "John"}, custom: %{}}}
]
column_map = %{email: 0, first_name: 1}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts)
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.inserted == 1
assert chunk_result.failed == 0
assert chunk_result.errors == []
# Verify member was created
members = Mv.Membership.list_members!()
assert Enum.any?(members, &(&1.email == "john@example.com"))
end
test "returns error for invalid email" do
  # A single row on CSV line 2 whose email is expected to be rejected.
  rows = [{2, %{member: %{email: "invalid-email"}, custom: %{}}}]

  # A rejected row is reported inside an {:ok, _} result rather than failing the chunk.
  assert {:ok, outcome} = MemberCSV.process_chunk(rows, %{email: 0}, %{}, [])

  assert outcome.inserted == 0
  assert outcome.failed == 1

  # Exactly one error, attributed to the row's CSV line and to the email field.
  assert [failure] = outcome.errors
  assert failure.csv_line_number == 2
  assert failure.field == :email
  assert failure.message =~ "email"
end
test "returns error for duplicate email" do
  taken_email = "duplicate@example.com"

  # Seed a member that already uses the address the import row will try to use.
  {:ok, _seeded} = Mv.Membership.create_member(%{email: taken_email, first_name: "Existing"})

  rows = [{2, %{member: %{email: taken_email, first_name: "New"}, custom: %{}}}]

  assert {:ok, outcome} =
           MemberCSV.process_chunk(rows, %{email: 0, first_name: 1}, %{}, [])

  assert outcome.inserted == 0
  assert outcome.failed == 1

  # Exactly one error, attributed to the row's CSV line and to the email field.
  assert [failure] = outcome.errors
  assert failure.csv_line_number == 2
  assert failure.field == :email

  # The exact wording is not pinned; any of these keywords in the message is accepted.
  assert Enum.any?(["email", "duplicate", "unique"], fn keyword ->
           failure.message =~ keyword
         end)
end
test "creates member with custom field values" do
  # A string-typed custom field has to exist before a value can be imported for it.
  {:ok, phone_field} =
    Ash.create(
      Ash.Changeset.for_create(Mv.Membership.CustomField, :create, %{
        name: "Phone",
        value_type: :string
      })
    )

  # The stringified custom field id is the key in the row data, the column map and the lookup.
  field_key = to_string(phone_field.id)

  rows = [
    {2,
     %{
       member: %{email: "withcustom@example.com"},
       custom: %{field_key => "123-456-7890"}
     }}
  ]

  lookup = %{field_key => %{id: phone_field.id, value_type: phone_field.value_type}}

  assert {:ok, outcome} =
           MemberCSV.process_chunk(
             rows,
             %{email: 0},
             %{field_key => 1},
             custom_field_lookup: lookup
           )

  assert outcome.inserted == 1
  assert outcome.failed == 0

  # The member must exist and carry exactly one value, linked to the new custom field.
  imported =
    Enum.find(Mv.Membership.list_members!(), fn candidate ->
      candidate.email == "withcustom@example.com"
    end)

  refute is_nil(imported)

  {:ok, %{custom_field_values: stored_values}} = Ash.load(imported, :custom_field_values)
  assert [stored] = stored_values
  assert stored.custom_field_id == phone_field.id
  assert stored.value.value == "123-456-7890"
end
test "handles multiple rows with mixed success and failure" do
  # Lines 2 and 4 carry well-formed addresses; line 3 is expected to be rejected.
  rows =
    for {line, email} <- [
          {2, "valid1@example.com"},
          {3, "invalid-email"},
          {4, "valid2@example.com"}
        ] do
      {line, %{member: %{email: email}, custom: %{}}}
    end

  assert {:ok, outcome} = MemberCSV.process_chunk(rows, %{email: 0}, %{}, [])

  # The bad row must not prevent the rows around it from being inserted.
  assert outcome.inserted == 2
  assert outcome.failed == 1

  # The single error must point at the CSV line of the rejected row.
  assert [failure] = outcome.errors
  assert failure.csv_line_number == 3
end
test "preserves CSV line numbers in errors" do
  # Non-consecutive line numbers, so the reported numbers cannot be list positions.
  rows = [
    {5, %{member: %{email: "invalid"}, custom: %{}}},
    {10, %{member: %{email: "also-invalid"}, custom: %{}}}
  ]

  assert {:ok, outcome} = MemberCSV.process_chunk(rows, %{email: 0}, %{}, [])

  assert outcome.failed == 2
  assert [_, _] = outcome.errors

  # Both original CSV line numbers must appear among the errors; order is not checked.
  reported_lines = for failure <- outcome.errors, do: failure.csv_line_number
  assert Enum.member?(reported_lines, 5)
  assert Enum.member?(reported_lines, 10)
end
test "returns {:ok, chunk_result} on success" do
chunk_rows_with_lines = [{2, %{member: %{email: "test@example.com"}, custom: %{}}}]
column_map = %{email: 0}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
# Check that chunk_result contains expected fields
assert Map.has_key?(chunk_result, :inserted)
@ -99,19 +259,23 @@ defmodule Mv.Membership.Import.MemberCSVTest do
assert is_list(chunk_result.errors)
end
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers were lost.
# The first test header, the "This might return ..." comment, the 3-argument call and the
# permissive match? assertion appear to be pre-change lines. Confirm which lines belong to
# the final file.
test "returns {:error, reason} on failure" do
# Edge case: an empty chunk must succeed and report all-zero counters.
test "returns {:ok, _} with zero counts for empty chunk" do
chunk_rows_with_lines = []
column_map = %{}
custom_field_map = %{}
opts = []
# This might return {:ok, _} with zero counts or {:error, _}
result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, opts)
assert match?({:ok, _}, result) or match?({:error, _}, result)
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
# Nothing was submitted, so nothing may be counted as inserted or failed.
assert chunk_result.inserted == 0
assert chunk_result.failed == 0
assert chunk_result.errors == []
end
test "function has documentation" do
  # Despite its name, this test only verifies that process_chunk/4 is part of the
  # module's public API; it does not inspect @doc.
  #
  # function_exported?/3 does not load the module itself, so without an explicit load
  # the result would depend on whether an earlier test already touched MemberCSV.
  assert Code.ensure_loaded?(MemberCSV)
  assert function_exported?(MemberCSV, :process_chunk, 4)
end
end