Merge pull request 'implements header normalization closes #332' (#352) from feature/332_header_normalization into main

Reviewed-on: #352
carla 2026-01-15 17:01:50 +01:00
commit 9be5dc8751
4 changed files with 1151 additions and 29 deletions


@@ -0,0 +1,396 @@
defmodule Mv.Membership.Import.HeaderMapper do
@moduledoc """
Maps CSV headers to canonical member fields and custom fields.
Provides header normalization and mapping functionality for CSV imports.
Handles bilingual header variants (English/German) and custom field detection.
## Header Normalization
Headers are normalized using the following rules:
- Trim whitespace
- Convert to lowercase
- Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
- Unify hyphen variants (en dash, em dash, minus sign → standard hyphen)
- Remove or unify punctuation (parentheses, brackets, slashes, underscores → spaces)
- Remove all whitespace (ensures "first name" == "firstname")
## Member Field Mapping
Maps CSV headers to canonical member fields:
- `email` (required)
- `first_name` (optional)
- `last_name` (optional)
- `street` (optional)
- `postal_code` (optional)
- `city` (optional)
Supports both English and German variants (e.g., "Email" / "E-Mail", "First Name" / "Vorname").
## Custom Field Detection
Custom fields are detected by matching normalized header names to custom field names.
Member fields have priority over custom fields (member field wins in case of collision).
## Examples
iex> HeaderMapper.normalize_header(" E-Mail ")
"e-mail"
iex> HeaderMapper.build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}
iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
"""
@type column_map :: %{atom() => non_neg_integer()}
@type custom_field_map :: %{String.t() => non_neg_integer()}
@type unknown_headers :: [String.t()]
# Required member fields
@required_member_fields [:email]
# Canonical member fields with their raw variants
# These will be normalized at runtime when building the lookup map
@member_field_variants_raw %{
email: [
"email",
"e-mail",
"e_mail",
"e mail",
"e-mail adresse",
"e-mail-adresse",
"mail"
],
first_name: [
"first name",
"firstname",
"vorname"
],
last_name: [
"last name",
"lastname",
"surname",
"nachname",
"familienname"
],
street: [
"street",
"address",
"strasse"
],
postal_code: [
"postal code",
"postal_code",
"zip",
"postcode",
"plz",
"postleitzahl"
],
city: [
"city",
"town",
"stadt",
"ort"
]
}
# Build reverse map: normalized_variant -> canonical_field
# Cached in the process dictionary on first access for performance
defp normalized_to_canonical do
cached = Process.get({__MODULE__, :normalized_to_canonical})
if cached do
cached
else
map = build_normalized_to_canonical_map()
Process.put({__MODULE__, :normalized_to_canonical}, map)
map
end
end
# Builds the normalized variant -> canonical field map
defp build_normalized_to_canonical_map do
@member_field_variants_raw
|> Enum.flat_map(&map_variants_to_normalized/1)
|> Map.new()
end
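# For example, after normalization the resulting lookup contains entries such as
# %{"email" => :email, "e-mail" => :email, "vorname" => :first_name, "plz" => :postal_code, ...}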
# Maps a canonical field and its variants to normalized tuples
defp map_variants_to_normalized({canonical, variants}) do
Enum.map(variants, fn variant ->
{normalize_header(variant), canonical}
end)
end
@doc """
Normalizes a CSV header string for comparison.
Applies the following transformations:
- Trim whitespace
- Convert to lowercase
- Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
- Unify hyphen variants (en dash U+2013, em dash U+2014, minus sign U+2212 → standard hyphen)
- Remove or unify punctuation (parentheses, brackets, slashes, underscores → spaces)
- Remove all whitespace (ensures "first name" == "firstname")
- Final trim
## Examples
iex> normalize_header(" E-Mail ")
"e-mail"
iex> normalize_header("Straße")
"strasse"
iex> normalize_header("E-Mail (privat)")
"e-mailprivat"
iex> normalize_header("First Name")
"firstname"
"""
@spec normalize_header(String.t()) :: String.t()
def normalize_header(header) when is_binary(header) do
header
|> String.trim()
|> String.downcase()
|> transliterate_unicode()
|> unify_hyphens()
|> normalize_punctuation()
|> compress_whitespace()
|> String.trim()
end
def normalize_header(_), do: ""
@doc """
Builds column maps for member fields and custom fields from CSV headers.
## Parameters
- `headers` - List of CSV header strings (in column order, 0-based indices)
- `custom_fields` - List of custom field maps/structs with at least `:id` and `:name` keys
## Returns
- `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers}}` on success
- `{:error, reason}` on error (missing required field, duplicate headers)
## Examples
iex> build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}
iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
"""
@spec build_maps([String.t()], [map()]) ::
{:ok, %{member: column_map(), custom: custom_field_map(), unknown: unknown_headers()}}
| {:error, String.t()}
def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do
with {:ok, member_map, unknown_after_member} <- build_member_map(headers),
{:ok, custom_map, unknown_after_custom} <-
build_custom_field_map(headers, unknown_after_member, custom_fields, member_map) do
unknown = Enum.map(unknown_after_custom, &Enum.at(headers, &1))
{:ok, %{member: member_map, custom: custom_map, unknown: unknown}}
end
end
# --- Private Functions ---
# Transliterates German umlauts and special characters
defp transliterate_unicode(str) do
str
|> String.replace("ß", "ss")
|> String.replace("ä", "ae")
|> String.replace("ö", "oe")
|> String.replace("ü", "ue")
|> String.replace("Ä", "ae")
|> String.replace("Ö", "oe")
|> String.replace("Ü", "ue")
end
# Unifies different hyphen variants to standard hyphen
defp unify_hyphens(str) do
str
# en dash
|> String.replace(<<0x2013::utf8>>, "-")
# em dash
|> String.replace(<<0x2014::utf8>>, "-")
# minus sign
|> String.replace(<<0x2212::utf8>>, "-")
end
# Normalizes punctuation: parentheses, slashes, underscores become spaces
defp normalize_punctuation(str) do
str
|> String.replace("_", " ")
|> String.replace(~r/[()\[\]{}]/, " ")
|> String.replace(~r/[\/\\]/, " ")
end
# Compresses multiple whitespace characters to single space, then removes all spaces
# This ensures "first name" and "firstname" normalize to the same value
defp compress_whitespace(str) do
str
|> String.replace(~r/\s+/, " ")
|> String.replace(" ", "")
end
# Builds member field column map
defp build_member_map(headers) do
result =
headers
|> Enum.with_index()
|> Enum.reduce_while({%{}, []}, fn {header, index}, {acc_map, acc_unknown} ->
normalized = normalize_header(header)
case process_member_header(header, index, normalized, acc_map, %{}) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{member_map, unknown_indices} ->
validate_required_fields(member_map, unknown_indices)
end
end
# Processes a single header for member field mapping
defp process_member_header(_header, _index, normalized, acc_map, acc_seen)
when normalized == "" do
{:ok, acc_map, acc_seen}
end
defp process_member_header(_header, index, normalized, acc_map, _acc_seen) do
case Map.get(normalized_to_canonical(), normalized) do
nil ->
{:unknown}
canonical ->
if Map.has_key?(acc_map, canonical) do
{:error, "duplicate header for #{canonical} (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, canonical, index), %{}}
end
end
end
# Validates that all required member fields are present
defp validate_required_fields(member_map, unknown_indices) do
missing_required =
@required_member_fields
|> Enum.filter(&(not Map.has_key?(member_map, &1)))
if Enum.empty?(missing_required) do
{:ok, member_map, Enum.reverse(unknown_indices)}
else
missing_field = List.first(missing_required)
variants = Map.get(@member_field_variants_raw, missing_field, [])
accepted = Enum.join(variants, ", ")
{:error, "Missing required header: #{missing_field} (accepted: #{accepted})"}
end
end
# Builds custom field column map from unmatched headers
defp build_custom_field_map(headers, unknown_indices, custom_fields, _member_map) do
custom_field_lookup = build_custom_field_lookup(custom_fields)
result =
unknown_indices
|> Enum.reduce_while({%{}, []}, fn index, {acc_map, acc_unknown} ->
header = Enum.at(headers, index)
normalized = normalize_header(header)
case process_custom_field_header(
header,
index,
normalized,
custom_field_lookup,
acc_map,
%{}
) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{custom_map, remaining_unknown} ->
{:ok, custom_map, Enum.reverse(remaining_unknown)}
end
end
# Builds normalized custom field name -> id lookup map
defp build_custom_field_lookup(custom_fields) do
custom_fields
|> Enum.reduce(%{}, fn cf, acc ->
name = Map.get(cf, :name) || Map.get(cf, "name")
id = Map.get(cf, :id) || Map.get(cf, "id")
if name && id do
normalized_name = normalize_header(name)
Map.put(acc, normalized_name, id)
else
acc
end
end)
end
# Processes a single header for custom field mapping
defp process_custom_field_header(
_header,
_index,
normalized,
_custom_field_lookup,
acc_map,
_acc_seen
)
when normalized == "" do
{:ok, acc_map, %{}}
end
defp process_custom_field_header(
_header,
index,
normalized,
custom_field_lookup,
acc_map,
_acc_seen
) do
if Map.has_key?(custom_field_lookup, normalized) do
custom_field_id = custom_field_lookup[normalized]
if Map.has_key?(acc_map, custom_field_id) do
{:error, "duplicate custom field header (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, custom_field_id, index), %{}}
end
else
{:unknown}
end
end
end
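As a quick illustration of the rules above, a hypothetical IEx session (the custom field ids "cf1"/"cf2" and the "Mitglied seit" column are invented for this example) showing bilingual mapping, member-field priority on collision, and unknown-column collection:

iex> headers = ["E-Mail", "Vorname", "Lieblingsfarbe", "Mitglied seit"]
iex> custom_fields = [%{id: "cf1", name: "Lieblingsfarbe"}, %{id: "cf2", name: "E-Mail"}]
iex> HeaderMapper.build_maps(headers, custom_fields)
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{"cf1" => 2}, unknown: ["Mitglied seit"]}}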


@@ -2,6 +2,8 @@ defmodule Mv.Membership.Import.MemberCSV do
@moduledoc """
Service module for importing members from CSV files.
require Ash.Query
This module provides the core API for CSV member import functionality:
- `prepare/2` - Parses and validates CSV content, returns import state
- `process_chunk/3` - Processes a chunk of rows and creates members
@@ -61,6 +63,7 @@ defmodule Mv.Membership.Import.MemberCSV do
chunks: list(list({pos_integer(), map()})),
column_map: %{atom() => non_neg_integer()},
custom_field_map: %{String.t() => non_neg_integer()},
custom_field_lookup: %{String.t() => %{id: String.t(), value_type: atom()}},
warnings: list(String.t())
}
@@ -70,6 +73,9 @@ defmodule Mv.Membership.Import.MemberCSV do
errors: list(Error.t())
}
alias Mv.Membership.Import.CsvParser
alias Mv.Membership.Import.HeaderMapper
@doc """
Prepares CSV content for import by parsing, mapping headers, and validating limits.
@@ -104,12 +110,133 @@ defmodule Mv.Membership.Import.MemberCSV do
"""
@spec prepare(String.t(), keyword()) :: {:ok, import_state()} | {:error, String.t()}
def prepare(file_content, opts \\ []) do
max_rows = Keyword.get(opts, :max_rows, 1000)
chunk_size = Keyword.get(opts, :chunk_size, 200)
with {:ok, headers, rows} <- CsvParser.parse(file_content),
{:ok, custom_fields} <- load_custom_fields(),
{:ok, maps, warnings} <- build_header_maps(headers, custom_fields),
:ok <- validate_row_count(rows, max_rows) do
chunks = chunk_rows(rows, maps, chunk_size)
# Build custom field lookup for efficient value processing
custom_field_lookup = build_custom_field_lookup(custom_fields)
{:ok,
%{
chunks: chunks,
column_map: maps.member,
custom_field_map: maps.custom,
custom_field_lookup: custom_field_lookup,
warnings: warnings
}}
end
end
# Loads all custom fields from the database
defp load_custom_fields do
custom_fields =
Mv.Membership.CustomField
|> Ash.read!()
{:ok, custom_fields}
rescue
e ->
{:error, "Failed to load custom fields: #{Exception.message(e)}"}
end
# Builds custom field lookup map for efficient value processing
defp build_custom_field_lookup(custom_fields) do
custom_fields
|> Enum.reduce(%{}, fn cf, acc ->
id_str = to_string(cf.id)
Map.put(acc, id_str, %{id: cf.id, value_type: cf.value_type})
end)
end
# Builds header maps using HeaderMapper and collects warnings for unknown custom fields
defp build_header_maps(headers, custom_fields) do
# Convert custom fields to maps with id and name
custom_field_maps =
Enum.map(custom_fields, fn cf ->
%{id: to_string(cf.id), name: cf.name}
end)
case HeaderMapper.build_maps(headers, custom_field_maps) do
{:ok, %{member: member_map, custom: custom_map, unknown: unknown}} ->
# Build warnings for unknown custom field columns
warnings =
unknown
|> Enum.filter(fn header ->
# Check if it could be a custom field (not a known member field)
normalized = HeaderMapper.normalize_header(header)
# If it's not empty and not a member field, it might be a custom field
normalized != "" && not member_field?(normalized)
end)
|> Enum.map(fn header ->
"Unknown column '#{header}' will be ignored. " <>
"If this is a custom field, create it in Mila before importing."
end)
{:ok, %{member: member_map, custom: custom_map}, warnings}
{:error, reason} ->
{:error, reason}
end
end
# Checks if a normalized header matches a member field
# Uses HeaderMapper's internal logic to check if header would map to a member field
defp member_field?(normalized) do
# Try to build maps with just this header - if it maps to a member field, it's a member field
case HeaderMapper.build_maps([normalized], []) do
{:ok, %{member: member_map}} ->
# If member_map is not empty, it's a member field
map_size(member_map) > 0
_ ->
false
end
end
# Validates that row count doesn't exceed limit
defp validate_row_count(rows, max_rows) do
if length(rows) > max_rows do
{:error, "CSV file exceeds maximum row limit of #{max_rows} rows"}
else
:ok
end
end
# Chunks rows and converts them to row maps using column maps
defp chunk_rows(rows, maps, chunk_size) do
rows
|> Enum.chunk_every(chunk_size)
|> Enum.map(fn chunk ->
Enum.map(chunk, fn {line_number, row_values} ->
row_map = build_row_map(row_values, maps)
{line_number, row_map}
end)
end)
end
# Builds a row map from raw row values using column maps
defp build_row_map(row_values, maps) do
member_map =
maps.member
|> Enum.reduce(%{}, fn {field, index}, acc ->
value = Enum.at(row_values, index, "")
Map.put(acc, field, value)
end)
custom_map =
maps.custom
|> Enum.reduce(%{}, fn {custom_field_id, index}, acc ->
value = Enum.at(row_values, index, "")
Map.put(acc, custom_field_id, value)
end)
%{member: member_map, custom: custom_map}
end
@doc """
@@ -126,8 +253,9 @@ defmodule Mv.Membership.Import.MemberCSV do
- `chunk_rows_with_lines` - List of tuples `{csv_line_number, row_map}` where:
- `csv_line_number` - Physical line number in CSV (1-based)
- `row_map` - Map with `:member` and `:custom` keys containing field values
- `column_map` - Map of canonical field names (atoms) to column indices (for reference)
- `custom_field_map` - Map of custom field IDs (strings) to column indices (for reference)
- `opts` - Optional keyword list for processing options
## Returns
@@ -137,22 +265,212 @@ defmodule Mv.Membership.Import.MemberCSV do
## Examples
iex> chunk = [{2, %{member: %{email: "john@example.com"}, custom: %{}}}]
iex> column_map = %{email: 0}
iex> custom_field_map = %{}
iex> MemberCSV.process_chunk(chunk, column_map, custom_field_map)
{:ok, %{inserted: 1, failed: 0, errors: []}}
"""
@spec process_chunk(
list({pos_integer(), map()}),
%{atom() => non_neg_integer()},
%{String.t() => non_neg_integer()},
keyword()
) :: {:ok, chunk_result()} | {:error, String.t()}
def process_chunk(chunk_rows_with_lines, _column_map, _custom_field_map, opts \\ []) do
custom_field_lookup = Keyword.get(opts, :custom_field_lookup, %{})
{inserted, failed, errors} =
Enum.reduce(chunk_rows_with_lines, {0, 0, []}, fn {line_number, row_map},
{acc_inserted, acc_failed, acc_errors} ->
case process_row(row_map, line_number, custom_field_lookup) do
{:ok, _member} ->
{acc_inserted + 1, acc_failed, acc_errors}
{:error, error} ->
{acc_inserted, acc_failed + 1, [error | acc_errors]}
end
end)
{:ok, %{inserted: inserted, failed: failed, errors: Enum.reverse(errors)}}
end
# Processes a single row and creates member with custom field values
defp process_row(
%{member: member_attrs, custom: custom_attrs},
line_number,
custom_field_lookup
) do
# Prepare custom field values for Ash
custom_field_values = prepare_custom_field_values(custom_attrs, custom_field_lookup)
# Create member with custom field values
member_attrs_with_cf =
member_attrs
|> Map.put(:custom_field_values, custom_field_values)
|> trim_string_values()
# Only include custom_field_values if not empty
final_attrs =
if Enum.empty?(custom_field_values) do
Map.delete(member_attrs_with_cf, :custom_field_values)
else
member_attrs_with_cf
end
case Mv.Membership.create_member(final_attrs) do
{:ok, member} ->
{:ok, member}
{:error, %Ash.Error.Invalid{} = error} ->
{:error, format_ash_error(error, line_number)}
{:error, error} ->
{:error, %Error{csv_line_number: line_number, field: nil, message: inspect(error)}}
end
rescue
e ->
{:error, %Error{csv_line_number: line_number, field: nil, message: Exception.message(e)}}
end
# Prepares custom field values from row map for Ash
defp prepare_custom_field_values(custom_attrs, custom_field_lookup) when is_map(custom_attrs) do
custom_attrs
|> Enum.filter(fn {_id, value} -> value != nil && value != "" end)
|> Enum.map(fn {custom_field_id_str, value} ->
case Map.get(custom_field_lookup, custom_field_id_str) do
nil ->
# Custom field not found, skip
nil
%{id: custom_field_id, value_type: value_type} ->
%{
"custom_field_id" => to_string(custom_field_id),
"value" => format_custom_field_value(value, value_type)
}
end
end)
|> Enum.filter(&(&1 != nil))
end
defp prepare_custom_field_values(_, _), do: []
# Formats a custom field value according to its type
# Uses _union_type and _union_value format as expected by Ash
defp format_custom_field_value(value, :string) when is_binary(value) do
%{"_union_type" => "string", "_union_value" => String.trim(value)}
end
defp format_custom_field_value(value, :integer) when is_binary(value) do
case Integer.parse(value) do
{int_value, _} -> %{"_union_type" => "integer", "_union_value" => int_value}
:error -> %{"_union_type" => "string", "_union_value" => String.trim(value)}
end
end
defp format_custom_field_value(value, :boolean) when is_binary(value) do
bool_value =
value
|> String.trim()
|> String.downcase()
|> case do
"true" -> true
"1" -> true
"yes" -> true
"ja" -> true
_ -> false
end
%{"_union_type" => "boolean", "_union_value" => bool_value}
end
defp format_custom_field_value(value, :date) when is_binary(value) do
case Date.from_iso8601(String.trim(value)) do
{:ok, date} -> %{"_union_type" => "date", "_union_value" => date}
{:error, _} -> %{"_union_type" => "string", "_union_value" => String.trim(value)}
end
end
defp format_custom_field_value(value, :email) when is_binary(value) do
%{"_union_type" => "email", "_union_value" => String.trim(value)}
end
defp format_custom_field_value(value, _type) when is_binary(value) do
# Default to string if type is unknown
%{"_union_type" => "string", "_union_value" => String.trim(value)}
end
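# Illustrative examples (assumed shapes, mirroring the clauses above; not doctests):
#   format_custom_field_value("42", :integer)
#   #=> %{"_union_type" => "integer", "_union_value" => 42}
#   format_custom_field_value("2026-01-15", :date)
#   #=> %{"_union_type" => "date", "_union_value" => ~D[2026-01-15]}
#   format_custom_field_value("not a number", :integer)
#   #=> %{"_union_type" => "string", "_union_value" => "not a number"} (string fallback)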
# Trims all string values in member attributes
defp trim_string_values(attrs) do
Enum.reduce(attrs, %{}, fn {key, value}, acc ->
trimmed_value =
if is_binary(value) do
String.trim(value)
else
value
end
Map.put(acc, key, trimmed_value)
end)
end
# Formats Ash errors into MemberCSV.Error structs
defp format_ash_error(%Ash.Error.Invalid{errors: errors}, line_number) do
# Try to find email-related errors first (for better error messages)
email_error =
Enum.find(errors, fn error ->
case error do
%{field: :email} -> true
_ -> false
end
end)
case email_error || List.first(errors) do
%{field: field, message: message} when is_atom(field) ->
%Error{
csv_line_number: line_number,
field: field,
message: format_error_message(message, field)
}
%{message: message} ->
%Error{
csv_line_number: line_number,
field: nil,
message: format_error_message(message, nil)
}
_ ->
%Error{
csv_line_number: line_number,
field: nil,
message: "Validation failed"
}
end
end
# Formats error messages, handling common cases like email uniqueness
defp format_error_message(message, field) when is_binary(message) do
if email_uniqueness_error?(message, field) do
"email has already been taken"
else
message
end
end
defp format_error_message(message, _field), do: to_string(message)
# Checks if error message indicates email uniqueness constraint violation
defp email_uniqueness_error?(message, :email) do
message_lower = String.downcase(message)
String.contains?(message_lower, "unique") or
String.contains?(message_lower, "constraint") or
String.contains?(message_lower, "duplicate") or
String.contains?(message_lower, "already been taken") or
String.contains?(message_lower, "already exists") or
String.contains?(message_lower, "violates unique constraint")
end
defp email_uniqueness_error?(_message, _field), do: false
end
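Taken together, the two public functions compose as follows. A minimal sketch of a hypothetical caller (the module name `MyAppWeb.ImportRunner` and the surrounding wiring are assumptions, not part of this PR) that threads the `custom_field_lookup` from the import state into `process_chunk/4` via opts:

defmodule MyAppWeb.ImportRunner do
  # Hypothetical caller module, not part of this PR.
  alias Mv.Membership.Import.MemberCSV

  def run(file_content) do
    with {:ok, state} <- MemberCSV.prepare(file_content, max_rows: 1000, chunk_size: 200) do
      results =
        Enum.map(state.chunks, fn chunk ->
          {:ok, result} =
            MemberCSV.process_chunk(
              chunk,
              state.column_map,
              state.custom_field_map,
              custom_field_lookup: state.custom_field_lookup
            )

          result
        end)

      {:ok,
       %{
         inserted: results |> Enum.map(& &1.inserted) |> Enum.sum(),
         failed: results |> Enum.map(& &1.failed) |> Enum.sum(),
         errors: Enum.flat_map(results, & &1.errors),
         warnings: state.warnings
       }}
    end
  end
end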


@@ -0,0 +1,244 @@
defmodule Mv.Membership.Import.HeaderMapperTest do
use ExUnit.Case, async: true
alias Mv.Membership.Import.HeaderMapper
describe "normalize_header/1" do
test "trims whitespace" do
assert HeaderMapper.normalize_header(" email ") == "email"
end
test "converts to lowercase" do
assert HeaderMapper.normalize_header("EMAIL") == "email"
assert HeaderMapper.normalize_header("E-Mail") == "e-mail"
end
test "normalizes Unicode characters" do
# ß -> ss
assert HeaderMapper.normalize_header("Straße") == "strasse"
# Umlaute transliteration (ä -> ae, ö -> oe, ü -> ue)
assert HeaderMapper.normalize_header("Müller") == "mueller"
assert HeaderMapper.normalize_header("Köln") == "koeln"
assert HeaderMapper.normalize_header("Grün") == "gruen"
end
test "compresses and removes whitespace" do
# Whitespace is removed entirely to ensure "first name" == "firstname"
assert HeaderMapper.normalize_header("first name") == "firstname"
assert HeaderMapper.normalize_header("email address") == "emailaddress"
end
test "unifies hyphen variants" do
# Different Unicode hyphen characters should become standard hyphen
# en dash (U+2013)
assert HeaderMapper.normalize_header("E–Mail") == "e-mail"
# minus sign (U+2212)
assert HeaderMapper.normalize_header("E−Mail") == "e-mail"
# standard hyphen
assert HeaderMapper.normalize_header("E-Mail") == "e-mail"
end
test "removes or unifies punctuation" do
# Parentheses, slashes, etc. are removed (whitespace is also removed)
assert HeaderMapper.normalize_header("E-Mail (privat)") == "e-mailprivat"
assert HeaderMapper.normalize_header("Telefon / Mobil") == "telefonmobil"
end
test "handles empty strings" do
assert HeaderMapper.normalize_header("") == ""
assert HeaderMapper.normalize_header(" ") == ""
end
end
describe "build_maps/2" do
test "maps English email variant correctly" do
headers = ["Email"]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert custom_map == %{}
assert unknown == []
end
test "maps German email variant correctly" do
headers = ["E-Mail"]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert custom_map == %{}
assert unknown == []
end
test "maps multiple member fields" do
headers = ["Email", "First Name", "Last Name"]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert member_map[:first_name] == 1
assert member_map[:last_name] == 2
assert custom_map == %{}
assert unknown == []
end
test "handles Unicode and whitespace in headers" do
headers = [" E-Mail ", "Straße", " Telefon / Mobil "]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert member_map[:street] == 1
# "Telefon / Mobil" is not a known member field, so it should be unknown
assert length(unknown) == 1
assert custom_map == %{}
end
test "returns error when duplicate headers normalize to same field" do
headers = ["Email", "E-Mail"]
assert {:error, reason} = HeaderMapper.build_maps(headers, [])
assert reason =~ "duplicate"
assert reason =~ "email"
end
test "returns error when required field email is missing" do
headers = ["First Name", "Last Name"]
assert {:error, reason} = HeaderMapper.build_maps(headers, [])
assert reason =~ "Missing required header"
assert reason =~ "email"
assert reason =~ "accepted"
end
test "collects unknown columns" do
headers = ["Email", "FooBar", "UnknownColumn"]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert length(unknown) == 2
assert "FooBar" in unknown or "foobar" in unknown
assert "UnknownColumn" in unknown or "unknowncolumn" in unknown
assert custom_map == %{}
end
test "ignores empty headers after normalization" do
headers = ["Email", " ", ""]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert custom_map == %{}
assert unknown == []
end
test "maps custom field columns correctly" do
headers = ["Email", "Lieblingsfarbe"]
custom_fields = [%{id: "cf1", name: "Lieblingsfarbe"}]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, custom_fields)
assert member_map[:email] == 0
assert custom_map["cf1"] == 1
assert unknown == []
end
test "custom field collision: member field wins" do
headers = ["Email"]
# Custom field with name "Email" should not override member field
custom_fields = [%{id: "cf1", name: "Email"}]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, custom_fields)
assert member_map[:email] == 0
# Custom field should not be in custom_map because member field has priority
assert custom_map == %{}
assert unknown == []
end
test "handles custom field with Unicode normalization" do
headers = ["Email", "Straße"]
custom_fields = [%{id: "cf1", name: "Straße"}]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, custom_fields)
assert member_map[:email] == 0
# "Straße" is a member field (street), so it should be in member_map, not custom_map
assert member_map[:street] == 1
assert custom_map == %{}
assert unknown == []
end
test "handles unknown custom field columns" do
headers = ["Email", "UnknownCustomField"]
custom_fields = [%{id: "cf1", name: "KnownField"}]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, custom_fields)
assert member_map[:email] == 0
assert custom_map == %{}
# UnknownCustomField should be in unknown list
assert length(unknown) == 1
end
test "handles duplicate custom field names after normalization" do
headers = ["Email", "CustomField", "Custom Field"]
custom_fields = [%{id: "cf1", name: "CustomField"}]
# Both "CustomField" and "Custom Field" normalize to the same, so this should error
assert {:error, reason} = HeaderMapper.build_maps(headers, custom_fields)
assert reason =~ "duplicate"
end
test "maps all supported member fields" do
headers = [
"Email",
"First Name",
"Last Name",
"Street",
"Postal Code",
"City"
]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert member_map[:first_name] == 1
assert member_map[:last_name] == 2
assert member_map[:street] == 3
assert member_map[:postal_code] == 4
assert member_map[:city] == 5
assert custom_map == %{}
assert unknown == []
end
test "maps German member field variants" do
headers = ["E-Mail", "Vorname", "Nachname", "Straße", "PLZ", "Stadt"]
assert {:ok, %{member: member_map, custom: custom_map, unknown: unknown}} =
HeaderMapper.build_maps(headers, [])
assert member_map[:email] == 0
assert member_map[:first_name] == 1
assert member_map[:last_name] == 2
assert member_map[:street] == 3
assert member_map[:postal_code] == 4
assert member_map[:city] == 5
assert custom_map == %{}
assert unknown == []
end
end
end


@@ -44,7 +44,6 @@ defmodule Mv.Membership.Import.MemberCSVTest do
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
test "returns {:ok, import_state} on success" do
file_content = "email\njohn@example.com"
opts = []
@@ -56,6 +55,8 @@ defmodule Mv.Membership.Import.MemberCSVTest do
assert Map.has_key?(import_state, :column_map)
assert Map.has_key?(import_state, :custom_field_map)
assert Map.has_key?(import_state, :warnings)
assert import_state.column_map[:email] == 0
assert import_state.chunks != []
end
test "returns {:error, reason} on failure" do
@@ -71,24 +72,183 @@ defmodule Mv.Membership.Import.MemberCSVTest do
end
end
describe "process_chunk/4" do
test "function exists and accepts chunk_rows_with_lines, column_map, custom_field_map, and opts" do
chunk_rows_with_lines = [{2, %{member: %{email: "john@example.com"}, custom: %{}}}]
column_map = %{email: 0}
custom_field_map = %{}
opts = []
# This will fail until the function is implemented
result = MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert match?({:ok, _}, result) or match?({:error, _}, result)
end
test "creates member successfully with valid data" do
chunk_rows_with_lines = [
{2, %{member: %{email: "john@example.com", first_name: "John"}, custom: %{}}}
]
column_map = %{email: 0, first_name: 1}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.inserted == 1
assert chunk_result.failed == 0
assert chunk_result.errors == []
# Verify member was created
members = Mv.Membership.list_members!()
assert Enum.any?(members, &(&1.email == "john@example.com"))
end
test "returns error for invalid email" do
chunk_rows_with_lines = [
{2, %{member: %{email: "invalid-email"}, custom: %{}}}
]
column_map = %{email: 0}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.inserted == 0
assert chunk_result.failed == 1
assert length(chunk_result.errors) == 1
error = List.first(chunk_result.errors)
assert error.csv_line_number == 2
assert error.field == :email
assert error.message =~ "email"
end
test "returns error for duplicate email" do
# Create existing member first
{:ok, _existing} =
Mv.Membership.create_member(%{email: "duplicate@example.com", first_name: "Existing"})
chunk_rows_with_lines = [
{2, %{member: %{email: "duplicate@example.com", first_name: "New"}, custom: %{}}}
]
column_map = %{email: 0, first_name: 1}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.inserted == 0
assert chunk_result.failed == 1
assert length(chunk_result.errors) == 1
error = List.first(chunk_result.errors)
assert error.csv_line_number == 2
assert error.field == :email
assert error.message =~ "email" or error.message =~ "duplicate" or error.message =~ "unique"
end
test "creates member with custom field values" do
# Create custom field first
{:ok, custom_field} =
Mv.Membership.CustomField
|> Ash.Changeset.for_create(:create, %{
name: "Phone",
value_type: :string
})
|> Ash.create()
chunk_rows_with_lines = [
{2,
%{
member: %{email: "withcustom@example.com"},
custom: %{to_string(custom_field.id) => "123-456-7890"}
}}
]
column_map = %{email: 0}
custom_field_map = %{to_string(custom_field.id) => 1}
custom_field_lookup = %{
to_string(custom_field.id) => %{id: custom_field.id, value_type: custom_field.value_type}
}
opts = [custom_field_lookup: custom_field_lookup]
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.inserted == 1
assert chunk_result.failed == 0
# Verify member and custom field value were created
members = Mv.Membership.list_members!()
member = Enum.find(members, &(&1.email == "withcustom@example.com"))
assert member != nil
{:ok, member_with_cf} = Ash.load(member, :custom_field_values)
assert length(member_with_cf.custom_field_values) == 1
cfv = List.first(member_with_cf.custom_field_values)
assert cfv.custom_field_id == custom_field.id
assert cfv.value.value == "123-456-7890"
end
test "handles multiple rows with mixed success and failure" do
chunk_rows_with_lines = [
{2, %{member: %{email: "valid1@example.com"}, custom: %{}}},
{3, %{member: %{email: "invalid-email"}, custom: %{}}},
{4, %{member: %{email: "valid2@example.com"}, custom: %{}}}
]
column_map = %{email: 0}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.inserted == 2
assert chunk_result.failed == 1
assert length(chunk_result.errors) == 1
error = List.first(chunk_result.errors)
assert error.csv_line_number == 3
end
test "preserves CSV line numbers in errors" do
chunk_rows_with_lines = [
{5, %{member: %{email: "invalid"}, custom: %{}}},
{10, %{member: %{email: "also-invalid"}, custom: %{}}}
]
column_map = %{email: 0}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.failed == 2
assert length(chunk_result.errors) == 2
line_numbers = Enum.map(chunk_result.errors, & &1.csv_line_number)
assert 5 in line_numbers
assert 10 in line_numbers
end
test "returns {:ok, chunk_result} on success" do
chunk_rows_with_lines = [{2, %{member: %{email: "test@example.com"}, custom: %{}}}]
column_map = %{email: 0}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
# Check that chunk_result contains expected fields
assert Map.has_key?(chunk_result, :inserted)
@@ -99,19 +259,23 @@ defmodule Mv.Membership.Import.MemberCSVTest do
assert is_list(chunk_result.errors)
end
test "returns {:ok, _} with zero counts for empty chunk" do
chunk_rows_with_lines = []
column_map = %{}
custom_field_map = %{}
opts = []
assert {:ok, chunk_result} =
MemberCSV.process_chunk(chunk_rows_with_lines, column_map, custom_field_map, opts)
assert chunk_result.inserted == 0
assert chunk_result.failed == 0
assert chunk_result.errors == []
end
test "function has documentation" do
# Check that @doc exists by reading the module
assert function_exported?(MemberCSV, :process_chunk, 4)
end
end