defmodule Mv.Membership.Import.HeaderMapper do
  @moduledoc """
  Maps CSV headers to canonical member fields and custom fields.

  Provides header normalization and mapping functionality for CSV imports.
  Handles bilingual header variants (English/German) and custom field detection.

  ## Header Normalization

  Headers are normalized by applying the following steps in order:

  - Trim surrounding whitespace
  - Compose to Unicode NFC (so "u" + combining diaeresis is treated like "ü")
  - Convert to lowercase
  - Transliterate German characters (ß → ss, ä → ae, ö → oe, ü → ue)
  - Unify hyphen variants (en dash, em dash, minus sign → standard hyphen)
  - Turn brackets and slashes into spaces
  - Remove all whitespace (ensures "first name" == "firstname")

  ## Member Field Mapping

  Maps CSV headers to canonical member fields:

  - `email` (required)
  - `first_name` (optional)
  - `last_name` (optional)
  - `street` (optional)
  - `postal_code` (optional)
  - `city` (optional)

  Supports both English and German variants
  (e.g., "Email" / "E-Mail", "First Name" / "Vorname").

  ## Custom Field Detection

  Custom fields are detected by matching normalized header names to normalized
  custom field names. Member fields have priority over custom fields (the member
  field wins in case of collision).

  ## Examples

      iex> HeaderMapper.normalize_header(" E-Mail ")
      "e-mail"

      iex> HeaderMapper.build_maps(["Email", "First Name"], [])
      {:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}

      iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
      {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}

  """

  @type column_map :: %{atom() => non_neg_integer()}
  @type custom_field_map :: %{String.t() => non_neg_integer()}
  @type unknown_headers :: [String.t()]

  # Member fields that must be present in every import file.
  @required_member_fields [:email]

  # Canonical member fields with their accepted raw header variants.
  # The variants are normalized when the lookup map is built (see
  # normalized_to_canonical/0), so they can be written in a readable form here.
  @member_field_variants_raw %{
    email: [
      "email",
      "e-mail",
      "e_mail",
      "e mail",
      "e-mail adresse",
      "e-mail-adresse",
      "mail"
    ],
    first_name: [
      "first name",
      "firstname",
      "vorname"
    ],
    last_name: [
      "last name",
      "lastname",
      "surname",
      "nachname",
      "familienname"
    ],
    street: [
      "street",
      "address",
      "strasse"
    ],
    postal_code: [
      "postal code",
      "postal_code",
      "zip",
      "postcode",
      "plz",
      "postleitzahl"
    ],
    city: [
      "city",
      "town",
      "stadt",
      "ort"
    ]
  }

  @doc """
  Normalizes a CSV header string for comparison.

  Applies the following transformations in order:

  - Trim whitespace
  - Compose to Unicode NFC
  - Convert to lowercase
  - Transliterate German characters (ß → ss, ä → ae, ö → oe, ü → ue)
  - Unify hyphen variants (en dash U+2013, em dash U+2014, minus sign U+2212 → standard hyphen)
  - Turn brackets and slashes into spaces
  - Remove all whitespace (ensures "first name" == "firstname")
  - Final trim

  Any non-binary input yields the empty string.

  ## Examples

      iex> normalize_header(" E-Mail ")
      "e-mail"

      iex> normalize_header("Straße")
      "strasse"

      iex> normalize_header("E-Mail (privat)")
      "e-mailprivat"

      iex> normalize_header("First Name")
      "firstname"

  """
  @spec normalize_header(String.t()) :: String.t()
  def normalize_header(header) when is_binary(header) do
    header
    |> String.trim()
    # Compose decomposed sequences first; otherwise "u" + U+0308 would slip past
    # the transliteration below, which only knows the precomposed characters.
    |> String.normalize(:nfc)
    |> String.downcase()
    |> transliterate_unicode()
    |> unify_hyphens()
    |> normalize_punctuation()
    |> remove_whitespace()
    |> String.trim()
  end

  def normalize_header(_), do: ""

  @doc """
  Builds column maps for member fields and custom fields from CSV headers.

  ## Parameters

  - `headers` - List of CSV header strings (in column order, 0-based indices)
  - `custom_fields` - List of custom field maps/structs with at least `:id` and
    `:name` keys (string keys `"id"` / `"name"` are accepted as well)

  ## Returns

  - `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers}}`
    on success. `unknown` holds the raw headers, in column order, that matched
    neither a member field nor a custom field. Headers that normalize to the
    empty string are ignored entirely.
  - `{:error, reason}` on error (missing required field, duplicate headers)

  ## Examples

      iex> build_maps(["Email", "First Name"], [])
      {:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}

      iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
      {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}

  """
  @spec build_maps([String.t()], [map()]) ::
          {:ok, %{member: column_map(), custom: custom_field_map(), unknown: unknown_headers()}}
          | {:error, String.t()}
  def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do
    indexed_headers = Enum.with_index(headers)

    with {:ok, member_map, unmatched} <- build_member_map(indexed_headers),
         {:ok, custom_map, unknown} <- build_custom_field_map(unmatched, custom_fields) do
      {:ok, %{member: member_map, custom: custom_map, unknown: unknown}}
    end
  end

  # --- Private Functions ---

  # Builds the reverse lookup: normalized variant -> canonical member field.
  # This is recomputed on every call, so callers build it once per import and
  # pass it along instead of calling this per header.
  defp normalized_to_canonical do
    for {canonical, variants} <- @member_field_variants_raw,
        variant <- variants,
        into: %{} do
      {normalize_header(variant), canonical}
    end
  end

  # Transliterates German umlauts and sharp s. The input is already lowercased
  # by normalize_header/1, so only the lowercase forms need handling.
  defp transliterate_unicode(str) do
    str
    |> String.replace("ß", "ss")
    |> String.replace("ä", "ae")
    |> String.replace("ö", "oe")
    |> String.replace("ü", "ue")
  end

  # Unifies en dash (U+2013), em dash (U+2014) and minus sign (U+2212)
  # to the standard ASCII hyphen.
  defp unify_hyphens(str) do
    String.replace(str, ["\u2013", "\u2014", "\u2212"], "-")
  end

  # Normalizes punctuation: brackets and slashes become spaces.
  defp normalize_punctuation(str) do
    str
    |> String.replace(~r/[()\[\]{}]/, " ")
    |> String.replace(~r/[\/\\]/, " ")
  end

  # Removes every whitespace run so that "first name" and "firstname"
  # normalize to the same value.
  defp remove_whitespace(str) do
    String.replace(str, ~r/\s+/, "")
  end

  # Walks the indexed headers once and maps recognized member headers to their
  # column index. Returns `{:ok, member_map, unmatched}` where `unmatched` is a
  # list of `{header, index, normalized}` triples in column order, or
  # `{:error, reason}` on a duplicate or a missing required field.
  defp build_member_map(indexed_headers) do
    lookup = normalized_to_canonical()

    result =
      Enum.reduce_while(indexed_headers, {:ok, %{}, []}, fn {header, index},
                                                            {:ok, member_map, unmatched} ->
        normalized = normalize_header(header)

        case classify_member_header(normalized, lookup, member_map) do
          :skip ->
            {:cont, {:ok, member_map, unmatched}}

          :unknown ->
            {:cont, {:ok, member_map, [{header, index, normalized} | unmatched]}}

          {:ok, canonical} ->
            {:cont, {:ok, Map.put(member_map, canonical, index), unmatched}}

          {:error, _reason} = error ->
            {:halt, error}
        end
      end)

    with {:ok, member_map, unmatched} <- result do
      validate_required_fields(member_map, Enum.reverse(unmatched))
    end
  end

  # Decides what a single normalized header means for the member mapping:
  # `:skip` for empty headers, `:unknown` when no member field matches,
  # `{:ok, canonical}` for a fresh match, `{:error, reason}` for a duplicate.
  defp classify_member_header("", _lookup, _member_map), do: :skip

  defp classify_member_header(normalized, lookup, member_map) do
    case Map.fetch(lookup, normalized) do
      :error ->
        :unknown

      {:ok, canonical} when is_map_key(member_map, canonical) ->
        {:error, "duplicate header for #{canonical} (normalized: #{normalized})"}

      {:ok, canonical} ->
        {:ok, canonical}
    end
  end

  # Validates that all required member fields are present. Reports the first
  # missing field together with the header variants that would satisfy it.
  defp validate_required_fields(member_map, unmatched) do
    case Enum.find(@required_member_fields, &(not Map.has_key?(member_map, &1))) do
      nil ->
        {:ok, member_map, unmatched}

      missing_field ->
        accepted =
          @member_field_variants_raw
          |> Map.get(missing_field, [])
          |> Enum.join(", ")

        {:error, "Missing required header: #{missing_field} (accepted: #{accepted})"}
    end
  end

  # Matches the headers left over from the member pass against the custom
  # fields. Returns `{:ok, custom_map, unknown_headers}` (custom field id ->
  # column index, plus the raw headers that matched nothing, in column order)
  # or `{:error, reason}` when two headers resolve to the same custom field.
  #
  # Entries here never have an empty normalized form: such headers are already
  # dropped by the member pass.
  defp build_custom_field_map(unmatched, custom_fields) do
    lookup = build_custom_field_lookup(custom_fields)

    result =
      Enum.reduce_while(unmatched, {:ok, %{}, []}, fn {header, index, normalized},
                                                      {:ok, custom_map, unknown} ->
        case Map.fetch(lookup, normalized) do
          :error ->
            {:cont, {:ok, custom_map, [header | unknown]}}

          {:ok, field_id} when is_map_key(custom_map, field_id) ->
            {:halt, {:error, "duplicate custom field header (normalized: #{normalized})"}}

          {:ok, field_id} ->
            {:cont, {:ok, Map.put(custom_map, field_id, index), unknown}}
        end
      end)

    with {:ok, custom_map, unknown} <- result do
      {:ok, custom_map, Enum.reverse(unknown)}
    end
  end

  # Builds the lookup: normalized custom field name -> custom field id.
  # Accepts atom or string keys; entries lacking a name or an id are ignored.
  # When two names normalize to the same value, the later entry wins.
  defp build_custom_field_lookup(custom_fields) do
    for field <- custom_fields,
        name = Map.get(field, :name) || Map.get(field, "name"),
        id = Map.get(field, :id) || Map.get(field, "id"),
        into: %{} do
      {normalize_header(name), id}
    end
  end
end