mitgliederverwaltung/lib/mv/membership/import/header_mapper.ex


defmodule Mv.Membership.Import.HeaderMapper do
@moduledoc """
Maps CSV headers to canonical member fields and custom fields.
Provides header normalization and mapping functionality for CSV imports.
Handles bilingual header variants (English/German) and custom field detection.
## Header Normalization
Headers are normalized using the following rules (applied in this order):
- Trim whitespace
- Convert to lowercase
- Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
- Unify hyphen variants (en dash, em dash, minus sign → standard hyphen)
- Unify punctuation (underscores, parentheses, brackets, slashes → spaces)
- Remove all whitespace (ensures "first name" == "firstname")
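For example, "First Name", "first_name", and "FIRSTNAME" all normalize to "firstname".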
## Member Field Mapping
Maps CSV headers to canonical member fields:
- `email` (required)
- `first_name` (optional)
- `last_name` (optional)
- `street` (optional)
- `postal_code` (optional)
- `city` (optional)
Supports both English and German variants (e.g., "Email" / "E-Mail", "First Name" / "Vorname").
## Custom Field Detection
Custom fields are detected by matching normalized header names to custom field names.
Member fields have priority over custom fields (member field wins in case of collision).
## Examples
iex> HeaderMapper.normalize_header(" E-Mail ")
"e-mail"
iex> HeaderMapper.build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}
iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
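Member fields take priority when a header matches both a member field and a custom field name:
iex> HeaderMapper.build_maps(["Email"], [%{id: "cf1", name: "Email"}])
{:ok, %{member: %{email: 0}, custom: %{}, unknown: []}}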
"""
@type column_map :: %{atom() => non_neg_integer()}
@type custom_field_map :: %{String.t() => non_neg_integer()}
@type unknown_headers :: [String.t()]
# Required member fields
@required_member_fields [:email]
# Canonical member fields with their raw variants
# These will be normalized at runtime when building the lookup map
@member_field_variants_raw %{
email: [
"email",
"e-mail",
"e_mail",
"e mail",
"e-mail adresse",
"e-mail-adresse",
"mail"
],
first_name: [
"first name",
"firstname",
"vorname"
],
last_name: [
"last name",
"lastname",
"surname",
"nachname",
"familienname"
],
street: [
"street",
"address",
"strasse"
],
postal_code: [
"postal code",
"postal_code",
"zip",
"postcode",
"plz",
"postleitzahl"
],
city: [
"city",
"town",
"stadt",
"ort"
]
}
# Build reverse map: normalized_variant -> canonical_field
# Cached per process (in the process dictionary) on first access for performance
defp normalized_to_canonical do
cached = Process.get({__MODULE__, :normalized_to_canonical})
if cached do
cached
else
map = build_normalized_to_canonical_map()
Process.put({__MODULE__, :normalized_to_canonical}, map)
map
end
end
# Builds the normalized variant -> canonical field map
defp build_normalized_to_canonical_map do
@member_field_variants_raw
|> Enum.flat_map(&map_variants_to_normalized/1)
|> Map.new()
end
# Maps a canonical field and its variants to normalized tuples
defp map_variants_to_normalized({canonical, variants}) do
Enum.map(variants, fn variant ->
{normalize_header(variant), canonical}
end)
end
@doc """
Normalizes a CSV header string for comparison.
Applies the following transformations:
- Trim whitespace
- Convert to lowercase
- Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
- Unify hyphen variants (en dash U+2013, em dash U+2014, minus sign U+2212 → standard hyphen)
- Unify punctuation (underscores, parentheses, brackets, slashes → spaces)
- Remove all whitespace (ensures "first name" == "firstname")
- Final trim
## Examples
iex> normalize_header(" E-Mail ")
"e-mail"
iex> normalize_header("Straße")
"strasse"
iex> normalize_header("E-Mail (privat)")
"e-mailprivat"
iex> normalize_header("First Name")
"firstname"
"""
@spec normalize_header(String.t()) :: String.t()
def normalize_header(header) when is_binary(header) do
header
|> String.trim()
|> String.downcase()
|> transliterate_unicode()
|> unify_hyphens()
|> normalize_punctuation()
|> compress_whitespace()
|> String.trim()
end
def normalize_header(_), do: ""
@doc """
Builds column maps for member fields and custom fields from CSV headers.
## Parameters
- `headers` - List of CSV header strings (in column order, 0-based indices)
- `custom_fields` - List of custom field maps/structs with at least `:id` and `:name` keys
## Returns
- `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers}}` on success
- `{:error, reason}` on error (missing required field, duplicate headers)
## Examples
iex> build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}
iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
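Headers that match neither a member field nor a custom field are reported as unknown,
and a missing required header yields an error:
iex> build_maps(["Email", "Notes"], [])
{:ok, %{member: %{email: 0}, custom: %{}, unknown: ["Notes"]}}
iex> build_maps(["First Name"], [])
{:error, "Missing required header: email (accepted: email, e-mail, e_mail, e mail, e-mail adresse, e-mail-adresse, mail)"}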
"""
@spec build_maps([String.t()], [map()]) ::
{:ok, %{member: column_map(), custom: custom_field_map(), unknown: unknown_headers()}}
| {:error, String.t()}
def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do
with {:ok, member_map, unknown_after_member} <- build_member_map(headers),
{:ok, custom_map, unknown_after_custom} <-
build_custom_field_map(headers, unknown_after_member, custom_fields, member_map) do
unknown = Enum.map(unknown_after_custom, &Enum.at(headers, &1))
{:ok, %{member: member_map, custom: custom_map, unknown: unknown}}
end
end
# --- Private Functions ---
# Transliterates German umlauts and special characters
defp transliterate_unicode(str) do
str
|> String.replace("ß", "ss")
|> String.replace("ä", "ae")
|> String.replace("ö", "oe")
|> String.replace("ü", "ue")
|> String.replace("Ä", "ae")
|> String.replace("Ö", "oe")
|> String.replace("Ü", "ue")
end
# Unifies different hyphen variants to standard hyphen
defp unify_hyphens(str) do
str
# en dash
|> String.replace(<<0x2013::utf8>>, "-")
# em dash
|> String.replace(<<0x2014::utf8>>, "-")
# minus sign
|> String.replace(<<0x2212::utf8>>, "-")
end
# Normalizes punctuation: parentheses, slashes, underscores become spaces
defp normalize_punctuation(str) do
str
|> String.replace("_", " ")
|> String.replace(~r/[()\[\]{}]/, " ")
|> String.replace(~r/[\/\\]/, " ")
end
# Compresses multiple whitespace characters to single space, then removes all spaces
# This ensures "first name" and "firstname" normalize to the same value
defp compress_whitespace(str) do
str
|> String.replace(~r/\s+/, " ")
|> String.replace(" ", "")
end
# Builds member field column map
defp build_member_map(headers) do
result =
headers
|> Enum.with_index()
|> Enum.reduce_while({%{}, []}, fn {header, index}, {acc_map, acc_unknown} ->
normalized = normalize_header(header)
case process_member_header(header, index, normalized, acc_map, %{}) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{member_map, unknown_indices} ->
validate_required_fields(member_map, unknown_indices)
end
end
# Processes a single header for member field mapping
defp process_member_header(_header, _index, normalized, acc_map, acc_seen)
when normalized == "" do
{:ok, acc_map, acc_seen}
end
defp process_member_header(_header, index, normalized, acc_map, _acc_seen) do
case Map.get(normalized_to_canonical(), normalized) do
nil ->
{:unknown}
canonical ->
if Map.has_key?(acc_map, canonical) do
{:error, "duplicate header for #{canonical} (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, canonical, index), %{}}
end
end
end
# Validates that all required member fields are present
defp validate_required_fields(member_map, unknown_indices) do
missing_required =
@required_member_fields
|> Enum.filter(&(not Map.has_key?(member_map, &1)))
if Enum.empty?(missing_required) do
{:ok, member_map, Enum.reverse(unknown_indices)}
else
missing_field = List.first(missing_required)
variants = Map.get(@member_field_variants_raw, missing_field, [])
accepted = Enum.join(variants, ", ")
{:error, "Missing required header: #{missing_field} (accepted: #{accepted})"}
end
end
# Builds custom field column map from unmatched headers
defp build_custom_field_map(headers, unknown_indices, custom_fields, _member_map) do
custom_field_lookup = build_custom_field_lookup(custom_fields)
result =
unknown_indices
|> Enum.reduce_while({%{}, []}, fn index, {acc_map, acc_unknown} ->
header = Enum.at(headers, index)
normalized = normalize_header(header)
case process_custom_field_header(
header,
index,
normalized,
custom_field_lookup,
acc_map,
%{}
) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{custom_map, remaining_unknown} ->
{:ok, custom_map, Enum.reverse(remaining_unknown)}
end
end
# Builds normalized custom field name -> id lookup map
defp build_custom_field_lookup(custom_fields) do
custom_fields
|> Enum.reduce(%{}, fn cf, acc ->
name = Map.get(cf, :name) || Map.get(cf, "name")
id = Map.get(cf, :id) || Map.get(cf, "id")
if name && id do
normalized_name = normalize_header(name)
Map.put(acc, normalized_name, id)
else
acc
end
end)
end
# Processes a single header for custom field mapping
defp process_custom_field_header(
_header,
_index,
normalized,
_custom_field_lookup,
acc_map,
_acc_seen
)
when normalized == "" do
{:ok, acc_map, %{}}
end
defp process_custom_field_header(
_header,
index,
normalized,
custom_field_lookup,
acc_map,
_acc_seen
) do
if Map.has_key?(custom_field_lookup, normalized) do
custom_field_id = custom_field_lookup[normalized]
if Map.has_key?(acc_map, custom_field_id) do
{:error, "duplicate custom field header (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, custom_field_id, index), %{}}
end
else
{:unknown}
end
end
end