mitgliederverwaltung/lib/mv/membership/import/header_mapper.ex

460 lines
13 KiB
Elixir
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

defmodule Mv.Membership.Import.HeaderMapper do
@moduledoc """
Maps CSV headers to canonical member fields and custom fields.
Provides header normalization and mapping functionality for CSV imports.
Handles bilingual header variants (English/German) and custom field detection.
## Header Normalization
Headers are normalized using the following rules:
- Trim whitespace
- Convert to lowercase
- Unicode normalization (ß → ss, ä → ae, ö → oe, ü → ue)
- Remove all whitespace (ensures "first name" == "firstname")
- Unify hyphen variants (en dash, minus sign → standard hyphen)
- Remove or unify punctuation (parentheses, slashes → spaces)
## Member Field Mapping
Maps CSV headers to canonical member fields (same as `Mv.Constants.member_fields()` for
importable attributes). All DB-backed member attributes can be imported.
- `email` (required)
- `first_name`, `last_name` (optional)
- `join_date`, `exit_date` (optional, ISO-8601 date)
- `notes` (optional)
- `country`, `city`, `street`, `house_number`, `postal_code` (optional)
- `membership_fee_start_date` (optional, ISO-8601 date)
Supports English and German header variants (e.g. "Email" / "E-Mail", "Join Date" / "Beitrittsdatum").
## Fields not supported for import
- **membership_fee_status** Computed (calculation from membership fee cycles). Not stored;
cannot be set via CSV. Export can include it.
- **groups** Many-to-many relationship (through member_groups). Import would require
resolving group names/slugs to IDs and creating associations; not in current import scope.
## Custom Field Detection
Custom fields are detected by matching normalized header names to custom field names.
Member fields have priority over custom fields (member field wins in case of collision).
## Examples
iex> HeaderMapper.normalize_header(" E-Mail ")
"e-mail"
iex> HeaderMapper.build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}
iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
"""
@type column_map :: %{atom() => non_neg_integer()}
@type custom_field_map :: %{String.t() => non_neg_integer()}
@type unknown_headers :: [String.t()]
# Required member fields
@required_member_fields [:email]
# Canonical member fields with their raw variants
# These will be normalized at runtime when building the lookup map
@member_field_variants_raw %{
email: [
"email",
"e-mail",
"e_mail",
"e mail",
"e-mail adresse",
"e-mail-adresse",
"mail"
],
first_name: [
"first name",
"firstname",
"vorname"
],
last_name: [
"last name",
"lastname",
"surname",
"nachname",
"familienname"
],
join_date: [
"join date",
"join_date",
"beitrittsdatum",
"beitritts-datum"
],
exit_date: [
"exit date",
"exit_date",
"austrittsdatum",
"austritts-datum"
],
notes: [
"notes",
"notizen",
"bemerkungen"
],
street: [
"street",
"address",
"strasse"
],
house_number: [
"house number",
"house_number",
"house no",
"hausnummer",
"nr",
"nr.",
"nummer"
],
postal_code: [
"postal code",
"postal_code",
"zip",
"postcode",
"plz",
"postleitzahl"
],
city: [
"city",
"town",
"stadt",
"ort"
],
country: [
"country",
"land",
"staat"
],
membership_fee_start_date: [
"membership fee start date",
"membership_fee_start_date",
"fee start",
"beitragsbeginn",
"beitrags-beginn"
]
}
# Build reverse map: normalized_variant -> canonical_field
# Computed on each access - the map is small enough that recomputing is fast
# This avoids Module.get_attribute issues while maintaining simplicity
defp normalized_to_canonical do
@member_field_variants_raw
|> Enum.flat_map(fn {canonical, variants} ->
Enum.map(variants, fn variant ->
{normalize_header(variant), canonical}
end)
end)
|> Map.new()
end
@doc """
Returns a MapSet of normalized member field names.
This is the single source of truth for known member fields.
Used to distinguish between member fields and custom fields.
## Returns
- `MapSet.t(String.t())` - Set of normalized member field names
## Examples
iex> HeaderMapper.known_member_fields()
#MapSet<["email", "firstname", "lastname", "street", "postalcode", "city"]>
"""
# Known member fields computed at compile-time for performance and determinism
@known_member_fields @member_field_variants_raw
|> Map.keys()
|> Enum.map(fn canonical ->
# Normalize the canonical field name (e.g., :first_name -> "firstname")
canonical
|> Atom.to_string()
|> String.replace("_", "")
|> String.downcase()
end)
|> MapSet.new()
@spec known_member_fields() :: MapSet.t(String.t())
def known_member_fields do
@known_member_fields
end
@doc """
Normalizes a CSV header string for comparison.
Applies the following transformations:
- Trim whitespace
- Convert to lowercase
- Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
- Unify hyphen variants (en dash U+2013, minus sign U+2212 → standard hyphen)
- Remove or unify punctuation (parentheses, slashes → spaces)
- Remove all whitespace (ensures "first name" == "firstname")
- Final trim
## Examples
iex> normalize_header(" E-Mail ")
"e-mail"
iex> normalize_header("Straße")
"strasse"
iex> normalize_header("E-Mail (privat)")
"e-mailprivat"
iex> normalize_header("First Name")
"firstname"
"""
@spec normalize_header(String.t()) :: String.t()
def normalize_header(header) when is_binary(header) do
header
|> String.trim()
|> String.downcase()
|> transliterate_unicode()
|> unify_hyphens()
|> normalize_punctuation()
|> compress_whitespace()
|> String.trim()
end
def normalize_header(_), do: ""
@doc """
Builds column maps for member fields and custom fields from CSV headers.
## Parameters
- `headers` - List of CSV header strings (in column order, 0-based indices)
- `custom_fields` - List of custom field maps/structs with at least `:id` and `:name` keys
## Returns
- `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers}}` on success
- `{:error, reason}` on error (missing required field, duplicate headers)
## Examples
iex> build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}
iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
"""
@spec build_maps([String.t()], [map()]) ::
{:ok, %{member: column_map(), custom: custom_field_map(), unknown: unknown_headers()}}
| {:error, String.t()}
def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do
with {:ok, member_map, unknown_after_member} <- build_member_map(headers),
{:ok, custom_map, unknown_after_custom} <-
build_custom_field_map(headers, unknown_after_member, custom_fields, member_map) do
unknown = Enum.map(unknown_after_custom, &Enum.at(headers, &1))
{:ok, %{member: member_map, custom: custom_map, unknown: unknown}}
end
end
# --- Private Functions ---
# Transliterates German umlauts and special characters
defp transliterate_unicode(str) do
str
|> String.replace("ß", "ss")
|> String.replace("ä", "ae")
|> String.replace("ö", "oe")
|> String.replace("ü", "ue")
|> String.replace("Ä", "ae")
|> String.replace("Ö", "oe")
|> String.replace("Ü", "ue")
end
# Unifies different hyphen variants to standard hyphen
defp unify_hyphens(str) do
str
# en dash
|> String.replace(<<0x2013::utf8>>, "-")
# em dash
|> String.replace(<<0x2014::utf8>>, "-")
# minus sign
|> String.replace(<<0x2212::utf8>>, "-")
end
# Normalizes punctuation: parentheses, slashes, underscores become spaces
defp normalize_punctuation(str) do
str
|> String.replace("_", " ")
|> String.replace(~r/[()\[\]{}]/, " ")
|> String.replace(~r/[\/\\]/, " ")
end
# Compresses multiple whitespace characters to single space, then removes all spaces
# This ensures "first name" and "firstname" normalize to the same value
defp compress_whitespace(str) do
str
|> String.replace(~r/\s+/, " ")
|> String.replace(" ", "")
end
# Builds member field column map
defp build_member_map(headers) do
result =
headers
|> Enum.with_index()
|> Enum.reduce_while({%{}, []}, fn {header, index}, {acc_map, acc_unknown} ->
normalized = normalize_header(header)
case process_member_header(header, index, normalized, acc_map, %{}) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{member_map, unknown_indices} ->
validate_required_fields(member_map, unknown_indices)
end
end
# Processes a single header for member field mapping
defp process_member_header(_header, _index, normalized, acc_map, acc_seen)
when normalized == "" do
{:ok, acc_map, acc_seen}
end
defp process_member_header(_header, index, normalized, acc_map, _acc_seen) do
case Map.get(normalized_to_canonical(), normalized) do
nil ->
{:unknown}
canonical ->
if Map.has_key?(acc_map, canonical) do
{:error, "duplicate header for #{canonical} (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, canonical, index), %{}}
end
end
end
# Validates that all required member fields are present
defp validate_required_fields(member_map, unknown_indices) do
missing_required =
@required_member_fields
|> Enum.filter(&(not Map.has_key?(member_map, &1)))
if Enum.empty?(missing_required) do
{:ok, member_map, Enum.reverse(unknown_indices)}
else
missing_field = List.first(missing_required)
variants = Map.get(@member_field_variants_raw, missing_field, [])
accepted = Enum.join(variants, ", ")
{:error, "Missing required header: #{missing_field} (accepted: #{accepted})"}
end
end
# Builds custom field column map from unmatched headers
defp build_custom_field_map(headers, unknown_indices, custom_fields, _member_map) do
custom_field_lookup = build_custom_field_lookup(custom_fields)
result =
unknown_indices
|> Enum.reduce_while({%{}, []}, fn index, {acc_map, acc_unknown} ->
header = Enum.at(headers, index)
normalized = normalize_header(header)
case process_custom_field_header(
header,
index,
normalized,
custom_field_lookup,
acc_map,
%{}
) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{custom_map, remaining_unknown} ->
{:ok, custom_map, Enum.reverse(remaining_unknown)}
end
end
# Builds normalized custom field name -> id lookup map
defp build_custom_field_lookup(custom_fields) do
custom_fields
|> Enum.reduce(%{}, fn cf, acc ->
name = Map.get(cf, :name) || Map.get(cf, "name")
id = Map.get(cf, :id) || Map.get(cf, "id")
if name && id do
normalized_name = normalize_header(name)
Map.put(acc, normalized_name, id)
else
acc
end
end)
end
# Processes a single header for custom field mapping
defp process_custom_field_header(
_header,
_index,
normalized,
_custom_field_lookup,
acc_map,
_acc_seen
)
when normalized == "" do
{:ok, acc_map, %{}}
end
defp process_custom_field_header(
_header,
index,
normalized,
custom_field_lookup,
acc_map,
_acc_seen
) do
if Map.has_key?(custom_field_lookup, normalized) do
custom_field_id = custom_field_lookup[normalized]
if Map.has_key?(acc_map, custom_field_id) do
{:error, "duplicate custom field header (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, custom_field_id, index), %{}}
end
else
{:unknown}
end
end
end