mitgliederverwaltung/lib/mv/membership/import/header_mapper.ex

535 lines
16 KiB
Elixir
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

defmodule Mv.Membership.Import.HeaderMapper do
@moduledoc """
Maps CSV headers to canonical member fields and custom fields.
Provides header normalization and mapping functionality for CSV imports.
Handles bilingual header variants (English/German) and custom field detection.
## Header Normalization
Headers are normalized using the following rules:
- Trim whitespace
- Convert to lowercase
- Unicode normalization (ß → ss, ä → ae, ö → oe, ü → ue)
- Remove all whitespace (ensures "first name" == "firstname")
- Unify hyphen variants (en dash, minus sign → standard hyphen)
- Remove or unify punctuation (parentheses, slashes → spaces)
## Member Field Mapping
Maps CSV headers to canonical member fields (same as `Mv.Constants.member_fields()` for
importable attributes). All DB-backed member attributes can be imported.
- `email` (required)
- `first_name`, `last_name` (optional)
- `join_date`, `exit_date` (optional, ISO-8601 date)
- `notes` (optional)
- `country`, `city`, `street`, `house_number`, `postal_code` (optional)
- `membership_fee_start_date` (optional, ISO-8601 date)
Supports English and German header variants (e.g. "Email" / "E-Mail", "Join Date" / "Beitrittsdatum").
## Fields not supported for import
- **membership_fee_status** Computed (calculation from membership fee cycles). Not stored;
cannot be set via CSV. Export can include it.
- **groups** Many-to-many relationship (through member_groups). Import would require
resolving group names/slugs to IDs and creating associations; not in current import scope.
## Custom Field Detection
Custom fields are detected by matching normalized header names to custom field names.
Member fields have priority over custom fields (member field wins in case of collision).
## Examples
iex> HeaderMapper.normalize_header(" E-Mail ")
"e-mail"
iex> HeaderMapper.build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: [], ignored: [], groups_column_index: nil, fee_type_column_index: nil}}
iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: [], ignored: [], groups_column_index: nil, fee_type_column_index: nil}}
"""
@type column_map :: %{atom() => non_neg_integer()}
@type custom_field_map :: %{String.t() => non_neg_integer()}
@type unknown_headers :: [String.t()]
# Required member fields
@required_member_fields [:email]
# Fee-status header variants that must never be imported (computed/read-only field).
# Stored already-normalized; checked before member, custom, groups, and fee-type mapping.
# Maintain this list when new locale translations for fee-status are added.
@ignored_normalized [
"membershipfeestatus",
"mitgliedsbeitragsstatus",
"bezahlstatus"
]
# Normalized header variants for the groups column. The column is resolved to
# group associations during import; it is never a member or custom field.
@groups_column_normalized [
"groups",
"gruppen",
"gruppe"
]
# Normalized header variants for the membership fee-type column. The column is
# resolved to a MembershipFeeType during import; it is never a member or custom field.
@fee_type_column_normalized [
"membershipfeetype",
"feetype",
"beitragsart"
]
# Canonical member fields with their raw variants
# These will be normalized at runtime when building the lookup map
@member_field_variants_raw %{
email: [
"email",
"e-mail",
"e_mail",
"e mail",
"e-mail adresse",
"e-mail-adresse",
"mail"
],
first_name: [
"first name",
"firstname",
"vorname"
],
last_name: [
"last name",
"lastname",
"surname",
"nachname",
"familienname"
],
join_date: [
"join date",
"join_date",
"beitrittsdatum",
"beitritts-datum"
],
exit_date: [
"exit date",
"exit_date",
"austrittsdatum",
"austritts-datum"
],
notes: [
"notes",
"notizen",
"bemerkungen"
],
street: [
"street",
"address",
"strasse"
],
house_number: [
"house number",
"house_number",
"house no",
"hausnummer",
"nr",
"nr.",
"nummer"
],
postal_code: [
"postal code",
"postal_code",
"zip",
"postcode",
"plz",
"postleitzahl"
],
city: [
"city",
"town",
"stadt",
"ort"
],
country: [
"country",
"land",
"staat"
],
membership_fee_start_date: [
"membership fee start date",
"membership_fee_start_date",
"fee start",
"beitragsbeginn",
"beitrags-beginn"
]
}
# Build reverse map: normalized_variant -> canonical_field
# Computed on each access - the map is small enough that recomputing is fast
# This avoids Module.get_attribute issues while maintaining simplicity
defp normalized_to_canonical do
@member_field_variants_raw
|> Enum.flat_map(fn {canonical, variants} ->
Enum.map(variants, fn variant ->
{normalize_header(variant), canonical}
end)
end)
|> Map.new()
end
@doc """
Returns a MapSet of normalized member field names.
This is the single source of truth for known member fields.
Used to distinguish between member fields and custom fields.
## Returns
- `MapSet.t(String.t())` - Set of normalized member field names
## Examples
iex> HeaderMapper.known_member_fields()
#MapSet<["email", "firstname", "lastname", "street", "postalcode", "city"]>
"""
# Known member fields computed at compile-time for performance and determinism
@known_member_fields @member_field_variants_raw
|> Map.keys()
|> Enum.map(fn canonical ->
# Normalize the canonical field name (e.g., :first_name -> "firstname")
canonical
|> Atom.to_string()
|> String.replace("_", "")
|> String.downcase()
end)
|> MapSet.new()
@spec known_member_fields() :: MapSet.t(String.t())
def known_member_fields do
@known_member_fields
end
@doc """
Normalizes a CSV header string for comparison.
Applies the following transformations:
- Trim whitespace
- Convert to lowercase
- Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
- Unify hyphen variants (en dash U+2013, minus sign U+2212 → standard hyphen)
- Remove or unify punctuation (parentheses, slashes → spaces)
- Remove all whitespace (ensures "first name" == "firstname")
- Final trim
## Examples
iex> normalize_header(" E-Mail ")
"e-mail"
iex> normalize_header("Straße")
"strasse"
iex> normalize_header("E-Mail (privat)")
"e-mailprivat"
iex> normalize_header("First Name")
"firstname"
"""
@spec normalize_header(String.t()) :: String.t()
def normalize_header(header) when is_binary(header) do
header
|> String.trim()
|> String.downcase()
|> transliterate_unicode()
|> unify_hyphens()
|> normalize_punctuation()
|> compress_whitespace()
|> String.trim()
end
def normalize_header(_), do: ""
@doc """
Builds column maps for member fields and custom fields from CSV headers.
## Parameters
- `headers` - List of CSV header strings (in column order, 0-based indices)
- `custom_fields` - List of custom field maps/structs with at least `:id` and `:name` keys
## Returns
- `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers,
ignored: [non_neg_integer], groups_column_index: non_neg_integer | nil,
fee_type_column_index: non_neg_integer | nil}}` on success
- `{:error, reason}` on error (missing required field, duplicate headers)
The `ignored` list holds the indices of fee-status columns (computed/read-only),
which are never mapped to member or custom fields.
## Examples
iex> build_maps(["Email", "First Name"], [])
{:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: [], ignored: [], groups_column_index: nil, fee_type_column_index: nil}}
iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
{:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: [], ignored: [], groups_column_index: nil, fee_type_column_index: nil}}
"""
@spec build_maps([String.t()], [map()]) ::
{:ok,
%{
member: column_map(),
custom: custom_field_map(),
unknown: unknown_headers(),
ignored: [non_neg_integer()],
groups_column_index: non_neg_integer() | nil,
fee_type_column_index: non_neg_integer() | nil
}}
| {:error, String.t()}
def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do
ignored = ignored_indices(headers)
groups_column_index = first_matching_index(headers, @groups_column_normalized)
fee_type_column_index = first_matching_index(headers, @fee_type_column_normalized)
reserved =
[groups_column_index, fee_type_column_index | ignored]
|> Enum.reject(&is_nil/1)
|> MapSet.new()
with {:ok, member_map, unknown_after_member} <- build_member_map(headers, reserved),
{:ok, custom_map, unknown_after_custom} <-
build_custom_field_map(headers, unknown_after_member, custom_fields, member_map) do
unknown = Enum.map(unknown_after_custom, &Enum.at(headers, &1))
{:ok,
%{
member: member_map,
custom: custom_map,
unknown: unknown,
ignored: ignored,
groups_column_index: groups_column_index,
fee_type_column_index: fee_type_column_index
}}
end
end
# Returns the index of the first header whose normalized form is in `variants`,
# or nil if none match.
defp first_matching_index(headers, variants) do
headers
|> Enum.with_index()
|> Enum.find_value(fn {header, index} ->
if normalize_header(header) in variants, do: index
end)
end
# Returns the column indices whose normalized header is in the fee-status ignore list.
defp ignored_indices(headers) do
headers
|> Enum.with_index()
|> Enum.filter(fn {header, _index} -> normalize_header(header) in @ignored_normalized end)
|> Enum.map(fn {_header, index} -> index end)
end
# --- Private Functions ---
# Transliterates German umlauts and special characters
defp transliterate_unicode(str) do
str
|> String.replace("ß", "ss")
|> String.replace("ä", "ae")
|> String.replace("ö", "oe")
|> String.replace("ü", "ue")
|> String.replace("Ä", "ae")
|> String.replace("Ö", "oe")
|> String.replace("Ü", "ue")
end
# Unifies different hyphen variants to standard hyphen
defp unify_hyphens(str) do
str
# en dash
|> String.replace(<<0x2013::utf8>>, "-")
# em dash
|> String.replace(<<0x2014::utf8>>, "-")
# minus sign
|> String.replace(<<0x2212::utf8>>, "-")
end
# Normalizes punctuation: parentheses, slashes, underscores become spaces
defp normalize_punctuation(str) do
str
|> String.replace("_", " ")
|> String.replace(~r/[()\[\]{}]/, " ")
|> String.replace(~r/[\/\\]/, " ")
end
# Compresses multiple whitespace characters to single space, then removes all spaces
# This ensures "first name" and "firstname" normalize to the same value
defp compress_whitespace(str) do
str
|> String.replace(~r/\s+/, " ")
|> String.replace(" ", "")
end
# Builds member field column map, skipping reserved (e.g. ignored) indices.
defp build_member_map(headers, reserved) do
result =
headers
|> Enum.with_index()
|> Enum.reduce_while({%{}, []}, fn {header, index}, {acc_map, acc_unknown} ->
normalized =
if MapSet.member?(reserved, index), do: "", else: normalize_header(header)
case process_member_header(header, index, normalized, acc_map, %{}) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{member_map, unknown_indices} ->
validate_required_fields(member_map, unknown_indices)
end
end
# Processes a single header for member field mapping
defp process_member_header(_header, _index, normalized, acc_map, acc_seen)
when normalized == "" do
{:ok, acc_map, acc_seen}
end
defp process_member_header(_header, index, normalized, acc_map, _acc_seen) do
case Map.get(normalized_to_canonical(), normalized) do
nil ->
{:unknown}
canonical ->
if Map.has_key?(acc_map, canonical) do
{:error, "duplicate header for #{canonical} (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, canonical, index), %{}}
end
end
end
# Validates that all required member fields are present
defp validate_required_fields(member_map, unknown_indices) do
missing_required =
@required_member_fields
|> Enum.filter(&(not Map.has_key?(member_map, &1)))
if Enum.empty?(missing_required) do
{:ok, member_map, Enum.reverse(unknown_indices)}
else
missing_field = List.first(missing_required)
variants = Map.get(@member_field_variants_raw, missing_field, [])
accepted = Enum.join(variants, ", ")
{:error, "Missing required header: #{missing_field} (accepted: #{accepted})"}
end
end
# Builds custom field column map from unmatched headers
defp build_custom_field_map(headers, unknown_indices, custom_fields, _member_map) do
custom_field_lookup = build_custom_field_lookup(custom_fields)
result =
unknown_indices
|> Enum.reduce_while({%{}, []}, fn index, {acc_map, acc_unknown} ->
header = Enum.at(headers, index)
normalized = normalize_header(header)
case process_custom_field_header(
header,
index,
normalized,
custom_field_lookup,
acc_map,
%{}
) do
{:error, reason} ->
{:halt, {:error, reason}}
{:ok, new_map, _} ->
{:cont, {new_map, acc_unknown}}
{:unknown} ->
{:cont, {acc_map, [index | acc_unknown]}}
end
end)
case result do
{:error, reason} ->
{:error, reason}
{custom_map, remaining_unknown} ->
{:ok, custom_map, Enum.reverse(remaining_unknown)}
end
end
# Builds normalized custom field name -> id lookup map
defp build_custom_field_lookup(custom_fields) do
custom_fields
|> Enum.reduce(%{}, fn cf, acc ->
name = Map.get(cf, :name) || Map.get(cf, "name")
id = Map.get(cf, :id) || Map.get(cf, "id")
if name && id do
normalized_name = normalize_header(name)
Map.put(acc, normalized_name, id)
else
acc
end
end)
end
# Processes a single header for custom field mapping
defp process_custom_field_header(
_header,
_index,
normalized,
_custom_field_lookup,
acc_map,
_acc_seen
)
when normalized == "" do
{:ok, acc_map, %{}}
end
defp process_custom_field_header(
_header,
index,
normalized,
custom_field_lookup,
acc_map,
_acc_seen
) do
if Map.has_key?(custom_field_lookup, normalized) do
custom_field_id = custom_field_lookup[normalized]
if Map.has_key?(acc_map, custom_field_id) do
{:error, "duplicate custom field header (normalized: #{normalized})"}
else
{:ok, Map.put(acc_map, custom_field_id, index), %{}}
end
else
{:unknown}
end
end
end