460 lines
13 KiB
Elixir
460 lines
13 KiB
Elixir
defmodule Mv.Membership.Import.HeaderMapper do
  @moduledoc """
  Maps CSV headers to canonical member fields and custom fields.

  Provides header normalization and mapping functionality for CSV imports.
  Handles bilingual header variants (English/German) and custom field detection.

  ## Header Normalization

  Headers are normalized using the following rules (applied in this order):
  - Trim whitespace
  - Convert to lowercase
  - Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
  - Unify hyphen variants (en dash, em dash, minus sign → standard hyphen)
  - Turn punctuation into spaces (underscores, parentheses, brackets, braces, slashes)
  - Remove all whitespace, including Unicode whitespace such as non-breaking
    spaces (ensures "first name" == "firstname")

  ## Member Field Mapping

  Maps CSV headers to canonical member fields (same as `Mv.Constants.member_fields()` for
  importable attributes). All DB-backed member attributes can be imported.

  - `email` (required)
  - `first_name`, `last_name` (optional)
  - `join_date`, `exit_date` (optional, ISO-8601 date)
  - `notes` (optional)
  - `country`, `city`, `street`, `house_number`, `postal_code` (optional)
  - `membership_fee_start_date` (optional, ISO-8601 date)

  Supports English and German header variants (e.g. "Email" / "E-Mail", "Join Date" / "Beitrittsdatum").

  ## Fields not supported for import

  - **membership_fee_status** – Computed (calculation from membership fee cycles). Not stored;
    cannot be set via CSV. Export can include it.
  - **groups** – Many-to-many relationship (through member_groups). Import would require
    resolving group names/slugs to IDs and creating associations; not in current import scope.

  ## Custom Field Detection

  Custom fields are detected by matching normalized header names to custom field names.
  Member fields have priority over custom fields (member field wins in case of collision).

  ## Examples

      iex> HeaderMapper.normalize_header(" E-Mail ")
      "e-mail"

      iex> HeaderMapper.build_maps(["Email", "First Name"], [])
      {:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}

      iex> HeaderMapper.build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
      {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}
  """

  @type column_map :: %{atom() => non_neg_integer()}
  @type custom_field_map :: %{String.t() => non_neg_integer()}
  @type unknown_headers :: [String.t()]

  # Required member fields
  @required_member_fields [:email]

  # Canonical member fields with their raw variants.
  # The variants are normalized at runtime when the lookup map is built; their
  # order is also the order shown in the "Missing required header" error message.
  @member_field_variants_raw %{
    email: [
      "email",
      "e-mail",
      "e_mail",
      "e mail",
      "e-mail adresse",
      "e-mail-adresse",
      "mail"
    ],
    first_name: ["first name", "firstname", "vorname"],
    last_name: ["last name", "lastname", "surname", "nachname", "familienname"],
    join_date: ["join date", "join_date", "beitrittsdatum", "beitritts-datum"],
    exit_date: ["exit date", "exit_date", "austrittsdatum", "austritts-datum"],
    notes: ["notes", "notizen", "bemerkungen"],
    street: ["street", "address", "strasse"],
    house_number: [
      "house number",
      "house_number",
      "house no",
      "hausnummer",
      "nr",
      "nr.",
      "nummer"
    ],
    postal_code: ["postal code", "postal_code", "zip", "postcode", "plz", "postleitzahl"],
    city: ["city", "town", "stadt", "ort"],
    country: ["country", "land", "staat"],
    membership_fee_start_date: [
      "membership fee start date",
      "membership_fee_start_date",
      "fee start",
      "beitragsbeginn",
      "beitrags-beginn"
    ]
  }

  # Known member fields computed at compile-time for performance and determinism.
  # Each canonical atom is reduced to its normalized form (e.g. :first_name -> "firstname").
  @known_member_fields @member_field_variants_raw
                       |> Map.keys()
                       |> Enum.map(fn canonical ->
                         canonical
                         |> Atom.to_string()
                         |> String.replace("_", "")
                         |> String.downcase()
                       end)
                       |> MapSet.new()

  @doc """
  Returns a MapSet of normalized member field names.

  This is the single source of truth for known member fields.
  Used to distinguish between member fields and custom fields.

  ## Returns

  - `MapSet.t(String.t())` - Set of normalized member field names
    (canonical name, lowercased, underscores removed)

  ## Examples

      iex> fields = HeaderMapper.known_member_fields()
      iex> MapSet.member?(fields, "firstname")
      true
      iex> MapSet.member?(fields, "first_name")
      false

  """
  @spec known_member_fields() :: MapSet.t(String.t())
  def known_member_fields, do: @known_member_fields

  @doc """
  Normalizes a CSV header string for comparison.

  Applies the following transformations:
  - Trim whitespace
  - Convert to lowercase
  - Unicode transliteration (ß → ss, ä → ae, ö → oe, ü → ue)
  - Unify hyphen variants (en dash U+2013, em dash U+2014, minus sign U+2212 → standard hyphen)
  - Turn punctuation into spaces (underscores, parentheses, brackets, braces, slashes)
  - Remove all whitespace, including Unicode whitespace such as non-breaking
    spaces (ensures "first name" == "firstname")
  - Final trim

  Any non-binary input normalizes to the empty string.

  ## Examples

      iex> normalize_header(" E-Mail ")
      "e-mail"

      iex> normalize_header("Straße")
      "strasse"

      iex> normalize_header("E-Mail (privat)")
      "e-mailprivat"

      iex> normalize_header("First Name")
      "firstname"

  """
  @spec normalize_header(String.t()) :: String.t()
  def normalize_header(header) when is_binary(header) do
    header
    |> String.trim()
    |> String.downcase()
    |> transliterate_unicode()
    |> unify_hyphens()
    |> normalize_punctuation()
    |> strip_whitespace()
    |> String.trim()
  end

  def normalize_header(_), do: ""

  @doc """
  Builds column maps for member fields and custom fields from CSV headers.

  Headers that normalize to the empty string (blank or non-binary headers) are
  ignored: they are neither mapped nor reported as unknown.

  ## Parameters

  - `headers` - List of CSV header strings (in column order, 0-based indices)
  - `custom_fields` - List of custom field maps/structs with at least `:id` and `:name` keys

  ## Returns

  - `{:ok, %{member: column_map, custom: custom_field_map, unknown: unknown_headers}}` on success
  - `{:error, reason}` on error (missing required field, duplicate headers)

  ## Examples

      iex> build_maps(["Email", "First Name"], [])
      {:ok, %{member: %{email: 0, first_name: 1}, custom: %{}, unknown: []}}

      iex> build_maps(["Email", "CustomField"], [%{id: "cf1", name: "CustomField"}])
      {:ok, %{member: %{email: 0}, custom: %{"cf1" => 1}, unknown: []}}

  """
  @spec build_maps([String.t()], [map()]) ::
          {:ok, %{member: column_map(), custom: custom_field_map(), unknown: unknown_headers()}}
          | {:error, String.t()}
  def build_maps(headers, custom_fields) when is_list(headers) and is_list(custom_fields) do
    # Member fields are resolved first, so they win over custom fields on collision;
    # only headers left unmatched are offered to the custom field lookup.
    with {:ok, member_map, unmatched} <- build_member_map(headers),
         {:ok, custom_map, unknown} <- build_custom_field_map(unmatched, custom_fields) do
      {:ok, %{member: member_map, custom: custom_map, unknown: unknown}}
    end
  end

  # --- Private Functions ---

  # Builds the reverse map: normalized_variant -> canonical_field.
  # Built at runtime because normalize_header/1 is defined in this module and
  # cannot be invoked from the module body while it is being compiled. Callers
  # should build it once per import rather than once per header.
  defp normalized_to_canonical do
    for {canonical, variants} <- @member_field_variants_raw,
        variant <- variants,
        into: %{} do
      {normalize_header(variant), canonical}
    end
  end

  # Transliterates German umlauts and sharp s.
  # The input has already been lowercased by normalize_header/1, so only the
  # lowercase forms can occur here.
  defp transliterate_unicode(str) do
    str
    |> String.replace("ß", "ss")
    |> String.replace("ä", "ae")
    |> String.replace("ö", "oe")
    |> String.replace("ü", "ue")
  end

  # Unifies en dash (U+2013), em dash (U+2014) and minus sign (U+2212) to "-".
  defp unify_hyphens(str) do
    String.replace(str, ["\u2013", "\u2014", "\u2212"], "-")
  end

  # Normalizes punctuation: underscores, parentheses, brackets, braces and
  # slashes (forward and backward) become spaces.
  defp normalize_punctuation(str) do
    String.replace(str, ~r/[_()\[\]{}\/\\]/, " ")
  end

  # Removes every whitespace run so "first name" and "firstname" normalize to
  # the same value. The `u` modifier makes \s Unicode-aware, so non-breaking
  # spaces and similar characters are removed as well.
  defp strip_whitespace(str) do
    String.replace(str, ~r/\s+/u, "")
  end

  # Builds the member field column map.
  #
  # Returns `{:ok, member_map, unmatched}` where `unmatched` is a list of
  # `{header, normalized, index}` tuples in column order, or `{:error, reason}`
  # for a duplicate member header or a missing required field.
  defp build_member_map(headers) do
    # Built once per call; every header is looked up in the same map.
    lookup = normalized_to_canonical()

    result =
      headers
      |> Enum.with_index()
      |> Enum.reduce_while({%{}, []}, fn {header, index}, {member_map, unmatched} ->
        normalized = normalize_header(header)

        case classify_member_header(normalized, lookup, member_map) do
          :skip ->
            {:cont, {member_map, unmatched}}

          :unknown ->
            {:cont, {member_map, [{header, normalized, index} | unmatched]}}

          {:ok, canonical} ->
            {:cont, {Map.put(member_map, canonical, index), unmatched}}

          {:error, _reason} = error ->
            {:halt, error}
        end
      end)

    case result do
      {:error, _reason} = error ->
        error

      {member_map, unmatched} ->
        # Unmatched headers were prepended; restore column order.
        validate_required_fields(member_map, Enum.reverse(unmatched))
    end
  end

  # Classifies one normalized header against the member field lookup.
  # Returns :skip (empty header), :unknown (no member field matches),
  # {:ok, canonical} or {:error, reason} when the field was already mapped.
  defp classify_member_header("", _lookup, _member_map), do: :skip

  defp classify_member_header(normalized, lookup, member_map) do
    case Map.fetch(lookup, normalized) do
      :error ->
        :unknown

      {:ok, canonical} ->
        if Map.has_key?(member_map, canonical) do
          {:error, "duplicate header for #{canonical} (normalized: #{normalized})"}
        else
          {:ok, canonical}
        end
    end
  end

  # Validates that all required member fields are present. Only the first
  # missing field is reported, together with its accepted header variants.
  defp validate_required_fields(member_map, unmatched) do
    case Enum.reject(@required_member_fields, &Map.has_key?(member_map, &1)) do
      [] ->
        {:ok, member_map, unmatched}

      [missing_field | _] ->
        accepted =
          @member_field_variants_raw
          |> Map.get(missing_field, [])
          |> Enum.join(", ")

        {:error, "Missing required header: #{missing_field} (accepted: #{accepted})"}
    end
  end

  # Builds the custom field column map from the headers no member field claimed.
  #
  # `unmatched` is the list of `{header, normalized, index}` tuples produced by
  # build_member_map/1 (empty headers are already filtered out there).
  # Returns `{:ok, custom_map, unknown_headers}` with the unknown headers in
  # column order, or `{:error, reason}` when two headers hit the same custom field.
  defp build_custom_field_map(unmatched, custom_fields) do
    lookup = build_custom_field_lookup(custom_fields)

    result =
      Enum.reduce_while(unmatched, {%{}, []}, fn {header, normalized, index},
                                                 {custom_map, unknown} ->
        case Map.fetch(lookup, normalized) do
          :error ->
            {:cont, {custom_map, [header | unknown]}}

          {:ok, custom_field_id} ->
            if Map.has_key?(custom_map, custom_field_id) do
              {:halt, {:error, "duplicate custom field header (normalized: #{normalized})"}}
            else
              {:cont, {Map.put(custom_map, custom_field_id, index), unknown}}
            end
        end
      end)

    case result do
      {:error, _reason} = error -> error
      {custom_map, unknown} -> {:ok, custom_map, Enum.reverse(unknown)}
    end
  end

  # Builds the normalized custom field name -> id lookup map.
  # Accepts atom or string keys; entries without a name or id are skipped.
  # If two custom fields normalize to the same name, the later one wins.
  defp build_custom_field_lookup(custom_fields) do
    for field <- custom_fields,
        name = Map.get(field, :name) || Map.get(field, "name"),
        id = Map.get(field, :id) || Map.get(field, "id"),
        into: %{} do
      {normalize_header(name), id}
    end
  end
end
|