feat(import): resolve import group and fee-type names against existing records

This commit is contained in:
Moritz 2026-06-03 02:10:33 +02:00
parent 95c7bf7a15
commit a4a34cab3a
3 changed files with 557 additions and 0 deletions

View file

@ -0,0 +1,258 @@
defmodule Mv.Membership.Import.ColumnResolver do
@moduledoc """
Read-only resolution of CSV import columns against the database.
Given the `HeaderMapper.build_maps/2` result, the raw numbered rows, and an
actor, `resolve/3` determines:
- which group names in the groups column already exist (`groups_found`) and
which would have to be created (`groups_to_create`);
- a small set of preview rows for the mapping preview UI.
No database writes happen here; the resolver only reads. Group creation and
member-group assignment happen during processing via `create_or_find_group/3`.
This module has no Phoenix or web dependencies.
"""
require Logger
alias Mv.Membership.Import.HeaderMapper
@preview_row_limit 3
@type numbered_row :: {pos_integer(), [String.t()]}
@type resolution :: %{
groups_found: [%{id: String.t(), name: String.t()}],
groups_to_create: [String.t()],
fee_type_map: %{String.t() => String.t()},
fee_type_warnings: [String.t()],
has_empty_fee_type_cells?: boolean(),
preview_rows: [[String.t()]]
}
@doc """
Resolves the group and fee-type columns of an import against the database and
extracts preview rows.
Returns a map with `:groups_found`, `:groups_to_create`, `:fee_type_map`,
`:fee_type_warnings`, `:has_empty_fee_type_cells?`, and `:preview_rows`.
"""
@spec resolve(map(), [numbered_row()], term()) :: resolution()
def resolve(header_maps, rows, actor) do
%{
groups_found: groups_found,
groups_to_create: groups_to_create
} = resolve_groups(header_maps, rows, actor)
%{
fee_type_map: fee_type_map,
fee_type_warnings: fee_type_warnings,
has_empty_fee_type_cells?: has_empty_fee_type_cells?
} = resolve_fee_types(header_maps, rows, actor)
%{
groups_found: groups_found,
groups_to_create: groups_to_create,
fee_type_map: fee_type_map,
fee_type_warnings: fee_type_warnings,
has_empty_fee_type_cells?: has_empty_fee_type_cells?,
preview_rows: preview_rows(rows)
}
end
defp resolve_groups(%{groups_column_index: nil}, _rows, _actor) do
%{groups_found: [], groups_to_create: []}
end
defp resolve_groups(%{groups_column_index: index}, rows, actor) do
existing_groups = list_groups(actor)
lookup = build_group_lookup(existing_groups)
names = unique_group_names(rows, index)
{found, to_create} =
Enum.reduce(names, {[], []}, fn name, {found, to_create} ->
case Map.get(lookup, normalize_name(name)) do
nil -> {found, [name | to_create]}
group -> {[%{id: group.id, name: group.name} | found], to_create}
end
end)
%{groups_found: Enum.reverse(found), groups_to_create: Enum.reverse(to_create)}
end
defp resolve_fee_types(%{fee_type_column_index: nil}, _rows, _actor) do
%{fee_type_map: %{}, fee_type_warnings: [], has_empty_fee_type_cells?: false}
end
defp resolve_fee_types(%{fee_type_column_index: index}, rows, actor) do
lookup = build_fee_type_lookup(actor)
cells = Enum.map(rows, fn {_line, values} -> Enum.at(values, index) end)
has_empty? = Enum.any?(cells, &blank?/1)
{fee_type_map, warnings} =
cells
|> Enum.reject(&blank?/1)
|> Enum.uniq_by(&normalize_fee_type_name/1)
|> Enum.reduce({%{}, []}, fn name, {map, warnings} ->
case Map.get(lookup, normalize_fee_type_name(name)) do
nil -> {map, [String.trim(name) | warnings]}
id -> {Map.put(map, normalize_fee_type_name(name), id), warnings}
end
end)
%{
fee_type_map: fee_type_map,
fee_type_warnings: Enum.reverse(warnings),
has_empty_fee_type_cells?: has_empty?
}
end
@doc """
Normalizes a fee-type name using the same rules as CSV header normalization
(trim, lowercase, transliterate, drop hyphens and whitespace).
"""
@spec normalize_fee_type_name(String.t() | nil) :: String.t()
def normalize_fee_type_name(name) when is_binary(name), do: HeaderMapper.normalize_header(name)
def normalize_fee_type_name(_), do: ""
defp build_fee_type_lookup(actor) do
actor
|> list_fee_types()
|> Enum.reduce(%{}, fn fee_type, acc ->
normalized = normalize_fee_type_name(fee_type.name)
if Map.has_key?(acc, normalized) do
Logger.warning(
"Multiple membership fee types normalize to #{inspect(normalized)}; using the first match for CSV import."
)
acc
else
Map.put(acc, normalized, fee_type.id)
end
end)
end
defp list_fee_types(actor) do
Mv.MembershipFees.list_membership_fee_types!(actor: actor)
end
defp blank?(nil), do: true
defp blank?(value) when is_binary(value), do: String.trim(value) == ""
defp blank?(_), do: false
@doc """
Finds an existing group by name (case-insensitive) or creates it.
Looks first in the pre-fetched `groups` list, then in the database (to catch
groups created earlier in the same import), and only creates a new group when
none is found. This keeps group resolution idempotent across re-imports.
"""
@spec create_or_find_group(String.t(), [Mv.Membership.Group.t()], term()) ::
{:ok, Mv.Membership.Group.t()} | {:error, term()}
def create_or_find_group(name, groups, actor) when is_binary(name) do
trimmed = String.trim(name)
normalized = normalize_name(trimmed)
case find_group_in_list(groups, normalized) do
nil -> find_or_create_group(trimmed, normalized, actor)
group -> {:ok, group}
end
end
defp find_group_in_list(groups, normalized) do
Enum.find(groups, fn group -> normalize_name(group.name) == normalized end)
end
defp find_or_create_group(trimmed, normalized, actor) do
case fetch_group_by_normalized_name(normalized, actor) do
nil -> create_group(trimmed, normalized, actor)
group -> {:ok, group}
end
end
# Normalizes the Ash code-interface return to a two-shape result.
#
# On a create failure the group may have been created concurrently by another
# import session between our read and our write (the DB unique index is the
# final arbiter, and the name validation is fail-open). Re-fetch by normalized
# name and link to the existing group rather than failing the row.
defp create_group(name, normalized, actor) do
case Mv.Membership.create_group(%{name: name}, actor: actor) do
{:ok, %Mv.Membership.Group{} = group} ->
{:ok, group}
{:error, reason} ->
case fetch_group_by_normalized_name(normalized, actor) do
nil -> {:error, reason}
group -> {:ok, group}
end
end
end
# Fetches a single group by case-insensitive name using a name-filtered query
# rather than reading the whole groups table. `normalized` is the trimmed,
# lower-cased name; the DB comparison uses LOWER(name) consistent with the
# Group resource's case-insensitive uniqueness constraint.
defp fetch_group_by_normalized_name(normalized, actor) do
require Ash.Query
Mv.Membership.Group
|> Ash.Query.filter(fragment("LOWER(?) = ?", name, ^normalized))
|> Ash.read(actor: actor, domain: Mv.Membership)
|> case do
{:ok, [group | _]} -> group
_ -> nil
end
end
@doc """
Splits a raw groups-cell value into trimmed, non-empty group names.
"""
@spec split_group_names(String.t() | nil) :: [String.t()]
def split_group_names(nil), do: []
def split_group_names(cell) when is_binary(cell) do
cell
|> String.split(",")
|> Enum.map(&String.trim/1)
|> Enum.reject(&(&1 == ""))
end
defp unique_group_names(rows, index) do
rows
|> Enum.flat_map(fn {_line, values} ->
values
|> Enum.at(index)
|> split_group_names()
end)
|> Enum.uniq_by(&normalize_name/1)
end
defp preview_rows(rows) do
rows
|> Enum.take(@preview_row_limit)
|> Enum.map(fn {_line, values} -> values end)
end
defp list_groups(actor) do
Mv.Membership.list_groups!(actor: actor)
end
defp build_group_lookup(groups) do
Enum.reduce(groups, %{}, fn group, acc ->
Map.put(acc, normalize_name(group.name), group)
end)
end
# Case-insensitive comparison consistent with the Group resource's
# case-insensitive name uniqueness.
defp normalize_name(name) when is_binary(name) do
name |> String.trim() |> String.downcase()
end
end

View file

@ -0,0 +1,72 @@
defmodule Mv.Membership.Import.ColumnResolverQueryTest do
# async: false — attaches a global telemetry handler to inspect emitted SQL.
use Mv.DataCase, async: false
alias Mv.Membership.Import.ColumnResolver
setup do
%{actor: Mv.Helpers.SystemActor.get_system_actor()}
end
describe "create_or_find_group/3 group lookup is name-filtered (no full-table scan)" do
test "resolving a new name absent from the snapshot queries by name, not the whole table",
%{actor: actor} do
# Populate the table so a full-table read would be costly and observable.
for n <- 1..20, do: Mv.Fixtures.group_fixture(%{name: "Existing #{n}"})
queries =
capture_group_select_queries(fn ->
# The name is absent from the (empty) snapshot, forcing a DB lookup
# before the create attempt. That lookup must filter by name.
assert {:ok, group} = ColumnResolver.create_or_find_group("New One", [], actor)
assert group.name == "New One"
end)
# No SELECT against the groups table issued during resolution may be an
# unfiltered full-table scan. The pre-create existence check must filter by
# name (carry a WHERE predicate).
refute Enum.any?(queries, &unfiltered_groups_select?/1),
"expected no unfiltered groups table scan, got:\n#{Enum.join(queries, "\n")}"
end
end
defp capture_group_select_queries(fun) do
test_pid = self()
handler_id = "test-group-query-#{System.unique_integer([:positive])}"
:telemetry.attach(
handler_id,
[:mv, :repo, :query],
fn _event, _measurements, metadata, _config ->
sql = metadata[:query] || ""
if String.contains?(sql, "SELECT") and String.contains?(sql, "\"groups\"") do
send(test_pid, {:group_query, sql})
end
end,
nil
)
try do
fun.()
after
:telemetry.detach(handler_id)
end
collect_group_queries([])
end
defp collect_group_queries(acc) do
receive do
{:group_query, sql} -> collect_group_queries([sql | acc])
after
0 -> Enum.reverse(acc)
end
end
# An unfiltered groups SELECT reads the whole table: it selects FROM "groups"
# with no WHERE clause at all. A name-filtered lookup carries a WHERE predicate.
defp unfiltered_groups_select?(sql) do
String.contains?(sql, "FROM \"groups\"") and not String.contains?(sql, "WHERE")
end
end

View file

@ -0,0 +1,227 @@
defmodule Mv.Membership.Import.ColumnResolverTest do
use Mv.DataCase, async: true
use ExUnitProperties
alias Mv.Membership.Import.ColumnResolver
setup do
%{actor: Mv.Helpers.SystemActor.get_system_actor()}
end
defp fee_type_fixture(name, actor) do
{:ok, fee_type} =
Mv.MembershipFees.create_membership_fee_type(
%{name: name, amount: Decimal.new("10.00"), interval: :yearly},
actor: actor
)
fee_type
end
defp header_maps(overrides) do
Map.merge(
%{
member: %{email: 0},
custom: %{},
unknown: [],
ignored: [],
groups_column_index: nil,
fee_type_column_index: nil
},
overrides
)
end
describe "resolve/3 group classification" do
test "splits group names into found (existing) and to_create (missing)", %{actor: actor} do
existing = Mv.Fixtures.group_fixture(%{name: "Orchester"})
maps = header_maps(%{member: %{email: 0}, groups_column_index: 1})
rows = [
{2, ["a@example.com", "Orchester"]},
{3, ["b@example.com", "Neues Ensemble"]}
]
result = ColumnResolver.resolve(maps, rows, actor)
assert Enum.any?(result.groups_found, &(&1.name == "Orchester" and &1.id == existing.id))
assert "Neues Ensemble" in result.groups_to_create
refute "Orchester" in result.groups_to_create
end
test "groups_found and groups_to_create are empty when no groups column", %{actor: actor} do
maps = header_maps(%{})
rows = [{2, ["a@example.com"]}]
result = ColumnResolver.resolve(maps, rows, actor)
assert result.groups_found == []
assert result.groups_to_create == []
end
end
describe "resolve/3 preview rows" do
test "returns up to 3 preview rows", %{actor: actor} do
maps = header_maps(%{})
rows = [
{2, ["a@example.com"]},
{3, ["b@example.com"]},
{4, ["c@example.com"]},
{5, ["d@example.com"]}
]
result = ColumnResolver.resolve(maps, rows, actor)
assert length(result.preview_rows) == 3
assert result.preview_rows == [["a@example.com"], ["b@example.com"], ["c@example.com"]]
end
test "returns fewer preview rows when file has fewer data rows", %{actor: actor} do
maps = header_maps(%{})
rows = [{2, ["a@example.com"]}]
result = ColumnResolver.resolve(maps, rows, actor)
assert result.preview_rows == [["a@example.com"]]
end
end
describe "resolve/3 fee-type resolution" do
test "maps known fee-type names to their id by normalized name", %{actor: actor} do
standard = fee_type_fixture("Standard", actor)
maps = header_maps(%{fee_type_column_index: 1})
rows = [{2, ["a@example.com", "Standard"]}]
result = ColumnResolver.resolve(maps, rows, actor)
assert result.fee_type_map["standard"] == standard.id
assert result.fee_type_warnings == []
end
test "records a warning for an unknown fee-type name", %{actor: actor} do
maps = header_maps(%{fee_type_column_index: 1})
rows = [{2, ["a@example.com", "Nonexistent Type"]}]
result = ColumnResolver.resolve(maps, rows, actor)
assert "Nonexistent Type" in result.fee_type_warnings
end
test "sets has_empty_fee_type_cells? when a fee-type cell is blank", %{actor: actor} do
fee_type_fixture("Standard", actor)
maps = header_maps(%{fee_type_column_index: 1})
rows = [
{2, ["a@example.com", "Standard"]},
{3, ["b@example.com", " "]}
]
result = ColumnResolver.resolve(maps, rows, actor)
assert result.has_empty_fee_type_cells? == true
end
test "has_empty_fee_type_cells? is false when all cells filled", %{actor: actor} do
fee_type_fixture("Standard", actor)
maps = header_maps(%{fee_type_column_index: 1})
rows = [{2, ["a@example.com", "Standard"]}]
result = ColumnResolver.resolve(maps, rows, actor)
assert result.has_empty_fee_type_cells? == false
end
test "fee-type resolution defaults are empty when no fee-type column", %{actor: actor} do
maps = header_maps(%{})
rows = [{2, ["a@example.com"]}]
result = ColumnResolver.resolve(maps, rows, actor)
assert result.fee_type_map == %{}
assert result.fee_type_warnings == []
assert result.has_empty_fee_type_cells? == false
end
end
describe "create_or_find_group/3" do
test "creates a new group when none exists", %{actor: actor} do
assert {:ok, group} = ColumnResolver.create_or_find_group("Brand New Group", [], actor)
assert group.name == "Brand New Group"
end
test "returns the existing group from the pre-fetched list without creating", %{actor: actor} do
existing = Mv.Fixtures.group_fixture(%{name: "Existing Group"})
before_count = length(Mv.Membership.list_groups!(actor: actor))
assert {:ok, group} =
ColumnResolver.create_or_find_group("Existing Group", [existing], actor)
assert group.id == existing.id
assert length(Mv.Membership.list_groups!(actor: actor)) == before_count
end
test "resolves to a group created concurrently after the snapshot was taken",
%{actor: actor} do
# Simulates a concurrent import session: the group name is absent from the
# caller's pre-fetched snapshot, but the group now exists in the DB. The
# resolver must link to the existing group, never error or duplicate it.
stale_snapshot = []
_concurrently_created = Mv.Fixtures.group_fixture(%{name: "Concurrent Group"})
before_count = length(Mv.Membership.list_groups!(actor: actor))
assert {:ok, group} =
ColumnResolver.create_or_find_group("Concurrent Group", stale_snapshot, actor)
assert group.name == "Concurrent Group"
assert length(Mv.Membership.list_groups!(actor: actor)) == before_count
end
property "is idempotent: same names never create duplicate groups", %{actor: actor} do
check all(
names <-
StreamData.list_of(
StreamData.string(:alphanumeric, min_length: 1, max_length: 20),
min_length: 1,
max_length: 5
),
max_runs: 25
) do
names = Enum.map(names, &("grp-" <> &1))
existing = Mv.Membership.list_groups!(actor: actor)
first_ids = resolve_all(names, existing, actor)
existing_after = Mv.Membership.list_groups!(actor: actor)
second_ids = resolve_all(names, existing_after, actor)
# Same name always resolves to the same group id across both passes.
assert first_ids == second_ids
# No duplicate groups exist for any of the names (case-insensitive).
all_groups = Mv.Membership.list_groups!(actor: actor)
for name <- Enum.uniq_by(names, &String.downcase/1) do
matching =
Enum.filter(all_groups, fn g ->
String.downcase(g.name) == String.downcase(name)
end)
assert length(matching) == 1
end
end
end
end
defp resolve_all(names, existing, actor) do
Enum.map(names, fn name ->
{:ok, group} = ColumnResolver.create_or_find_group(name, existing, actor)
{String.downcase(name), group.id}
end)
|> Map.new()
end
end