Merge pull request 'Implement fuzzy search' (#187) from feature/162_fuzzy_search into main
All checks were successful
continuous-integration/drone/push Build is passing

Reviewed-on: #187
This commit is contained in:
carla 2025-11-12 13:10:30 +01:00
commit 7305c63130
6 changed files with 776 additions and 4 deletions

View file

@ -3,6 +3,9 @@ defmodule Mv.Membership.Member do
domain: Mv.Membership,
data_layer: AshPostgres.DataLayer
require Ash.Query
import Ash.Expr
postgres do
table "members"
repo Mv.Repo
@ -108,6 +111,50 @@ defmodule Mv.Membership.Member do
where [changing(:user)]
end
end
# Read action for fuzzy member search.
#
# Combines three strategies in one OR-filter:
#   * full-text search on `search_vector`,
#   * plain substring matching on address/contact fields,
#   * pg_trgm trigram similarity on first name, last name and street.
#
# Arguments:
#   * `:query` - the search term; nil or blank leaves the query unfiltered.
#   * `:similarity_threshold` - lower bound used in the `word_similarity` /
#     `similarity` comparisons; defaults to 0.2.
read :search do
  argument :query, :string, allow_nil?: true
  argument :similarity_threshold, :float, allow_nil?: true

  prepare fn query, _ctx ->
    search_term =
      case Ash.Query.get_argument(query, :query) do
        term when is_binary(term) -> String.trim(term)
        _ -> ""
      end

    # 0.2 is the recommended default: lower values return more, but less
    # specific, results.
    threshold = Ash.Query.get_argument(query, :similarity_threshold) || 0.2

    if search_term == "" do
      query
    else
      # Escape the LIKE metacharacters so that user input such as "%" or "_"
      # is matched literally instead of acting as a wildcard.
      like_pattern =
        "%" <> String.replace(search_term, ["\\", "%", "_"], &("\\" <> &1)) <> "%"

      Ash.Query.filter(
        query,
        expr(
          # Full-text search is the main filter.
          # Substring matching on numeric-like fields (best effort, supports
          # middle substrings).
          # Case-insensitive substring match on city; this also covers the
          # case-sensitive `contains/2` check, which is therefore not repeated.
          # Fuzzy (trigram) matching only for first name, last name and street.
          # NOTE(review): the `%` operator compares against the
          # `pg_trgm.similarity_threshold` setting rather than `threshold`, so
          # a stricter `:similarity_threshold` does not narrow these three
          # conditions - confirm whether that is intended.
          fragment("search_vector @@ websearch_to_tsquery('simple', ?)", ^search_term) or
            fragment("search_vector @@ plainto_tsquery('simple', ?)", ^search_term) or
            contains(postal_code, ^search_term) or
            contains(house_number, ^search_term) or
            contains(phone_number, ^search_term) or
            contains(email, ^search_term) or
            ilike(city, ^like_pattern) or
            fragment("? % first_name", ^search_term) or
            fragment("? % last_name", ^search_term) or
            fragment("? % street", ^search_term) or
            fragment("word_similarity(?, first_name) > ?", ^search_term, ^threshold) or
            fragment("word_similarity(?, last_name) > ?", ^search_term, ^threshold) or
            fragment("word_similarity(?, street) > ?", ^search_term, ^threshold) or
            fragment("similarity(first_name, ?) > ?", ^search_term, ^threshold) or
            fragment("similarity(last_name, ?) > ?", ^search_term, ^threshold) or
            fragment("similarity(street, ?) > ?", ^search_term, ^threshold)
        )
      )
    end
  end
end
end
validations do
@ -281,4 +328,21 @@ defmodule Mv.Membership.Member do
identities do
identity :unique_email, [:email]
end
@doc """
Applies the `:search` read action to `query` when a search term is given.

`opts` may be a map (atom or string keys) or a keyword list. Recognised keys:

  * `:query` - the search term. When it is missing, `nil` or blank, `query`
    is returned unchanged.
  * `:similarity_threshold` - optional; forwarded to the `:search` action.

Only inputs that the `:search` action declares are forwarded. Any other key
in `opts` (for example `:fields`) is ignored, because the action has no such
argument.
"""
@spec fuzzy_search(Ash.Query.t() | module(), map() | keyword()) :: Ash.Query.t() | module()
def fuzzy_search(query, opts) do
  search_term = opts |> fuzzy_search_opt(:query) |> to_string()

  if String.trim(search_term) == "" do
    query
  else
    args =
      case fuzzy_search_opt(opts, :similarity_threshold) do
        nil -> %{query: search_term}
        threshold -> %{query: search_term, similarity_threshold: threshold}
      end

    Ash.Query.for_read(query, :search, args)
  end
end

# Reads `key` from a map (atom key first, then its string form) or from a
# keyword list. String lookups are only attempted on maps, since accessing a
# keyword list with a string key raises.
defp fuzzy_search_opt(opts, key) when is_map(opts) do
  Map.get(opts, key) || Map.get(opts, Atom.to_string(key))
end

defp fuzzy_search_opt(opts, key) when is_list(opts), do: Keyword.get(opts, key)
end

View file

@ -5,7 +5,7 @@ defmodule Mv.Repo do
@impl true
def installed_extensions do
# Add extensions here, and the migration generator will install them.
# NOTE(review): two list literals follow and only the last expression is
# returned, so the effective value is the list that includes "pg_trgm".
# The first literal looks like the pre-change line of the diff this text was
# taken from - confirm and drop it.
["ash-functions", "citext"]
["ash-functions", "citext", "pg_trgm"]
end
# Don't open unnecessary transactions

View file

@ -1,7 +1,5 @@
defmodule MvWeb.MemberLive.Index do
use MvWeb, :live_view
import Ash.Expr
import Ash.Query
@impl true
def mount(_params, _session, socket) do
@ -194,7 +192,9 @@ defmodule MvWeb.MemberLive.Index do
# Narrows `query` by `search_query`; a nil or blank `search_query` returns
# `query` unchanged.
# NOTE(review): the pipeline below contains both a direct full-text `filter`
# step and the `Mv.Membership.Member.fuzzy_search/2` call. The `filter` line
# looks like the pre-change line of the diff this text was taken from
# (presumably replaced by the `fuzzy_search` call) - confirm against the
# actual file.
defp apply_search_filter(query, search_query) do
if search_query && String.trim(search_query) != "" do
query
|> filter(expr(fragment("search_vector @@ plainto_tsquery('simple', ?)", ^search_query)))
|> Mv.Membership.Member.fuzzy_search(%{
query: search_query
})
else
query
end
end

View file

@ -0,0 +1,66 @@
defmodule Mv.Repo.Migrations.AddTrigramToMembers do
  @moduledoc """
  Enables the `pg_trgm` extension and creates one GIN trigram index per
  column in `@trigram_columns` on the `members` table.

  Updates resources based on their most recent snapshots.
  This file was autogenerated with `mix ash_postgres.generate_migrations`
  """

  use Ecto.Migration

  # Columns of `members` that get a trigram index. The index for column `col`
  # is named `members_<col>_trgm_idx`.
  @trigram_columns ~w(first_name last_name email city street notes)

  def up do
    # Activate the trigram extension.
    execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")

    # Trigram indexes (GIN) for the fields we want to search in.
    # `gin_trgm_ops` is the name of the operator class.
    # `IF NOT EXISTS` keeps `up` re-runnable, matching the guarded extension
    # statement above and the guarded drops in `down`.
    for column <- @trigram_columns do
      execute("""
      CREATE INDEX IF NOT EXISTS members_#{column}_trgm_idx
      ON members
      USING GIN (#{column} gin_trgm_ops);
      """)
    end
  end

  def down do
    # Only the indexes are removed; the extension itself is left installed.
    for column <- @trigram_columns do
      execute("DROP INDEX IF EXISTS members_#{column}_trgm_idx;")
    end
  end
end

View file

@ -0,0 +1,199 @@
{
"attributes": [
{
"allow_nil?": false,
"default": "fragment(\"uuid_generate_v7()\")",
"generated?": false,
"precision": null,
"primary_key?": true,
"references": null,
"scale": null,
"size": null,
"source": "id",
"type": "uuid"
},
{
"allow_nil?": false,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "first_name",
"type": "text"
},
{
"allow_nil?": false,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "last_name",
"type": "text"
},
{
"allow_nil?": false,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "email",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "birth_date",
"type": "date"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "paid",
"type": "boolean"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "phone_number",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "join_date",
"type": "date"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "exit_date",
"type": "date"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "notes",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "city",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "street",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "house_number",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "postal_code",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "search_vector",
"type": "tsvector"
}
],
"base_filter": null,
"check_constraints": [],
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
"hash": "9019AD59832AB926899B6A871A368CF65F757533795E4E38D5C0EE6AE58BE070",
"identities": [],
"multitenancy": {
"attribute": null,
"global": null,
"strategy": null
},
"repo": "Elixir.Mv.Repo",
"schema": null,
"table": "members"
}

View file

@ -0,0 +1,443 @@
defmodule Mv.Membership.FuzzySearchTest do
  @moduledoc """
  Tests for `Mv.Membership.Member.fuzzy_search/2`: full-text, substring and
  trigram matching, plus whitespace, special-character, unicode and
  long-input handling.
  """
  use Mv.DataCase, async: false

  alias Mv.Membership.Member

  # Creates a member from `attrs`; the match fails the test if creation does
  # not succeed.
  defp create_member!(attrs) do
    {:ok, member} = Mv.Membership.create_member(attrs)
    member
  end

  # Runs the fuzzy search for `term` and returns the ids of all matches.
  defp search_ids(term) do
    Member
    |> Member.fuzzy_search(%{query: term})
    |> Ash.read!()
    |> Enum.map(& &1.id)
  end

  test "fuzzy_search/2 function exists" do
    # `function_exported?/3` does not load the module, so load it explicitly.
    assert Code.ensure_loaded?(Member)
    assert function_exported?(Member, :fuzzy_search, 2)
  end

  test "fuzzy_search returns John Doe and Alice Johnson for query 'john'" do
    john =
      create_member!(%{first_name: "John", last_name: "Doe", email: "john.doe@example.com"})

    create_member!(%{
      first_name: "Adriana",
      last_name: "Smith",
      email: "adriana.smith@example.com"
    })

    alice =
      create_member!(%{
        first_name: "Alice",
        last_name: "Johnson",
        email: "alice.johnson@example.com"
      })

    # The search does not define a sort order, so compare order-independently.
    assert Enum.sort(search_ids("john")) == Enum.sort([john.id, alice.id])
  end

  test "fuzzy_search finds 'Thomas' when searching misspelled 'tomas'" do
    thomas =
      create_member!(%{first_name: "Thomas", last_name: "Doe", email: "john.doe@example.com"})

    jane =
      create_member!(%{first_name: "Jane", last_name: "Smith", email: "jane.smith@example.com"})

    create_member!(%{
      first_name: "Alice",
      last_name: "Johnson",
      email: "alice.johnson@example.com"
    })

    ids = search_ids("tomas")

    assert thomas.id in ids
    refute jane.id in ids
  end

  test "empty query returns all members" do
    a = create_member!(%{first_name: "A", last_name: "One", email: "a1@example.com"})
    b = create_member!(%{first_name: "B", last_name: "Two", email: "b2@example.com"})

    # Exact set comparison: an empty result must not pass.
    assert Enum.sort(search_ids("")) == Enum.sort([a.id, b.id])
  end

  test "substring numeric search matches postal_code mid-string" do
    m1 =
      create_member!(%{
        first_name: "Num",
        last_name: "One",
        email: "n1@example.com",
        postal_code: "12345"
      })

    create_member!(%{
      first_name: "Num",
      last_name: "Two",
      email: "n2@example.com",
      postal_code: "67890"
    })

    assert m1.id in search_ids("345")
  end

  test "substring numeric search matches house_number mid-string" do
    m1 =
      create_member!(%{
        first_name: "Home",
        last_name: "One",
        email: "h1@example.com",
        house_number: "A345B"
      })

    create_member!(%{
      first_name: "Home",
      last_name: "Two",
      email: "h2@example.com",
      house_number: "77"
    })

    assert m1.id in search_ids("345")
  end

  test "fuzzy matches street misspelling" do
    s1 =
      create_member!(%{
        first_name: "Road",
        last_name: "Test",
        email: "s1@example.com",
        street: "Main Street"
      })

    create_member!(%{
      first_name: "Road",
      last_name: "Other",
      email: "s2@example.com",
      street: "Second Avenue"
    })

    assert s1.id in search_ids("mainn")
  end

  test "substring in city matches mid-string" do
    berlin =
      create_member!(%{
        first_name: "City",
        last_name: "One",
        email: "city1@example.com",
        city: "Berlin"
      })

    create_member!(%{
      first_name: "City",
      last_name: "Two",
      email: "city2@example.com",
      city: "München"
    })

    assert berlin.id in search_ids("erl")
  end

  test "blank character handling: query with spaces matches full name" do
    member =
      create_member!(%{first_name: "John", last_name: "Doe", email: "john.doe@example.com"})

    create_member!(%{first_name: "Jane", last_name: "Smith", email: "jane.smith@example.com"})

    assert member.id in search_ids("john doe")
  end

  test "blank character handling: query with multiple spaces is handled" do
    member =
      create_member!(%{first_name: "Mary", last_name: "Jane", email: "mary.jane@example.com"})

    # Several consecutive spaces between the words, as the test name states.
    assert member.id in search_ids("mary   jane")
  end

  test "special character handling: @ symbol in query matches email" do
    member =
      create_member!(%{first_name: "Test", last_name: "User", email: "test.user@example.com"})

    create_member!(%{
      first_name: "Other",
      last_name: "Person",
      email: "other.person@different.org"
    })

    # The query contains the `@` the test name refers to.
    assert member.id in search_ids("@example")
  end

  test "special character handling: dot in query matches email" do
    member =
      create_member!(%{first_name: "Dot", last_name: "Test", email: "dot.test@example.com"})

    create_member!(%{first_name: "No", last_name: "Dot", email: "nodot@example.com"})

    assert member.id in search_ids("dot.test")
  end

  test "special character handling: hyphen in query matches data" do
    member =
      create_member!(%{
        first_name: "Mary-Jane",
        last_name: "Watson",
        email: "mary.jane@example.com"
      })

    create_member!(%{first_name: "Mary", last_name: "Smith", email: "mary.smith@example.com"})

    assert member.id in search_ids("mary-jane")
  end

  test "unicode character handling: umlaut ö in query matches data" do
    member =
      create_member!(%{
        first_name: "Jörg",
        last_name: "Schmidt",
        email: "joerg.schmidt@example.com"
      })

    create_member!(%{first_name: "John", last_name: "Smith", email: "john.smith@example.com"})

    assert member.id in search_ids("jörg")
  end

  test "unicode character handling: umlaut ä in query matches data" do
    member =
      create_member!(%{
        first_name: "Märta",
        last_name: "Andersson",
        email: "maerta.andersson@example.com"
      })

    create_member!(%{
      first_name: "Marta",
      last_name: "Johnson",
      email: "marta.johnson@example.com"
    })

    assert member.id in search_ids("märta")
  end

  test "unicode character handling: umlaut ü in query matches data" do
    member =
      create_member!(%{
        first_name: "Günther",
        last_name: "Müller",
        email: "guenther.mueller@example.com"
      })

    create_member!(%{
      first_name: "Gunter",
      last_name: "Miller",
      email: "gunter.miller@example.com"
    })

    assert member.id in search_ids("müller")
  end

  test "unicode character handling: query without umlaut matches data with umlaut" do
    member =
      create_member!(%{
        first_name: "Müller",
        last_name: "Schmidt",
        email: "mueller.schmidt@example.com"
      })

    create_member!(%{
      first_name: "Miller",
      last_name: "Smith",
      email: "miller.smith@example.com"
    })

    assert member.id in search_ids("muller")
  end

  test "very long search strings: handles long query without error" do
    create_member!(%{first_name: "Test", last_name: "User", email: "test@example.com"})

    long_query = String.duplicate("a", 1000)

    # Should not crash, may return empty or some results
    assert is_list(search_ids(long_query))
  end

  test "very long search strings: handles extremely long query" do
    create_member!(%{first_name: "Test", last_name: "User", email: "test@example.com"})

    very_long_query = String.duplicate("test query ", 1000)

    # Should not crash, may return empty or some results
    assert is_list(search_ids(very_long_query))
  end
end