Implement fuzzy search #187

Merged
carla merged 10 commits from feature/162_fuzzy_search into main 2025-11-12 13:10:32 +01:00
6 changed files with 776 additions and 4 deletions

View file

@ -3,6 +3,9 @@ defmodule Mv.Membership.Member do
domain: Mv.Membership,
data_layer: AshPostgres.DataLayer
require Ash.Query
import Ash.Expr
postgres do
table "members"
repo Mv.Repo
@ -108,6 +111,50 @@ defmodule Mv.Membership.Member do
where [changing(:user)]
end
end
# Action to handle fuzzy search on specific fields
read :search do
argument :query, :string, allow_nil?: true
argument :similarity_threshold, :float, allow_nil?: true
prepare fn query, _ctx ->
q = Ash.Query.get_argument(query, :query) || ""
# 0.2 as similarity threshold (recommended) - lower value can lead to more results but also to more unspecific results
threshold = Ash.Query.get_argument(query, :similarity_threshold) || 0.2
if is_binary(q) and String.trim(q) != "" do
carla marked this conversation as resolved

some moduledocs would be nice for the public search function.

some moduledocs would be nice for the public `search` function.
q2 = String.trim(q)
pat = "%" <> q2 <> "%"
# FTS as main filter and fuzzy search just for first name, last name and street
query
|> Ash.Query.filter(
expr(
carla marked this conversation as resolved

the fields argument is never used in the search function.

the `fields` argument is never used in the `search` function.
# Substring on numeric-like fields (best effort, supports middle substrings)
fragment("search_vector @@ websearch_to_tsquery('simple', ?)", ^q2) or
fragment("search_vector @@ plainto_tsquery('simple', ?)", ^q2) or
contains(postal_code, ^q2) or
contains(house_number, ^q2) or
contains(phone_number, ^q2) or
contains(email, ^q2) or
contains(city, ^q2) or ilike(city, ^pat) or
fragment("? % first_name", ^q2) or
fragment("? % last_name", ^q2) or
fragment("? % street", ^q2) or
fragment("word_similarity(?, first_name) > ?", ^q2, ^threshold) or
fragment("word_similarity(?, last_name) > ?", ^q2, ^threshold) or
fragment("word_similarity(?, street) > ?", ^q2, ^threshold) or
fragment("similarity(first_name, ?) > ?", ^q2, ^threshold) or
fragment("similarity(last_name, ?) > ?", ^q2, ^threshold) or
fragment("similarity(street, ?) > ?", ^q2, ^threshold)
)
)
else
query
carla marked this conversation as resolved

maybe the duplicate code for first name last name and street could be generated dynamically by the fields argument?

maybe the duplicate code for `first name` `last name` and `street` could be generated dynamically by the fields argument?

I decided to omit fields now...but maybe during refactoring we could think about a dynamic way

I decided to omit fields now...but maybe during refactoring we could think about a dynamic way
end
end
carla marked this conversation as resolved

filter for email is missing
adding this worked for me:

                fragment("? % email", ^q2) or
                fragment("word_similarity(?, email) > ?", ^q2, ^threshold) or
                fragment("similarity(email, ?) > ?", ^q2, ^threshold) or
filter for email is missing adding this worked for me: ``` fragment("? % email", ^q2) or fragment("word_similarity(?, email) > ?", ^q2, ^threshold) or fragment("similarity(email, ?) > ?", ^q2, ^threshold) or ```

You're totally right, i forgot the email field. But I would favor for a simple contains ilike on the email field, because I think fuzzy search on the name already is enough? or what do you think?

You're totally right, i forgot the email field. But I would favor for a simple contains ilike on the email field, because I think fuzzy search on the name already is enough? or what do you think?

I added it as ilike and not as fuzzy search. But we can create another issue if we see we need it :)

I added it as ilike and not as fuzzy search. But we can create another issue if we see we need it :)
end
end
validations do
@ -281,4 +328,21 @@ defmodule Mv.Membership.Member do
identities do
identity :unique_email, [:email]
end
@doc """
Applies the `:search` read action to `query` when `opts` carries a non-blank search term.

Intended to be called from the live view; it builds the arguments and delegates to the
`:search` action via `Ash.Query.for_read/3`.

## Options

`opts` is read with both atom and string keys:

  * `:query` / `"query"` - the search term, converted with `to_string/1`. When it is
    missing or blank after trimming, `query` is returned unchanged.
  * `:similarity_threshold` / `"similarity_threshold"` - optional; forwarded to the
    `:search` action argument of the same name when present.
  * `:fields` / `"fields"` - still accepted so existing callers keep working, but
    ignored: the `:search` action declares no `fields` argument, so forwarding it
    would hand the action an input it does not know.

## Returns

Either the untouched `query` (blank term) or the value returned by
`Ash.Query.for_read/3`.
"""
def fuzzy_search(query, opts) do
  term = (opts[:query] || opts["query"] || "") |> to_string()

  if String.trim(term) == "" do
    query
  else
    args =
      case opts[:similarity_threshold] || opts["similarity_threshold"] do
        nil -> %{query: term}
        threshold -> %{query: term, similarity_threshold: threshold}
      end

    Ash.Query.for_read(query, :search, args)
  end
end
end

View file

@ -5,7 +5,7 @@ defmodule Mv.Repo do
@impl true
def installed_extensions do
# Add extensions here, and the migration generator will install them.
["ash-functions", "citext"]
["ash-functions", "citext", "pg_trgm"]
end
# Don't open unnecessary transactions

View file

@ -1,7 +1,5 @@
defmodule MvWeb.MemberLive.Index do
use MvWeb, :live_view
import Ash.Expr
import Ash.Query
@impl true
def mount(_params, _session, socket) do
@ -194,7 +192,9 @@ defmodule MvWeb.MemberLive.Index do
defp apply_search_filter(query, search_query) do
if search_query && String.trim(search_query) != "" do
query
|> filter(expr(fragment("search_vector @@ plainto_tsquery('simple', ?)", ^search_query)))
|> Mv.Membership.Member.fuzzy_search(%{
query: search_query
})
carla marked this conversation as resolved

these fields are passed to fuzzy_search->search function but don't have any effect, as the search function ignores them.

these fields are passed to `fuzzy_search`->`search` function but don't have any effect, as the `search` function ignores them.
else
query
end

View file

@ -0,0 +1,66 @@
defmodule Mv.Repo.Migrations.AddTrigramToMembers do
  @moduledoc """
  Enables the `pg_trgm` extension and adds one trigram GIN index per searchable text
  column of the `members` table (`first_name`, `last_name`, `email`, `city`, `street`,
  `notes`).

  Each index is named `members_<column>_trgm_idx` and uses the `gin_trgm_ops`
  operator class. Rolling back drops the indexes but leaves the extension installed.
  """

  use Ecto.Migration

  # Columns of `members` that receive a `members_<column>_trgm_idx` index.
  @trigram_columns ~w(first_name last_name email city street notes)

  def up do
    # Activate the trigram extension before creating indexes that depend on it.
    execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")

    # `IF NOT EXISTS` keeps the statement re-runnable, mirroring the `IF EXISTS`
    # used when dropping the indexes in `down/0`.
    for column <- @trigram_columns do
      execute("""
      CREATE INDEX IF NOT EXISTS members_#{column}_trgm_idx
      ON members
      USING GIN (#{column} gin_trgm_ops);
      """)
    end
  end

  def down do
    for column <- @trigram_columns do
      execute("DROP INDEX IF EXISTS members_#{column}_trgm_idx;")
    end
  end
end

View file

@ -0,0 +1,199 @@
{
"attributes": [
{
"allow_nil?": false,
"default": "fragment(\"uuid_generate_v7()\")",
"generated?": false,
"precision": null,
"primary_key?": true,
"references": null,
"scale": null,
"size": null,
"source": "id",
"type": "uuid"
},
{
"allow_nil?": false,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "first_name",
"type": "text"
},
{
"allow_nil?": false,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "last_name",
"type": "text"
},
{
"allow_nil?": false,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "email",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "birth_date",
"type": "date"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "paid",
"type": "boolean"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "phone_number",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "join_date",
"type": "date"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "exit_date",
"type": "date"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "notes",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "city",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "street",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "house_number",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "postal_code",
"type": "text"
},
{
"allow_nil?": true,
"default": "nil",
"generated?": false,
"precision": null,
"primary_key?": false,
"references": null,
"scale": null,
"size": null,
"source": "search_vector",
"type": "tsvector"
}
],
"base_filter": null,
"check_constraints": [],
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
"hash": "9019AD59832AB926899B6A871A368CF65F757533795E4E38D5C0EE6AE58BE070",
"identities": [],
"multitenancy": {
"attribute": null,
"global": null,
"strategy": null
},
"repo": "Elixir.Mv.Repo",
"schema": null,
"table": "members"
}

View file

@ -0,0 +1,443 @@
defmodule Mv.Membership.FuzzySearchTest do
use Mv.DataCase, async: false
test "fuzzy_search/2 function exists" do
  # function_exported?/3 does not load the module on its own, so load it first;
  # otherwise the check can report false for a function that does exist.
  assert Code.ensure_loaded?(Mv.Membership.Member)
  assert function_exported?(Mv.Membership.Member, :fuzzy_search, 2)
end
test "fuzzy_search returns only John Doe by fuzzy query 'john'" do
{:ok, john} =
Mv.Membership.create_member(%{
first_name: "John",
last_name: "Doe",
email: "john.doe@example.com"
})
{:ok, _jane} =
Mv.Membership.create_member(%{
first_name: "Adriana",
last_name: "Smith",
email: "adriana.smith@example.com"
})
{:ok, alice} =
Mv.Membership.create_member(%{
first_name: "Alice",
last_name: "Johnson",
email: "alice.johnson@example.com"
})
result =
Mv.Membership.Member
|> Mv.Membership.Member.fuzzy_search(%{
query: "john"
})
carla marked this conversation as resolved

In this test the email field is given, but it only tests the first_name "John" and the last_name "Johnson". It would be good to explicitly test the email field.

In this test the `email` field is given, but it only tests the `first_name` "John" and the `last_name` "Johnson". It would be good to explicitly test the `email` field.
|> Ash.read!()
assert Enum.map(result, & &1.id) == [john.id, alice.id]
end
test "fuzzy_search finds 'Thomas' when searching misspelled 'tomas'" do
  # Member the misspelled query is expected to find.
  thomas_attrs = %{first_name: "Thomas", last_name: "Doe", email: "john.doe@example.com"}
  {:ok, expected} = Mv.Membership.create_member(thomas_attrs)

  # Member the query must not return.
  jane_attrs = %{first_name: "Jane", last_name: "Smith", email: "jane.smith@example.com"}
  {:ok, excluded} = Mv.Membership.create_member(jane_attrs)

  # Additional member; no assertion is made about it.
  alice_attrs = %{first_name: "Alice", last_name: "Johnson", email: "alice.johnson@example.com"}
  {:ok, _} = Mv.Membership.create_member(alice_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "tomas"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
  refute excluded.id in found_ids
  assert found_ids != []
end
test "empty query returns all members" do
  {:ok, a} =
    Mv.Membership.create_member(%{first_name: "A", last_name: "One", email: "a1@example.com"})

  {:ok, b} =
    Mv.Membership.create_member(%{first_name: "B", last_name: "Two", email: "b2@example.com"})

  ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: ""})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  # The previous assertion only verified that every returned id belonged to one of the
  # two fixtures, which is also true for an empty result. Compare the full id sets so
  # the test fails when a member is missing from the result.
  assert Enum.sort(ids) == Enum.sort([a.id, b.id])
end
test "substring numeric search matches postal_code mid-string" do
  # Member whose postal code contains the searched digits in the middle.
  matching_attrs = %{
    first_name: "Num",
    last_name: "One",
    email: "n1@example.com",
    postal_code: "12345"
  }

  {:ok, matching} = Mv.Membership.create_member(matching_attrs)

  # Second member with a different postal code; no assertion is made about it.
  other_attrs = %{
    first_name: "Num",
    last_name: "Two",
    email: "n2@example.com",
    postal_code: "67890"
  }

  {:ok, _} = Mv.Membership.create_member(other_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "345"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert matching.id in found_ids
end
test "substring numeric search matches house_number mid-string" do
  # Member whose house number contains the searched digits in the middle.
  matching_attrs = %{
    first_name: "Home",
    last_name: "One",
    email: "h1@example.com",
    house_number: "A345B"
  }

  {:ok, matching} = Mv.Membership.create_member(matching_attrs)

  # Second member with a different house number; no assertion is made about it.
  other_attrs = %{
    first_name: "Home",
    last_name: "Two",
    email: "h2@example.com",
    house_number: "77"
  }

  {:ok, _} = Mv.Membership.create_member(other_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "345"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert matching.id in found_ids
end
test "fuzzy matches street misspelling" do
  # Member whose street the misspelled query is expected to match.
  matching_attrs = %{
    first_name: "Road",
    last_name: "Test",
    email: "s1@example.com",
    street: "Main Street"
  }

  {:ok, matching} = Mv.Membership.create_member(matching_attrs)

  # Second member living on another street; no assertion is made about it.
  other_attrs = %{
    first_name: "Road",
    last_name: "Other",
    email: "s2@example.com",
    street: "Second Avenue"
  }

  {:ok, _} = Mv.Membership.create_member(other_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "mainn"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert matching.id in found_ids
end
test "substring in city matches mid-string" do
  # Member whose city contains the searched substring in the middle.
  berlin_attrs = %{
    first_name: "City",
    last_name: "One",
    email: "city1@example.com",
    city: "Berlin"
  }

  {:ok, berliner} = Mv.Membership.create_member(berlin_attrs)

  # Second member from another city; no assertion is made about it.
  munich_attrs = %{
    first_name: "City",
    last_name: "Two",
    email: "city2@example.com",
    city: "München"
  }

  {:ok, _} = Mv.Membership.create_member(munich_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "erl"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert berliner.id in found_ids
end
test "blank character handling: query with spaces matches full name" do
  # Member whose first and last name together form the query.
  john_attrs = %{first_name: "John", last_name: "Doe", email: "john.doe@example.com"}
  {:ok, expected} = Mv.Membership.create_member(john_attrs)

  # Second member; no assertion is made about it.
  jane_attrs = %{first_name: "Jane", last_name: "Smith", email: "jane.smith@example.com"}
  {:ok, _} = Mv.Membership.create_member(jane_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "john doe"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "blank character handling: query with multiple spaces is handled" do
  # Single fixture; the query is built from its first and last name.
  mary_attrs = %{first_name: "Mary", last_name: "Jane", email: "mary.jane@example.com"}
  {:ok, expected} = Mv.Membership.create_member(mary_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "mary jane"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "special character handling: @ symbol in query matches email" do
  {:ok, member} =
    Mv.Membership.create_member(%{
      first_name: "Test",
      last_name: "User",
      email: "test.user@example.com"
    })

  {:ok, _other} =
    Mv.Membership.create_member(%{
      first_name: "Other",
      last_name: "Person",
      email: "other.person@different.org"
    })

  # The query has to contain an "@" for this test to cover what its name describes;
  # the previous query ("example") had none. "user@example" is a substring of the
  # first member's email address.
  result =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "user@example"})
    |> Ash.read!()

  ids = Enum.map(result, & &1.id)
  assert member.id in ids
end
test "special character handling: dot in query matches email" do
  # Member whose email address contains the dotted query.
  dotted_attrs = %{first_name: "Dot", last_name: "Test", email: "dot.test@example.com"}
  {:ok, expected} = Mv.Membership.create_member(dotted_attrs)

  # Second member; no assertion is made about it.
  plain_attrs = %{first_name: "No", last_name: "Dot", email: "nodot@example.com"}
  {:ok, _} = Mv.Membership.create_member(plain_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "dot.test"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "special character handling: hyphen in query matches data" do
  # Member with a hyphenated first name equal to the query (ignoring case).
  hyphen_attrs = %{first_name: "Mary-Jane", last_name: "Watson", email: "mary.jane@example.com"}
  {:ok, expected} = Mv.Membership.create_member(hyphen_attrs)

  # Second member; no assertion is made about it.
  plain_attrs = %{first_name: "Mary", last_name: "Smith", email: "mary.smith@example.com"}
  {:ok, _} = Mv.Membership.create_member(plain_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "mary-jane"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "unicode character handling: umlaut ö in query matches data" do
  # Member whose first name contains the umlaut used in the query.
  umlaut_attrs = %{first_name: "Jörg", last_name: "Schmidt", email: "joerg.schmidt@example.com"}
  {:ok, expected} = Mv.Membership.create_member(umlaut_attrs)

  # Second member; no assertion is made about it.
  plain_attrs = %{first_name: "John", last_name: "Smith", email: "john.smith@example.com"}
  {:ok, _} = Mv.Membership.create_member(plain_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "jörg"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "unicode character handling: umlaut ä in query matches data" do
  # Member whose first name contains the umlaut used in the query.
  umlaut_attrs = %{
    first_name: "Märta",
    last_name: "Andersson",
    email: "maerta.andersson@example.com"
  }

  {:ok, expected} = Mv.Membership.create_member(umlaut_attrs)

  # Second member; no assertion is made about it.
  plain_attrs = %{first_name: "Marta", last_name: "Johnson", email: "marta.johnson@example.com"}
  {:ok, _} = Mv.Membership.create_member(plain_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "märta"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "unicode character handling: umlaut ü in query matches data" do
  # Member whose last name equals the query (ignoring case), umlaut included.
  umlaut_attrs = %{
    first_name: "Günther",
    last_name: "Müller",
    email: "guenther.mueller@example.com"
  }

  {:ok, expected} = Mv.Membership.create_member(umlaut_attrs)

  # Second member; no assertion is made about it.
  plain_attrs = %{first_name: "Gunter", last_name: "Miller", email: "gunter.miller@example.com"}
  {:ok, _} = Mv.Membership.create_member(plain_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "müller"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "unicode character handling: query without umlaut matches data with umlaut" do
  # Member whose first name carries an umlaut that the query leaves out.
  umlaut_attrs = %{
    first_name: "Müller",
    last_name: "Schmidt",
    email: "mueller.schmidt@example.com"
  }

  {:ok, expected} = Mv.Membership.create_member(umlaut_attrs)

  # Second member; no assertion is made about it.
  plain_attrs = %{first_name: "Miller", last_name: "Smith", email: "miller.smith@example.com"}
  {:ok, _} = Mv.Membership.create_member(plain_attrs)

  found_ids =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: "muller"})
    |> Ash.read!()
    |> Enum.map(& &1.id)

  assert expected.id in found_ids
end
test "very long search strings: handles long query without error" do
  fixture_attrs = %{first_name: "Test", last_name: "User", email: "test@example.com"}
  {:ok, _} = Mv.Membership.create_member(fixture_attrs)

  # 1000 repetitions of a single character.
  search_term = String.duplicate("a", 1000)

  members =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: search_term})
    |> Ash.read!()

  # Only checks that the read completes and yields a list; the contents are not asserted.
  assert is_list(members)
end
test "very long search strings: handles extremely long query" do
  fixture_attrs = %{first_name: "Test", last_name: "User", email: "test@example.com"}
  {:ok, _} = Mv.Membership.create_member(fixture_attrs)

  # 1000 repetitions of a two-word phrase.
  search_term = String.duplicate("test query ", 1000)

  members =
    Mv.Membership.Member
    |> Mv.Membership.Member.fuzzy_search(%{query: search_term})
    |> Ash.read!()

  # Only checks that the read completes and yields a list; the contents are not asserted.
  assert is_list(members)
end
end