1
0
Fork 0
mirror of https://github.com/mealie-recipes/mealie.git synced 2025-08-05 05:25:26 +02:00

feat: search tokenization, handling of quoted literal search, and postgres fuzziness (#2351)

* Creating postgres migration script and starting to set up to detect database

* non-working placeholders for postgres pg_trgm

* First draft of some indexes

* non-working commit of postgres indexing

* Further non-working edits to db-centric fuzzy search

* update alembic for extensions

* More non-working setup

* Move db type check to init_db

* fix typo in db name check

* Add sqlite token search and postgres full text search

* reorder search to hit exact matches faster

* Add settings and docs for POSTGRES_LANGUAGE (full text search)

* Use user-specified POSTGRES_LANGUAGE in search

* fix fuzzy search typo

* Remove full text search and instead order by trigram match

* cleaner adding of indices, remove fulltext

* Cleanup old import of getting app settings

* Fix typo in index

* Fix some alembic fuzzy typos

* Remove diagnostic printing from alembic migration

* Fix mixed up commutator for trigram operator and relax criteria

* forgot to remove query debug

* sort only on name

* token and fuzzy search tests

* Refactor recipe search test to avoid rare random string cross-matches.

* Add ability to quote parts of search for exact match

* Remove internal punctuation, unless it's quoted for literal search

* Add tests for special character removal and literal search

* Remove the outer double quotes from searches, but leave internal single quotes alone.

* Update tests to avoid intra-test name collisions

* Fixing leftovers highlighted by lint

* cleanup linting and mypy errors

* Fix test cross-matching on dirty db (leftovers from bulk import)

* forgot to clean up something when debugging mypy errors

* re-order pg_trgm loading in postgres

* address comments
This commit is contained in:
Jacob Corn 2023-05-28 19:46:53 +02:00 committed by GitHub
parent 27ebb4c462
commit 7e0d29afc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 304 additions and 43 deletions

View file

@ -432,36 +432,28 @@ def test_recipe_repo_pagination_by_foods(database: AllRepositories, unique_user:
def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser):
ingredient_1 = random_string(10)
ingredient_2 = random_string(10)
name_part_1 = random_string(10)
name_1 = f"{name_part_1} soup"
name_part_2 = random_string(10)
name_2 = f"Rustic {name_part_2} stew"
name_3 = f"{ingredient_1} Soup"
description_part_1 = random_string(10)
recipes = [
Recipe(
user_id=unique_user.user_id,
group_id=unique_user.group_id,
name=name_1,
description=f"My favorite {description_part_1}",
name="Steinbock Sloop",
description=f"My favorite horns are delicious",
recipe_ingredient=[
RecipeIngredient(note=ingredient_1),
RecipeIngredient(note="alpine animal"),
],
),
Recipe(
user_id=unique_user.user_id,
group_id=unique_user.group_id,
name=name_2,
name="Fiddlehead Fern Stir Fry",
recipe_ingredient=[
RecipeIngredient(note=ingredient_2),
RecipeIngredient(note="moss"),
],
),
Recipe(
user_id=unique_user.user_id,
group_id=unique_user.group_id,
name=name_3,
name="Animal Sloop",
),
# Test diacritics
Recipe(
@ -481,28 +473,50 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser):
assert len(empty_result) == 0
# Search by title
title_result = database.recipes.page_all(pagination_query, search=name_part_2).items
title_result = database.recipes.page_all(pagination_query, search="Steinbock").items
assert len(title_result) == 1
assert title_result[0].name == name_2
assert title_result[0].name == "Steinbock Sloop"
# Search by description
description_result = database.recipes.page_all(pagination_query, search=description_part_1).items
description_result = database.recipes.page_all(pagination_query, search="horns").items
assert len(description_result) == 1
assert description_result[0].name == name_1
assert description_result[0].name == "Steinbock Sloop"
# Search by ingredient
ingredient_result = database.recipes.page_all(pagination_query, search=ingredient_2).items
ingredient_result = database.recipes.page_all(pagination_query, search="moss").items
assert len(ingredient_result) == 1
assert ingredient_result[0].name == name_2
assert ingredient_result[0].name == "Fiddlehead Fern Stir Fry"
# Make sure title matches are ordered in front
ordered_result = database.recipes.page_all(pagination_query, search=ingredient_1).items
ordered_result = database.recipes.page_all(pagination_query, search="animal sloop").items
assert len(ordered_result) == 2
assert ordered_result[0].name == name_3
assert ordered_result[1].name == name_1
assert ordered_result[0].name == "Animal Sloop"
assert ordered_result[1].name == "Steinbock Sloop"
# Test literal search
literal_result = database.recipes.page_all(pagination_query, search='"Animal Sloop"').items
assert len(literal_result) == 1
assert literal_result[0].name == "Animal Sloop"
# Test special character removal from non-literal searches
character_result = database.recipes.page_all(pagination_query, search="animal-sloop").items
assert len(character_result) == 2
assert character_result[0].name == "Animal Sloop"
assert character_result[1].name == "Steinbock Sloop"
# Test string normalization
normalized_result = database.recipes.page_all(pagination_query, search="ratat").items
print([r.name for r in normalized_result])
assert len(normalized_result) == 1
assert normalized_result[0].name == "Rátàtôuile"
# Test token separation
token_result = database.recipes.page_all(pagination_query, search="delicious horns").items
assert len(token_result) == 1
assert token_result[0].name == "Steinbock Sloop"
# Test fuzzy search
if database.session.get_bind().name == "postgresql":
fuzzy_result = database.recipes.page_all(pagination_query, search="Steinbuck").items
assert len(fuzzy_result) == 1
assert fuzzy_result[0].name == "Steinbock Sloop"