mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-08-02 20:15:24 +02:00
feat: improve automatic ingredient linking (#1836)
* Filtering special characters during automatic linking of ingredients to instructions. Used a Unicode group to have a set of all Unicode punctuation marks. * Allowing for linking of ingredients to instructions at the beginning of a new line in the instruction. * Extracted ingredient matching into a composable and added tests. Ignoring 2-letter words to avoid false matches: while testing the code, 2-letter matches were a large source of false positives.
This commit is contained in:
parent
19ae89a195
commit
83b8ce659e
3 changed files with 135 additions and 44 deletions
|
@ -0,0 +1,68 @@
|
|||
import { describe, expect, test } from "vitest";
|
||||
import { useExtractIngredientReferences } from "./use-extract-ingredient-references";
|
||||
|
||||
// Punctuation samples that may surround an ingredient word inside an instruction text.
const punctuationMarks = ["*", "?", "/", "!", "**", "&", "."];

// Every case passes `true` for recipeIngredientAmountsDisabled and relies on the
// ingredient's `note` as the text the instruction words are matched against.
describe("test use extract ingredient references", () => {
  test("when text empty return empty", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "", true);

    expect(result).toStrictEqual(new Set());
  });

  // "Onion" is only a part of the note "Onions".
  test("when the ingredient matches partially and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onion", true);

    expect(result).toEqual(new Set(["123"]));
  });

  // %s puts the punctuation mark under test into the reported test name.
  test.each(punctuationMarks)("when ingredient is suffixed by punctuation %s, return the referenceId", (suffix) => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onion" + suffix, true);

    expect(result).toEqual(new Set(["123"]));
  });

  test.each(punctuationMarks)("when ingredient is prefixed by punctuation %s, return the referenceId", (prefix) => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing " + prefix + "Onion", true);

    expect(result).toEqual(new Set(["123"]));
  });

  test("when ingredient is first on a multiline, return the referenceId", () => {
    const multilineString = "lksjdlk\nOnion";
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], multilineString, true);

    expect(result).toEqual(new Set(["123"]));
  });

  // The word in the text is identical to the note.
  test("when the ingredient matches exactly and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing Onions", true);

    expect(result).toEqual(new Set(["123"]));
  });

  test("when the ingredient matches with different casing and has a reference id, return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onions", referenceId: "123" }], [], "A sentence containing oNions", true);

    expect(result).toEqual(new Set(["123"]));
  });

  test("when no ingredients, return empty", () => {
    const result = useExtractIngredientReferences([], [], "A sentence containing oNions", true);

    expect(result).toEqual(new Set());
  });

  test("when an ingredient matches but is already in the existing referenceIds, do not return the referenceId", () => {
    const result = useExtractIngredientReferences([{ note: "Onion", referenceId: "123" }], ["123"], "A sentence containing Onion", true);

    expect(result).toEqual(new Set());
  });

  test("when a word is 2 letters or shorter, it is ignored", () => {
    const result = useExtractIngredientReferences([{ note: "Onion", referenceId: "123" }], [], "A sentence containing On", true);

    expect(result).toEqual(new Set());
  });
});
|
|
@ -0,0 +1,60 @@
|
|||
import { RecipeIngredient } from "~/lib/api/types/recipe";
|
||||
import { parseIngredientText } from "~/composables/recipes";
|
||||
|
||||
|
||||
/**
 * Prepares a single word for ingredient matching by trimming punctuation
 * from both of its ends.
 *
 * @param word - One whitespace-delimited token.
 * @returns The token without trailing and leading punctuation.
 */
function normalize(word: string): string {
  const withoutTrailing = removeTrailingPunctuation(word);
  return removeStartingPunctuation(withoutTrailing);
}
|
||||
|
||||
/**
 * Drops the run of Unicode punctuation characters (`\p{P}`) at the end of a word.
 *
 * @param word - The token to trim.
 * @returns The token without its trailing punctuation; punctuation elsewhere is kept.
 */
function removeTrailingPunctuation(word: string): string {
  // The `u` flag is required for the \p{P} Unicode property escape.
  return word.replace(/\p{P}+$/u, "");
}
|
||||
|
||||
/**
 * Drops the run of Unicode punctuation characters (`\p{P}`) at the start of a word.
 *
 * @param word - The token to trim.
 * @returns The token without its leading punctuation; punctuation elsewhere is kept.
 */
function removeStartingPunctuation(word: string): string {
  // The `u` flag is required for the \p{P} Unicode property escape.
  return word.replace(/^\p{P}+/u, "");
}
|
||||
|
||||
/**
 * Checks whether a word occurs anywhere inside the ingredient's text.
 *
 * The ingredient is turned into text by `parseIngredientText` (its exact output
 * format is defined elsewhere), and the comparison is a case-insensitive
 * substring test, so a word only needs to be a part of the ingredient text.
 *
 * @param ingredient - The ingredient to search in.
 * @param word - The word looked for in the ingredient text.
 * @param recipeIngredientAmountsDisabled - Forwarded unchanged to `parseIngredientText`.
 * @returns `true` when the lower-cased ingredient text contains the lower-cased word.
 */
function ingredientMatchesWord(ingredient: RecipeIngredient, word: string, recipeIngredientAmountsDisabled: boolean) {
  const haystack = parseIngredientText(ingredient, recipeIngredientAmountsDisabled).toLowerCase();
  const needle = word.toLowerCase();
  return haystack.includes(needle);
}
|
||||
|
||||
// Common filler words that must never be auto-linked to an ingredient.
// Built once instead of on every call.
const BLACKLISTED_WORDS: ReadonlySet<string> = new Set(["and", "the", "for", "with", "without"]);

// Matches any word that contains at least one digit. Deliberately has no `g` flag,
// so `test()` is stateless and the shared instance is safe to reuse.
const CONTAINS_DIGIT = /\d/;

/**
 * Tells whether a word must be ignored when auto-linking ingredients.
 *
 * This is a somewhat kludgy implementation: common words are blacklisted, but other
 * common phrases still trigger false positives, and it is not clear how else to
 * approach this. A possible future improvement is to look directly at the food
 * value and check whether the food appears in the instructions, while still
 * supporting users who do not provide that value and only use the "notes" feature.
 *
 * The word-list comparison is exact and the list is lower-case, so callers should
 * pass lower-cased words.
 *
 * @param word - The word to check.
 * @returns `true` when the word is a blacklisted filler word or contains a digit,
 *          otherwise `false` (always a real boolean, never a match array or `null`).
 */
function isBlackListedWord(word: string): boolean {
  return BLACKLISTED_WORDS.has(word) || CONTAINS_DIGIT.test(word);
}
|
||||
|
||||
/**
 * Finds the ingredients that are mentioned in a piece of instruction text.
 *
 * The text is lower-cased, split on whitespace and every word is normalized.
 * Words of two characters or fewer and blacklisted words are skipped; each
 * remaining word is compared against every ingredient that has a reference id
 * which is not already linked.
 *
 * @param recipeIngredients - All ingredients of the recipe.
 * @param activeRefs - Reference ids that are already linked and must not be returned.
 * @param text - The instruction text to scan.
 * @param recipeIngredientAmountsDisabled - Forwarded to the ingredient matching.
 * @returns The distinct reference ids of all newly matched ingredients.
 */
export function useExtractIngredientReferences(recipeIngredients: RecipeIngredient[], activeRefs: string[], text: string, recipeIngredientAmountsDisabled: boolean): Set<string> {
  // Only ingredients with a reference id that is not linked yet can be matched.
  const candidates = recipeIngredients.filter(
    (ingredient) => ingredient.referenceId !== undefined && !activeRefs.includes(ingredient.referenceId as string)
  );

  // A Set keeps each matched id only once.
  const matchedIds = new Set<string>();

  for (const rawWord of text.toLowerCase().split(/\s/)) {
    const word = normalize(rawWord);

    // Very short words and blacklisted words are not matched at all.
    if (word.length <= 2 || isBlackListedWord(word)) {
      continue;
    }

    for (const candidate of candidates) {
      if (ingredientMatchesWord(candidate, word, recipeIngredientAmountsDisabled)) {
        matchedIds.add(candidate.referenceId as string);
      }
    }
  }

  return matchedIds;
}
|
Loading…
Add table
Add a link
Reference in a new issue