mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-07-19 13:19:41 +02:00
* add universal toast plugin * add server side locales * integrate CRF++ into CI/CD Pipeline * docs(docs): 📝 add recipe parser docs * feat(backend): ✨ Continued work on ingredient parsers * add new model dest * feat(frontend): ✨ New ingredient parser page * formatting Co-authored-by: Hayden <hay-kot@pm.me>
38 lines
1.5 KiB
Python
38 lines
1.5 KiB
Python
import re
|
|
|
|
|
|
def clumpFractions(s):
    """
    Join the integer and fractional parts of a mixed number into one token.

    The whitespace between a whole number and the fraction that follows it is
    replaced with a dollar sign, so the quantity survives later splitting on
    whitespace as a single token. The rest of the string is left alone.

        clumpFractions("aaa 1 2/3 bbb")
        # => "aaa 1$2/3 bbb"

        clumpFractions("1 11/16 inches")
        # => "1$11/16 inches"

    :param s: the ingredient text to process
    :return: a copy of ``s`` with every "<int> <num>/<den>" rewritten as
        "<int>$<num>/<den>"
    """
    # Numerator and denominator use \d+ (not a single \d) so that fractions
    # such as 11/16 or 10/3 are clumped as well.
    return re.sub(r"(\d+)\s+(\d+)/(\d+)", r"\1$\2/\3", s)
|
|
|
|
|
|
def tokenize(s):
    """
    Tokenize on parenthesis, punctuation, spaces and American units followed by a slash.

    We sometimes give American units and metric units for baking recipes. For example:
        * 2 tablespoons/30 milliliters milk or cream
        * 2 1/2 cups/300 grams all-purpose flour

    The recipe database only allows for one unit, and we want to use the American one.
    But we must split the text on "cups/" etc. in order to pick it up.

    Before splitting, abbreviated units written directly after a number are
    expanded ("100g" -> "100 grams", "8oz" -> "8 ounces",
    "250ml" -> "250 milliliters") and mixed numbers are clumped into a single
    token by ``clumpFractions`` ("2 1/2" -> "2$1/2").

        tokenize("2 1/2 cups/300 grams all-purpose flour")
        # => ["2$1/2", "cups", "300", "grams", "all-purpose", "flour"]

    :param s: the ingredient text to tokenize
    :return: list of non-empty tokens; commas and parentheses are kept as
        tokens of their own, whitespace is discarded
    """
    # Handle abbreviations like "100g" by treating them as "100 grams".
    # The trailing \b makes sure only the bare abbreviation is expanded, so
    # words that merely start with the same letters ("1gallon", "100grams")
    # are left untouched.
    s = re.sub(r"(\d+)g\b", r"\1 grams", s)
    s = re.sub(r"(\d+)oz\b", r"\1 ounces", s)
    s = re.sub(r"(\d+)ml\b", r"\1 milliliters", s, flags=re.IGNORECASE)

    # TODO: Replace american_units with list of units from database?
    american_units = ["cup", "tablespoon", "teaspoon", "pound", "ounce", "quart", "pint"]
    # Replace the slash that follows an American unit (singular or plural)
    # with a space, so the unit and the metric amount become separate tokens.
    for unit in american_units:
        s = s.replace(unit + "/", unit + " ")
        s = s.replace(unit + "s/", unit + "s ")

    # The capturing group keeps the separators in the split result; the filter
    # then drops the empty and whitespace-only pieces, leaving "," "(" and ")"
    # as tokens.
    pieces = re.split(r"([,()\s])", clumpFractions(s))
    return [piece.strip() for piece in pieces if piece.strip()]
|