Mirror of https://github.com/mealie-recipes/mealie.git (synced 2025-08-02 20:15:24 +02:00)
Feature/improve error message on scrape (#476)
* add better feedback on failed scrape
* fix json download link
* add better recipe parser
* bump deps
* fix force open on mobile
* formatting
* rewrite scraper to use new library
* fix failing tests
* bookmarklet support
* bookmarklet instructions
* recipes changelog

Co-authored-by: hay-kot <hay-kot@pm.me>
This commit is contained in:
parent 3702331630
commit a78fbea711

22 changed files with 658 additions and 15582 deletions
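The diffs below update the tests for the new module-level cleaner functions and for scraper.create_from_url as the single scraping entry point. As a minimal sketch of the "better feedback on failed scrape" idea, assuming only the create_from_url helper that the new tests exercise (the wrapper name, exception handling, and message wording are illustrative, not Mealie's actual implementation):

```python
# Hypothetical caller-side sketch: wrap the scraper entry point exercised by
# the new tests and turn a failed scrape into a readable error message.
# Only scraper.create_from_url comes from this commit; the function name,
# broad exception handling, and message text are illustrative assumptions.
from mealie.services.scraper import scraper


def try_scrape(url: str):
    try:
        return scraper.create_from_url(url)
    except Exception as exc:  # the real code may raise a more specific error
        # Surface actionable feedback instead of a bare traceback.
        raise ValueError(
            f"Unable to parse a recipe from {url}. "
            "The site may not publish recipe metadata; try the bookmarklet "
            "or create the recipe manually."
        ) from exc
```

The real service may raise a more specific exception or return a structured error to the frontend; the point is only that a failed scrape should yield an actionable message.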
@@ -2,8 +2,8 @@ import json
 import re

 import pytest
-from mealie.services.scraper.cleaner import Cleaner
-from mealie.services.scraper.scraper import extract_recipe_from_html
+from mealie.services.scraper import cleaner
+from mealie.services.scraper.scraper import open_graph
 from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES

 # https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
@@ -39,23 +39,23 @@ url_validation_regex = re.compile(
     ],
 )
 def test_cleaner_clean(json_file, num_steps):
-    recipe_data = Cleaner.clean(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
+    recipe_data = cleaner.clean(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
     assert len(recipe_data["recipeInstructions"]) == num_steps


 def test_clean_category():
-    assert Cleaner.category("my-category") == ["my-category"]
+    assert cleaner.category("my-category") == ["my-category"]


-def test_clean_html():
-    assert Cleaner.html("<div>Hello World</div>") == "Hello World"
+def test_clean_string():
+    assert cleaner.clean_string("<div>Hello World</div>") == "Hello World"


 def test_clean_image():
-    assert Cleaner.image(None) == "no image"
-    assert Cleaner.image("https://my.image/path/") == "https://my.image/path/"
-    assert Cleaner.image({"url": "My URL!"}) == "My URL!"
-    assert Cleaner.image(["My URL!", "MY SECOND URL"]) == "My URL!"
+    assert cleaner.image(None) == "no image"
+    assert cleaner.image("https://my.image/path/") == "https://my.image/path/"
+    assert cleaner.image({"url": "My URL!"}) == "My URL!"
+    assert cleaner.image(["My URL!", "MY SECOND URL"]) == "My URL!"


 @pytest.mark.parametrize(
@@ -70,7 +70,7 @@ def test_clean_image():
     ],
 )
 def test_cleaner_instructions(instructions):
-    assert Cleaner.instructions(instructions) == [
+    assert cleaner.instructions(instructions) == [
         {"text": "A"},
         {"text": "B"},
         {"text": "C"},
@@ -80,20 +80,18 @@ def test_cleaner_instructions(instructions):
 def test_html_with_recipe_data():
     path = TEST_RAW_HTML.joinpath("healthy_pasta_bake_60759.html")
     url = "https://www.bbc.co.uk/food/recipes/healthy_pasta_bake_60759"
-    recipe_data = extract_recipe_from_html(open(path, encoding="utf8").read(), url)
+    recipe_data = open_graph.basic_recipe_from_opengraph(open(path, encoding="utf8").read(), url)

     assert len(recipe_data["name"]) > 10
     assert len(recipe_data["slug"]) > 10
     assert recipe_data["orgURL"] == url
     assert len(recipe_data["description"]) > 100
     assert url_validation_regex.match(recipe_data["image"])
-    assert len(recipe_data["recipeIngredient"]) == 13
-    assert len(recipe_data["recipeInstructions"]) == 4


 def test_time_cleaner():

     my_time_delta = "PT2H30M"
-    return_delta = Cleaner.time(my_time_delta)
+    return_delta = cleaner.clean_time(my_time_delta)

     assert return_delta == "2 Hours 30 Minutes"
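The expectation in test_time_cleaner above ("PT2H30M" becomes "2 Hours 30 Minutes") concerns ISO-8601 durations found in recipe metadata. Below is a minimal sketch of such a conversion for illustration only; Mealie's actual cleaner.clean_time likely also handles days, seconds, and malformed values:

```python
# Illustrative only: a minimal clean_time that satisfies the expectation in
# test_time_cleaner ("PT2H30M" -> "2 Hours 30 Minutes"). Not Mealie's real
# implementation, which may cover more duration formats and edge cases.
import re


def clean_time(duration: str) -> str:
    match = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?", duration)
    if not match:
        return duration
    hours, minutes = (int(g) if g else 0 for g in match.groups())
    parts = []
    if hours:
        parts.append(f"{hours} Hour" + ("s" if hours != 1 else ""))
    if minutes:
        parts.append(f"{minutes} Minute" + ("s" if minutes != 1 else ""))
    return " ".join(parts) or duration


assert clean_time("PT2H30M") == "2 Hours 30 Minutes"
```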
tests/unit_tests/test_recipe_parser.py (new file, 62 lines)
@@ -0,0 +1,62 @@
+from dataclasses import dataclass
+
+import pytest
+from mealie.services.scraper import scraper
+
+
+@dataclass
+class RecipeSiteTestCase:
+    url: str
+    expected_slug: str
+    num_ingredients: int
+    num_steps: int
+
+
+test_cases = [
+    RecipeSiteTestCase(
+        url="https://www.seriouseats.com/taiwanese-three-cup-chicken-san-bei-gi-recipe",
+        expected_slug="taiwanese-three-cup-chicken-san-bei-ji-recipe",
+        num_ingredients=10,
+        num_steps=3,
+    ),
+    RecipeSiteTestCase(
+        url="https://www.rezeptwelt.de/backen-herzhaft-rezepte/schinken-kaese-waffeln-ohne-viel-schnickschnack/4j0bkiig-94d4d-106529-cfcd2-is97x2ml",
+        expected_slug="schinken-kase-waffeln-ohne-viel-schnickschnack",
+        num_ingredients=7,
+        num_steps=1,  # Malformed JSON data; steps can't be parsed, so only one string is returned
+    ),
+    RecipeSiteTestCase(
+        url="https://cookpad.com/us/recipes/5544853-sous-vide-smoked-beef-ribs",
+        expected_slug="sous-vide-smoked-beef-ribs",
+        num_ingredients=7,
+        num_steps=12,
+    ),
+    RecipeSiteTestCase(
+        url="https://www.greatbritishchefs.com/recipes/jam-roly-poly-recipe",
+        expected_slug="jam-roly-poly-with-custard",
+        num_ingredients=13,
+        num_steps=9,
+    ),
+    RecipeSiteTestCase(
+        url="https://recipes.anovaculinary.com/recipe/sous-vide-shrimp",
+        expected_slug="sous-vide-shrimp",
+        num_ingredients=5,
+        num_steps=0,
+    ),
+    RecipeSiteTestCase(
+        url="https://www.bonappetit.com/recipe/detroit-style-pepperoni-pizza",
+        expected_slug="detroit-style-pepperoni-pizza",
+        num_ingredients=8,
+        num_steps=5,
+    ),
+]
+
+
+@pytest.mark.parametrize("recipe_test_data", test_cases)
+def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
+    recipe = scraper.create_from_url(recipe_test_data.url)
+
+    assert recipe.slug == recipe_test_data.expected_slug
+    assert len(recipe.recipe_instructions) == recipe_test_data.num_steps
+    assert len(recipe.recipe_ingredient) == recipe_test_data.num_ingredients
+    assert recipe.org_url == recipe_test_data.url
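Note that these parser tests fetch live pages from external recipe sites. One possible guard, not part of this commit, is to skip them when the network is unavailable; the helper below layers on top of the test module above and uses only standard pytest and socket APIs:

```python
# Hypothetical add-on for the test module above: skip the live-network
# parser tests when the external recipe sites are unreachable.
import socket

import pytest


def _online(host: str = "www.seriouseats.com", port: int = 443, timeout: float = 3.0) -> bool:
    """Return True if a TCP connection to one of the test sites succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False


requires_network = pytest.mark.skipif(not _online(), reason="external recipe sites unreachable")

# Usage: decorate the parametrized test, e.g.
#
# @requires_network
# @pytest.mark.parametrize("recipe_test_data", test_cases)
# def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
#     ...
```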