1
0
Fork 0
mirror of https://github.com/mealie-recipes/mealie.git synced 2025-07-24 15:49:42 +02:00
mealie/mealie/tests/test_recipes/test_scraper.py
Hayden 88dfd40b8d
Release v0.1.0 Candidate (#85)
* Changed uvicorn port to 80

* Changed port in docker-compose to match dockerfile

* Readded environment variables in docker-compose

* production image rework

* Use opengraph metadata to make basic recipe cards when full recipe metadata is not available

* fixed instrucitons on parse

* add last_recipe

* automated testing

* roadmap update

* Sqlite (#75)

* file structure

* auto-test

* take 2

* refactor ap scheduler and startup process

* fixed scraper error

* database abstraction

* database abstraction

* port recipes over to new schema

* meal migration

* start settings migration

* finale mongo port

* backup improvements

* migration imports to new DB structure

* unused import cleanup

* docs strings

* settings and theme import logic

* cleanup

* fixed tinydb error

* requirements

* fuzzy search

* remove scratch file

* sqlalchemy models

* improved search ui

* recipe models almost done

* sql modal population

* del scratch

* rewrite database model mixins

* mostly grabage

* recipe updates

* working sqllite

* remove old files and reorganize

* final cleanup

Co-authored-by: Hayden <hay-kot@pm.me>

* Backup card (#78)

* backup / import dialog

* upgrade to new tag method

* New import card

* rename settings.py to app_config.py

* migrate to poetry for development

* fix failing test

Co-authored-by: Hayden <hay-kot@pm.me>

* added mkdocs to docker-compose

* Translations (#72)

* Translations + danish

* changed back proxy target to use ENV

* Resolved more merge conflicts

* Removed test in translation

* Documentation of translations

* Updated translations

* removed old packages

Co-authored-by: Hayden <64056131+hay-kot@users.noreply.github.com>

* fail to start bug fixes

* feature: prep/cook/total time slots (#80)

Co-authored-by: Hayden <hay-kot@pm.me>

* missing bind attributes

* Bug fixes (#81)

* fix: url remains after succesful import

* docs: changelog + update todos

* arm image

* arm compose

* compose updates

* update poetry

* arm support

Co-authored-by: Hayden <hay-kot@pm.me>

* dockerfile hotfix

* dockerfile hotfix

* Version Release Final Touches (#84)

* Remove slim

* bug: opacity issues

* bug: startup failure with no database

* ci/cd on dev branch

* formatting

* v0.1.0 documentation

Co-authored-by: Hayden <hay-kot@pm.me>

* db init hotfix

* bug: fix crash in mongo

* fix mongo bug

* fixed version notifier

* finale changelog

Co-authored-by: kentora <=>
Co-authored-by: Hayden <hay-kot@pm.me>
Co-authored-by: Richard Mitic <richard.h.mitic@gmail.com>
Co-authored-by: kentora <kentora@kentora.dk>
2021-01-17 22:22:54 -09:00

99 lines
3.4 KiB
Python

import json
import re
from pathlib import Path
import pytest
from services.scrape_services import (
extract_recipe_from_html,
normalize_data,
normalize_instructions,
)
CWD = Path(__file__).parent
RAW_RECIPE_DIR = CWD.parent.joinpath("data", "recipes-raw")
RAW_HTML_DIR = CWD.parent.joinpath("data", "html-raw")
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
url_validation_regex = re.compile(
r"^(?:http|ftp)s?://" # http:// or https://
r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|" # domain...
r"localhost|" # localhost...
r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip
r"(?::\d+)?" # optional port
r"(?:/?|[/?]\S+)$",
re.IGNORECASE,
)
@pytest.mark.parametrize(
"json_file,num_steps",
[
("best-homemade-salsa-recipe.json", 2),
(
"blue-cheese-stuffed-turkey-meatballs-with-raspberry-balsamic-glaze-2.json",
3,
),
("bon_appetit.json", 8),
("chunky-apple-cake.json", 4),
("dairy-free-impossible-pumpkin-pie.json", 7),
("how-to-make-instant-pot-spaghetti.json", 8),
("instant-pot-chicken-and-potatoes.json", 4),
("instant-pot-kerala-vegetable-stew.json", 13),
("jalapeno-popper-dip.json", 4),
("microwave_sweet_potatoes_04783.json", 4),
("moroccan-skirt-steak-with-roasted-pepper-couscous.json", 4),
("Pizza-Knoblauch-Champignon-Paprika-vegan.html.json", 3),
],
)
def test_normalize_data(json_file, num_steps):
recipe_data = normalize_data(json.load(open(RAW_RECIPE_DIR.joinpath(json_file))))
assert len(recipe_data["recipeInstructions"]) == num_steps
@pytest.mark.parametrize(
"instructions",
[
"A\n\nB\n\nC\n\n",
"A\nB\nC\n",
"A\r\n\r\nB\r\n\r\nC\r\n\r\n",
"A\r\nB\r\nC\r\n",
["A", "B", "C"],
[{"@type": "HowToStep", "text": x} for x in ["A", "B", "C"]],
],
)
def test_normalize_instructions(instructions):
assert normalize_instructions(instructions) == [
{"text": "A"},
{"text": "B"},
{"text": "C"},
]
def test_html_no_recipe_data():
path = RAW_HTML_DIR.joinpath("carottes-rapps-with-rice-and-sunflower-seeds.html")
url = "https://www.feedtheswimmers.com/blog/2019/6/5/carottes-rapps-with-rice-and-sunflower-seeds"
recipe_data = extract_recipe_from_html(open(path).read(), url)
assert len(recipe_data["name"]) > 10
assert len(recipe_data["slug"]) > 10
assert recipe_data["orgURL"] == url
assert len(recipe_data["description"]) > 100
assert url_validation_regex.match(recipe_data["image"])
assert recipe_data["recipeIngredient"] == ["Could not detect ingredients"]
assert recipe_data["recipeInstructions"] == [
{"text": "Could not detect instructions"}
]
def test_html_with_recipe_data():
path = RAW_HTML_DIR.joinpath("healthy_pasta_bake_60759.html")
url = "https://www.bbc.co.uk/food/recipes/healthy_pasta_bake_60759"
recipe_data = extract_recipe_from_html(open(path).read(), url)
assert len(recipe_data["name"]) > 10
assert len(recipe_data["slug"]) > 10
assert recipe_data["orgURL"] == url
assert len(recipe_data["description"]) > 100
assert url_validation_regex.match(recipe_data["image"])
assert len(recipe_data["recipeIngredient"]) == 13
assert len(recipe_data["recipeInstructions"]) == 4