
Convert scraper to use async (#1915)

* add httpx dependency for async HTTP requests

* rework scraper strategies to download recipe HTML asynchronously (see the httpx sketch below)

* rework recipe_data_service to download recipe images asynchronously

* fix recipe_parser test so it works with async results

* fix bulk import so that it also works with the async scraper

* fix broken recipe_parser tests

* Fix issues found by scanners

* Add additional checks for ingredient and instruction count in test_create_by_url

* Revert changes in test recipe_data
Since we now check ingredient and instruction counts in test_create_by_url, these would fail against the stored HTML recipe data

* Add explicit type annotation in recipe_data_service.largest_content_len

* Fix typo in annotation
Authored by Sören on 2023-01-29 01:43:27 +01:00; committed by GitHub
parent 7275dd2696
commit 3415a9c310
11 changed files with 129 additions and 115 deletions
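
The gist of the change: blocking HTTP downloads in the scraper are replaced with httpx, so recipe HTML (and recipe images in recipe_data_service) can be fetched without tying up the event loop. A minimal sketch of the download pattern, assuming a hypothetical safe_scrape_html helper; Mealie's real scraper-strategy code is more involved:

import httpx

async def safe_scrape_html(url: str) -> str:
    # Sketch of the async download this commit introduces; the timeout
    # value and error handling here are assumptions, not Mealie's code.
    async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
        try:
            response = await client.get(url)
            response.raise_for_status()
        except httpx.HTTPError:
            return ""  # callers can treat empty HTML as a failed scrape
        return response.text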


@@ -21,7 +21,7 @@ def test_bulk_import(api_client: TestClient, unique_user: TestUser):
     response = api_client.post(api_routes.recipes_create_url_bulk, json=recipes, headers=unique_user.token)
-    assert response.status_code == 201
+    assert response.status_code == 202
     for slug in slugs:
         response = api_client.get(api_routes.recipes_slug(slug), headers=unique_user.token)
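
The expected status code changes from 201 Created to 202 Accepted because bulk import now queues the scraping work instead of finishing it inside the request. A rough sketch of that endpoint shape, assuming FastAPI background tasks; the route path and worker below are made up for illustration:

from fastapi import APIRouter, BackgroundTasks

router = APIRouter()

async def bulk_scrape(urls: list[str]) -> None:
    # Hypothetical worker that feeds each URL to the async scraper.
    ...

@router.post("/recipes/create-url/bulk", status_code=202)
async def create_url_bulk(urls: list[str], background_tasks: BackgroundTasks):
    # 202: the work is queued, not done, when the response goes out.
    background_tasks.add_task(bulk_scrape, urls)
    return {"detail": "bulk import started"}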


@@ -89,6 +89,16 @@ def test_create_by_url(
     assert response.status_code == 201
     assert json.loads(response.text) == recipe_data.expected_slug
+    recipe = api_client.get(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token)
+    assert recipe.status_code == 200
+
+    recipe_dict: dict = json.loads(recipe.text)
+    assert recipe_dict["slug"] == recipe_data.expected_slug
+    assert len(recipe_dict["recipeInstructions"]) == recipe_data.num_steps
+    assert len(recipe_dict["recipeIngredient"]) == recipe_data.num_ingredients
+
 def test_create_by_url_with_tags(
     api_client: TestClient,


@@ -16,8 +16,9 @@ and then use this test case by removing the `@pytest.mark.skip` and than testing
 @pytest.mark.skipif(True, reason="Long Running API Test - manually run when updating the parser")
 @pytest.mark.parametrize("recipe_test_data", test_cases)
-def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
-    recipe, _ = scraper.create_from_url(recipe_test_data.url)
+@pytest.mark.asyncio
+async def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
+    recipe, _ = await scraper.create_from_url(recipe_test_data.url)
     assert recipe.slug == recipe_test_data.expected_slug
     assert len(recipe.recipe_instructions) == recipe_test_data.num_steps
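
The @pytest.mark.asyncio marker comes from the pytest-asyncio plugin; without it, pytest would collect the coroutine but never await it. A self-contained toy version of the pattern, with a stand-in create_from_url (not Mealie's actual function body):

import pytest

async def create_from_url(url: str) -> tuple[dict, str]:
    # Stand-in for the async scraper entry point; a real implementation
    # would await an httpx download here, as in the earlier sketch.
    html = "<html></html>"
    return {"slug": url.rstrip("/").rsplit("/", 1)[-1]}, html

@pytest.mark.asyncio
async def test_create_from_url():
    recipe, _ = await create_from_url("https://example.com/recipes/jam-roly-poly")
    assert recipe["slug"] == "jam-roly-poly"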


@@ -45,7 +45,7 @@ def get_recipe_test_cases():
             html="jam-roly-poly-with-custard.html",
             html_file=test_data.html_jam_roly_poly_with_custard,
             expected_slug="jam-roly-poly-with-custard",
-            num_ingredients=13,
+            num_ingredients=11,
             num_steps=9,
         ),
         RecipeSiteTestCase(
@@ -54,7 +54,7 @@ def get_recipe_test_cases():
             html_file=test_data.html_sous_vide_shrimp,
             expected_slug="sous-vide-shrimp",
             num_ingredients=5,
-            num_steps=0,
+            num_steps=1,
         ),
         RecipeSiteTestCase(
             url="https://www.bonappetit.com/recipe/detroit-style-pepperoni-pizza",