
fix: More Backup Restore Fixes (#2859)

* refactor normalized search migration to use dummy default

* changed group slug migration to use raw SQL

* updated comment

* added tests with anonymized backups (currently failing)

* typo

* fixed LDAP enum in test data

* fix for adding label settings across groups

* add migration data fixes

* fix shopping list label settings test

* re-run db init instead of just running alembic migration, to include fixes

* intentionally broke SQLAlchemy GUID handling

* safely convert between GUID types in different databases

* restore original test data after testing backup restores

* added missing group name update to migration
Michael Genson authored 2024-01-02 22:19:04 -06:00, committed by GitHub
parent b3f7f2d89f
commit 7602c67449
14 changed files with 422 additions and 45 deletions
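
The first two bullets in the commit message describe switching the Alembic data migrations from mutating ORM objects to issuing raw SQL, so that restoring an old backup does not depend on the current ORM model definitions. A minimal sketch of that write pattern, using a hypothetical example_table rather than any real Mealie table (the real migrations still read via lightweight models declared inside the migration file):

import sqlalchemy as sa
from sqlalchemy import orm


def do_data_migration(bind):
    # Illustrative only: update rows via raw SQL instead of mutating ORM objects,
    # so the migration works regardless of how the models look today.
    session = orm.Session(bind=bind)

    rows = session.execute(sa.text("SELECT id, name FROM example_table")).all()
    for row in rows:
        if row.name is None:
            continue
        session.execute(
            sa.text("UPDATE example_table SET name_normalized=:name_normalized WHERE id=:id").bindparams(
                name_normalized=row.name.lower().strip(), id=row.id
            )
        )

    session.commit()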

View file

@@ -7,12 +7,11 @@ Create Date: 2023-02-14 20:45:41.102571
"""
import sqlalchemy as sa
from sqlalchemy import orm, select
from sqlalchemy.orm import Mapped, mapped_column, DeclarativeBase
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from text_unidecode import unidecode
import mealie.db.migration_types
from alembic import op
from mealie.db.models._model_utils import GUID
# revision identifiers, used by Alembic.
@@ -52,30 +51,46 @@ def do_data_migration():
session = orm.Session(bind=bind)
recipes = session.execute(select(RecipeModel)).scalars().all()
ingredients = session.execute(select(RecipeIngredient)).scalars().all()
for recipe in recipes:
if recipe.name is not None:
recipe.name_normalized = unidecode(recipe.name).lower().strip()
session.execute(
sa.text(
f"UPDATE {RecipeModel.__tablename__} SET name_normalized=:name_normalized WHERE id=:id"
).bindparams(name_normalized=unidecode(recipe.name).lower().strip(), id=recipe.id)
)
if recipe.description is not None:
recipe.description_normalized = unidecode(recipe.description).lower().strip()
session.add(recipe)
session.execute(
sa.text(
f"UPDATE {RecipeModel.__tablename__} SET description_normalized=:description_normalized WHERE id=:id"
).bindparams(description_normalized=unidecode(recipe.description).lower().strip(), id=recipe.id)
)
ingredients = session.execute(select(RecipeIngredient)).scalars().all()
for ingredient in ingredients:
if ingredient.note is not None:
ingredient.note_normalized = unidecode(ingredient.note).lower().strip()
session.execute(
sa.text(
f"UPDATE {RecipeIngredient.__tablename__} SET note_normalized=:note_normalized WHERE id=:id"
).bindparams(note_normalized=unidecode(ingredient.note).lower().strip(), id=ingredient.id)
)
if ingredient.original_text is not None:
ingredient.original_text_normalized = unidecode(ingredient.original_text).lower().strip()
session.add(ingredient)
session.execute(
sa.text(
f"UPDATE {RecipeIngredient.__tablename__} SET original_text_normalized=:original_text_normalized WHERE id=:id"
).bindparams(
original_text_normalized=unidecode(ingredient.original_text).lower().strip(), id=ingredient.id
)
)
session.commit()
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
# Set column to nullable first, since we do not have values here yet
op.add_column("recipes", sa.Column("name_normalized", sa.String(), nullable=True))
# Set column default first, since we do not have values here yet
op.add_column("recipes", sa.Column("name_normalized", sa.String(), nullable=False, server_default=""))
op.add_column("recipes", sa.Column("description_normalized", sa.String(), nullable=True))
op.drop_index("ix_recipes_description", table_name="recipes")
op.drop_index("ix_recipes_name", table_name="recipes")
@@ -95,9 +110,9 @@ def upgrade():
unique=False,
)
do_data_migration()
# Make recipes.name_normalized not nullable now that column should be filled for all rows
# Remove server default now that column should be filled for all rows
with op.batch_alter_table("recipes", schema=None) as batch_op:
batch_op.alter_column("name_normalized", nullable=False, existing_type=sa.String())
batch_op.alter_column("name_normalized", existing_type=sa.String(), server_default=None)
# ### end Alembic commands ###
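
For context, the normalized columns populated above hold an ASCII-transliterated, lowercased copy of the original text so search can ignore accents and case. A small illustration (the value is made up, not taken from any backup):

from text_unidecode import unidecode

name = "  Crème Brûlée  "
print(unidecode(name).lower().strip())  # creme brulee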

View file

@@ -24,10 +24,10 @@ depends_on = None
def populate_shopping_lists_multi_purpose_labels(shopping_lists_multi_purpose_labels_table: sa.Table, session: Session):
shopping_lists = session.query(ShoppingList).all()
labels = session.query(MultiPurposeLabel).all()
shopping_lists_labels_data: list[dict] = []
for shopping_list in shopping_lists:
labels = session.query(MultiPurposeLabel).filter(MultiPurposeLabel.group_id == ShoppingList.group_id).all()
for i, label in enumerate(labels):
shopping_lists_labels_data.append(
{"id": uuid4(), "shopping_list_id": shopping_list.id, "label_id": label.id, "position": i}

View file

@@ -24,17 +24,22 @@ def populate_group_slugs(session: Session):
seen_slugs: set[str] = set()
for group in groups:
original_name = group.name
new_name = original_name
attempts = 0
while True:
slug = slugify(group.name)
slug = slugify(new_name)
if slug not in seen_slugs:
break
attempts += 1
group.name = f"{original_name} ({attempts})"
new_name = f"{original_name} ({attempts})"
seen_slugs.add(slug)
group.slug = slug
session.execute(
sa.text(f"UPDATE {Group.__tablename__} SET name=:name, slug=:slug WHERE id=:id").bindparams(
name=new_name, slug=slug, id=group.id
)
)
session.commit()
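
The loop above keeps slugs unique by appending a counter to the group name until an unused slug is found. The same idea as a standalone sketch (sample names are invented):

from slugify import slugify

seen_slugs: set[str] = set()


def unique_slug(name: str) -> tuple[str, str]:
    # Returns a possibly renamed group name and a slug that has not been seen yet.
    new_name = name
    attempts = 0
    while True:
        slug = slugify(new_name)
        if slug not in seen_slugs:
            break
        attempts += 1
        new_name = f"{name} ({attempts})"
    seen_slugs.add(slug)
    return new_name, slug


print(unique_slug("Home"))  # ('Home', 'home')
print(unique_slug("Home"))  # ('Home (1)', 'home-1')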

View file

@@ -0,0 +1,74 @@
import json
import logging
import random
import string
from datetime import datetime
from uuid import UUID
logger = logging.getLogger("anonymize_backups")
def is_uuid4(value: str):
try:
UUID(value)
return True
except ValueError:
return False
def is_iso_datetime(value: str):
try:
datetime.fromisoformat(value)
return True
except ValueError:
return False
def random_string(length=10):
return "".join(random.choice(string.ascii_lowercase) for _ in range(length))
def clean_value(value):
try:
match value:
# preserve non-strings
case int(value) | float(value):
return value
case None:
return value
# preserve UUIDs and datetimes
case str(value) if is_uuid4(value) or is_iso_datetime(value):
return value
# randomize strings
case str(value):
return random_string()
case _:
pass
except Exception as e:
logger.exception(e)
logger.error(f"Failed to anonymize value: {value}")
return value
def walk_data_and_anonymize(data):
for k, v in data.items():
if isinstance(v, list):
for item in v:
walk_data_and_anonymize(item)
else:
# preserve alembic version number and enums
if k in ["auth_method", "version_num"]:
continue
data[k] = clean_value(v)
def anonymize_database_json(input_filepath: str, output_filepath: str):
with open(input_filepath) as f:
data = json.load(f)
walk_data_and_anonymize(data)
with open(output_filepath, "w") as f:
json.dump(data, f)
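
A usage sketch for the script above, as if the call below were appended to it; the paths are placeholders, not files that ship with the repository:

# Read a database.json pulled out of a backup zip, scrub free-text strings,
# and write the anonymized copy alongside it.
anonymize_database_json(
    input_filepath="/tmp/backup_extract/database.json",
    output_filepath="/tmp/backup_extract/database.anonymized.json",
)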

View file

@@ -0,0 +1,150 @@
from uuid import uuid4
from slugify import slugify
from sqlalchemy.orm import Session
from mealie.core import root_logger
from mealie.db.models.group.group import Group
from mealie.db.models.group.shopping_list import ShoppingList, ShoppingListMultiPurposeLabel
from mealie.db.models.labels import MultiPurposeLabel
from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel
from mealie.db.models.recipe.recipe import RecipeModel
logger = root_logger.get_logger("init_db")
def fix_recipe_normalized_search_properties(session: Session):
recipes = session.query(RecipeModel).all()
recipes_fixed = False
for recipe in recipes:
add_to_session = False
if recipe.name and not recipe.name_normalized:
recipe.name_normalized = RecipeModel.normalize(recipe.name)
add_to_session = True
if recipe.description and not recipe.description_normalized:
recipe.description_normalized = RecipeModel.normalize(recipe.description)
add_to_session = True
for ingredient in recipe.recipe_ingredient:
if ingredient.note and not ingredient.note_normalized:
ingredient.note_normalized = RecipeModel.normalize(ingredient.note)
add_to_session = True
if ingredient.original_text and not ingredient.original_text_normalized:
ingredient.original_text = RecipeModel.normalize(ingredient.original_text_normalized)
add_to_session = True
if add_to_session:
recipes_fixed = True
session.add(recipe)
if recipes_fixed:
logger.info("Updating recipe normalized search properties")
session.commit()
def fix_shopping_list_label_settings(session: Session):
shopping_lists = session.query(ShoppingList).all()
labels = session.query(MultiPurposeLabel).all()
label_settings_fixed = False
for shopping_list in shopping_lists:
labels_by_id = {label.id: label for label in labels if label.group_id == shopping_list.group_id}
for label_setting in shopping_list.label_settings:
if not labels_by_id.pop(label_setting.label_id, None):
# label setting is no longer valid, so delete it
session.delete(label_setting)
label_settings_fixed = True
if not labels_by_id:
# all labels are accounted for, so we don't need to add any
continue
label_settings_fixed = True
for i, label in enumerate(labels_by_id.values()):
new_label_setting = ShoppingListMultiPurposeLabel(
id=uuid4(),
shopping_list_id=shopping_list.id,
label_id=label.id,
position=i + len(shopping_list.label_settings),
)
session.add(new_label_setting)
if label_settings_fixed:
logger.info("Fixing shopping list label settings")
session.commit()
def fix_group_slugs(session: Session):
groups = session.query(Group).all()
seen_slugs: set[str] = set()
groups_fixed = False
for group in groups:
if not group.slug:
original_name = group.name
new_name = original_name
attempts = 0
while True:
slug = slugify(group.name)
if slug not in seen_slugs:
break
attempts += 1
new_name = f"{original_name} ({attempts})"
groups_fixed = True
group.name = new_name
group.slug = slug
if groups_fixed:
logger.info("Adding missing group slugs")
session.commit()
def fix_normalized_unit_and_food_names(session: Session):
units = session.query(IngredientUnitModel).all()
units_fixed = False
for unit in units:
add_to_session = False
if unit.name and not unit.name_normalized:
unit.name_normalized = IngredientUnitModel.normalize(unit.name)
add_to_session = True
if unit.abbreviation and not unit.abbreviation_normalized:
unit.abbreviation_normalized = IngredientUnitModel.normalize(unit.abbreviation)
add_to_session = True
if add_to_session:
units_fixed = True
session.add(unit)
if units_fixed:
logger.info("Updating unit normalized search properties")
session.commit()
foods = session.query(IngredientFoodModel).all()
foods_fixed = False
for food in foods:
add_to_session = False
if food.name and not food.name_normalized:
food.name_normalized = IngredientFoodModel.normalize(food.name)
add_to_session = True
if add_to_session:
foods_fixed = True
session.add(food)
if foods_fixed:
logger.info("Updating food normalized search properties")
session.commit()
def fix_migration_data(session: Session):
logger.info("Checking for migration data fixes")
fix_recipe_normalized_search_properties(session)
fix_shopping_list_label_settings(session)
fix_group_slugs(session)
fix_normalized_unit_and_food_names(session)
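
These fixes get wired into database initialization in the next file, but they can also be run on their own; a minimal sketch, assuming a configured Mealie environment:

from mealie.db.db_setup import session_context
from mealie.db.fixes.fix_migration_data import fix_migration_data

# Apply the data fixes against whatever database the app settings point at.
with session_context() as session:
    fix_migration_data(session)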

View file

@@ -11,6 +11,7 @@ from mealie.core import root_logger
from mealie.core.config import get_app_settings
from mealie.db.db_setup import session_context
from mealie.db.fixes.fix_group_with_no_name import fix_group_with_no_name
from mealie.db.fixes.fix_migration_data import fix_migration_data
from mealie.db.fixes.fix_slug_foods import fix_slug_food_names
from mealie.repos.all_repositories import get_repositories
from mealie.repos.repository_factory import AllRepositories
@@ -97,6 +98,9 @@ def main():
session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
db = get_repositories(session)
safe_try(lambda: fix_migration_data(session))
safe_try(lambda: fix_slug_food_names(db))
safe_try(lambda: fix_group_with_no_name(session))
if db.users.get_all():
logger.debug("Database exists")
@@ -104,9 +108,6 @@ def main():
logger.info("Database contains no users, initializing...")
init_db(db)
safe_try(lambda: fix_slug_food_names(db))
safe_try(lambda: fix_group_with_no_name(session))
if __name__ == "__main__":
main()

View file

@@ -1,5 +1,7 @@
import uuid
from typing import Any
from sqlalchemy import Dialect
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.types import CHAR, TypeDecorator
@@ -17,13 +19,8 @@ class GUID(TypeDecorator):
def generate():
return uuid.uuid4()
def load_dialect_impl(self, dialect):
if dialect.name == "postgresql":
return dialect.type_descriptor(UUID())
else:
return dialect.type_descriptor(CHAR(32))
def process_bind_param(self, value, dialect):
@staticmethod
def convert_value_to_guid(value: Any, dialect: Dialect) -> str | None:
if value is None:
return value
elif dialect.name == "postgresql":
@@ -35,7 +32,25 @@ class GUID(TypeDecorator):
# hexstring
return "%.32x" % value.int
def load_dialect_impl(self, dialect):
if dialect.name == "postgresql":
return dialect.type_descriptor(UUID())
else:
return dialect.type_descriptor(CHAR(32))
def process_bind_param(self, value, dialect):
return self.convert_value_to_guid(value, dialect)
def _uuid_value(self, value):
if value is None:
return value
else:
if not isinstance(value, uuid.UUID):
value = uuid.UUID(value)
return value
def process_result_value(self, value, dialect):
if value is not None and not isinstance(value, uuid.UUID):
value = uuid.UUID(value)
return value
return self._uuid_value(value)
def sort_key_function(self, value):
return self._uuid_value(value)
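
Because convert_value_to_guid is now a static method, it can be called outside of a column context, which is what the backup exporter below relies on. A quick sketch of the dialect-dependent output, assuming a Mealie environment where this module is importable:

import uuid

from sqlalchemy.dialects import postgresql, sqlite

from mealie.db.models._model_utils import GUID

value = uuid.uuid4()

# PostgreSQL keeps the canonical dashed string form
print(GUID.convert_value_to_guid(value, postgresql.dialect()))

# Other dialects (e.g. SQLite) store a 32-character hex string without dashes
print(GUID.convert_value_to_guid(value, sqlite.dialect()))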

View file

@@ -1,4 +1,5 @@
import datetime
import uuid
from os import path
from pathlib import Path
@@ -10,6 +11,8 @@ from sqlalchemy.orm import sessionmaker
from alembic import command
from alembic.config import Config
from mealie.db import init_db
from mealie.db.models._model_utils import GUID
from mealie.services._base_service import BaseService
PROJECT_DIR = Path(__file__).parent.parent.parent.parent
@@ -38,23 +41,33 @@ class AlchemyExporter(BaseService):
self.session_maker = sessionmaker(bind=self.engine)
@staticmethod
def convert_to_datetime(data: dict) -> dict:
def is_uuid(value: str) -> bool:
try:
uuid.UUID(value)
return True
except ValueError:
return False
def convert_types(self, data: dict) -> dict:
"""
walks the dictionary to convert all things that look like timestamps to datetime objects
walks the dictionary to restore all things that look like string representations of their complex types
used in the context of reading a json file into a database via SQLAlchemy.
"""
for key, value in data.items():
if isinstance(value, dict):
data = AlchemyExporter.convert_to_datetime(value)
data = self.convert_types(value)
elif isinstance(value, list): # assume that this is a list of dictionaries
data[key] = [AlchemyExporter.convert_to_datetime(item) for item in value]
data[key] = [self.convert_types(item) for item in value]
elif isinstance(value, str):
if key in AlchemyExporter.look_for_datetime:
data[key] = AlchemyExporter.DateTimeParser(dt=value).dt
if key in AlchemyExporter.look_for_date:
data[key] = AlchemyExporter.DateTimeParser(date=value).date
if key in AlchemyExporter.look_for_time:
data[key] = AlchemyExporter.DateTimeParser(time=value).time
if self.is_uuid(value):
# convert the data to the current database's native GUID type
data[key] = GUID.convert_value_to_guid(value, self.engine.dialect)
if key in self.look_for_datetime:
data[key] = self.DateTimeParser(dt=value).dt
if key in self.look_for_date:
data[key] = self.DateTimeParser(date=value).date
if key in self.look_for_time:
data[key] = self.DateTimeParser(time=value).time
return data
def dump_schema(self) -> dict:
@@ -105,7 +118,7 @@ class AlchemyExporter(BaseService):
del db_dump["alembic_version"]
"""Restores all data from dictionary into the database"""
with self.engine.begin() as connection:
data = AlchemyExporter.convert_to_datetime(db_dump)
data = self.convert_types(db_dump)
self.meta.reflect(bind=self.engine)
for table_name, rows in data.items():
@@ -139,8 +152,8 @@ SELECT SETVAL('shopping_list_item_extras_id_seq', (SELECT MAX(id) FROM shopping_
)
)
# Run all migrations up to current version
command.upgrade(alembic_cfg, "head")
# Re-init database to finish migrations
init_db.main()
def drop_all(self) -> None:
"""Drops all data from the database"""

View file

@@ -83,7 +83,7 @@ class BackupV2(BaseService):
# Validation
if not contents.validate():
self.logger.error(
"Invalid backup file. file does not contain required elements (data directory and database.json"
"Invalid backup file. file does not contain required elements (data directory and database.json)"
)
raise ValueError("Invalid backup file")

View file

@@ -4,6 +4,15 @@ CWD = Path(__file__).parent
locale_dir = CWD / "locale"
backup_version_44e8d670719d = CWD / "backups/backup_version_44e8d670719d.zip"
"""44e8d670719d: add extras to shopping lists, list items, and ingredient foods"""
backup_version_ba1e4a6cfe99 = CWD / "backups/backup_version_ba1e4a6cfe99.zip"
"""ba1e4a6cfe99: added plural names and alias tables for foods and units"""
backup_version_bcfdad6b7355 = CWD / "backups/backup_version_bcfdad6b7355.zip"
"""bcfdad6b7355: remove tool name and slug unique contraints"""
migrations_paprika = CWD / "migrations/paprika.zip"
migrations_chowdown = CWD / "migrations/chowdown.zip"

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@@ -1,8 +1,18 @@
import filecmp
from pathlib import Path
from typing import Any
from typing import Any, cast
import pytest
from sqlalchemy.orm import Session
import tests.data as test_data
from mealie.core.config import get_app_settings
from mealie.db.db_setup import session_context
from mealie.db.models.group import Group
from mealie.db.models.group.shopping_list import ShoppingList
from mealie.db.models.labels import MultiPurposeLabel
from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel
from mealie.db.models.recipe.recipe import RecipeModel
from mealie.services.backups_v2.alchemy_exporter import AlchemyExporter
from mealie.services.backups_v2.backup_file import BackupFile
from mealie.services.backups_v2.backup_v2 import BackupV2
@@ -56,3 +66,88 @@ def test_database_restore():
for s1, s2 in zip(snapshop_1, snapshop_2):
assert snapshop_1[s1].sort(key=dict_sorter) == snapshop_2[s2].sort(key=dict_sorter)
@pytest.mark.parametrize(
"backup_path",
[
test_data.backup_version_44e8d670719d,
test_data.backup_version_ba1e4a6cfe99,
test_data.backup_version_bcfdad6b7355,
],
ids=[
"44e8d670719d: add extras to shopping lists, list items, and ingredient foods",
"ba1e4a6cfe99: added plural names and alias tables for foods and units",
"bcfdad6b7355: remove tool name and slug unique contraints",
],
)
def test_database_restore_data(backup_path: Path):
"""
This tests real user backups to make sure the data is restored correctly. The data has been anonymized, but
relationships and data types should be preserved.
This test should verify all migrations that do some sort of database manipulation (e.g. populating a new column).
If a new migration is added that does any sort of data manipulation, this test should be updated.
"""
settings = get_app_settings()
backup_v2 = BackupV2(settings.DB_URL)
# create a backup of the existing data so we can restore it later
original_data_backup = backup_v2.backup()
try:
assert backup_path.exists()
backup_v2.restore(backup_path)
# make sure migrations populated data successfully
with session_context() as session:
session = cast(Session, session)
groups = session.query(Group).all()
recipes = session.query(RecipeModel).all()
shopping_lists = session.query(ShoppingList).all()
labels = session.query(MultiPurposeLabel).all()
foods = session.query(IngredientFoodModel).all()
units = session.query(IngredientUnitModel).all()
# 2023-02-14-20.45.41_5ab195a474eb_add_normalized_search_properties
for recipe in recipes:
if recipe.name:
assert recipe.name_normalized
if recipe.description:
assert recipe.description_normalized
for ingredient in recipe.recipe_ingredient:
if ingredient.note:
assert ingredient.note_normalized
if ingredient.original_text:
assert ingredient.original_text_normalized
# 2023-02-21-22.03.19_b04a08da2108_added_shopping_list_label_settings
for shopping_list in shopping_lists:
group_labels = [label for label in labels if label.group_id == shopping_list.group_id]
assert len(shopping_list.label_settings) == len(group_labels)
for label_setting, label in zip(
sorted(shopping_list.label_settings, key=lambda x: x.label.id),
sorted(group_labels, key=lambda x: x.id),
strict=True,
):
assert label_setting.label == label
# 2023-08-06-21.00.34_04ac51cbe9a4_added_group_slug
for group in groups:
assert group.slug
# 2023-09-01-14.55.42_0341b154f79a_added_normalized_unit_and_food_names
for food in foods:
if food.name:
assert food.name_normalized
for unit in units:
assert unit.name_normalized
if unit.abbreviation:
assert unit.abbreviation_normalized
finally:
backup_v2.restore(original_data_backup)