Mirror of https://github.com/seanmorley15/AdventureLog.git

fix: add warning for interrupted download-countries command due to memory issues

Sean Morley 2025-01-13 14:12:05 -05:00
parent f27453824a
commit a766a07ea1
2 changed files with 173 additions and 95 deletions

View file

@@ -52,8 +52,13 @@ EOF
 fi

 # Sync the countries and world travel regions
 python manage.py download-countries
+if [ $? -eq 137 ]; then
+    >&2 echo "WARNING: The download-countries command was interrupted. This is likely due to lack of memory allocated to the container or the host. Please try again with more memory."
+    exit 1
+fi
+
 cat /code/adventurelog.txt
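Note on the check above: exit status 137 is how the shell reports a process killed with SIGKILL (128 + signal 9), which is what container memory limits and the kernel OOM killer deliver when memory runs out, so it is a reasonable proxy for "the import ran out of memory". A minimal sketch of the same detection driven from Python instead of the entrypoint script; the manage.py invocation is illustrative only:

    import signal
    import subprocess
    import sys

    # Illustrative only: run the management command and detect an out-of-memory kill.
    # The entrypoint's `$? -eq 137` test works because the shell encodes a signal
    # death as 128 + signal number (128 + 9 for SIGKILL); subprocess reports the
    # same event as a negative return code instead.
    result = subprocess.run([sys.executable, "manage.py", "download-countries"])
    if result.returncode == -signal.SIGKILL:  # killed by SIGKILL, usually the OOM killer
        print("WARNING: download-countries was killed, most likely due to insufficient memory.", file=sys.stderr)
        sys.exit(1)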

View file

@@ -1,11 +1,9 @@
 import os
-import requests
-import uuid
 from django.core.management.base import BaseCommand
+import requests
 from worldtravel.models import Country, Region, City
 from django.db import transaction
 import ijson
-from django.conf import settings
 import psutil

 def get_memory_usage():
@@ -17,15 +15,22 @@ def log_memory_usage(stage):
     memory_usage = get_memory_usage()
     print(f"Memory usage at {stage}: {memory_usage / 1024 / 1024:.2f} MB")

+from django.conf import settings
+
 COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION
 media_root = settings.MEDIA_ROOT

 def saveCountryFlag(country_code):
+    # For standards, use the lowercase country_code
     country_code = country_code.lower()
     flags_dir = os.path.join(media_root, 'flags')

+    # Check if the flags directory exists, if not, create it
     if not os.path.exists(flags_dir):
         os.makedirs(flags_dir)
+
+    # Check if the flag already exists in the media folder
     flag_path = os.path.join(flags_dir, f'{country_code}.png')
     if os.path.exists(flag_path):
         print(f'Flag for {country_code} already exists')
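Aside: the log_memory_usage calls that recur in this diff print the resident set size of the process, as the context lines of this hunk show. The body of get_memory_usage is not part of the hunk; a typical psutil-based implementation (an assumption, not necessarily the file's exact code) would be:

    import os
    import psutil

    def get_memory_usage():
        # Resident set size of the current process, in bytes.
        return psutil.Process(os.getpid()).memory_info().rss

    def log_memory_usage(stage):
        memory_usage = get_memory_usage()
        print(f"Memory usage at {stage}: {memory_usage / 1024 / 1024:.2f} MB")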
@@ -46,8 +51,8 @@ class Command(BaseCommand):
         parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file')

     def handle(self, **options):
-        log_memory_usage("start")
         force = options['force']
+        batch_size = 250
         countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
         if not os.path.exists(countries_json_path) or force:
             res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
@@ -68,17 +73,31 @@ class Command(BaseCommand):
             else:
                 self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
                 return

-        insert_id = uuid.uuid4()
         with transaction.atomic():
-            # Process data in chunks using ijson
             f = open(countries_json_path, 'rb')
             parser = ijson.items(f, 'item')

-            region_batch = []
-            city_batch = []
-            existing_region_ids = set()
-            existing_city_ids = set()
+            existing_countries = {}
+            for country in Country.objects.iterator():
+                existing_countries[country.country_code] = country
+            existing_regions = {}
+            for region in Region.objects.iterator():
+                existing_regions[region.id] = region
+            existing_cities = {}
+            for city in City.objects.iterator():
+                existing_cities[city.id] = city
+
+            countries_to_create = []
+            regions_to_create = []
+            countries_to_update = []
+            regions_to_update = []
+            cities_to_create = []
+            cities_to_update = []
+
+            processed_country_codes = set()
+            processed_region_ids = set()
+            processed_city_ids = set()

             for country in parser:
                 country_code = country['iso2']
@@ -88,102 +107,156 @@ class Command(BaseCommand):
                 longitude = round(float(country['longitude']), 6) if country['longitude'] else None
                 latitude = round(float(country['latitude']), 6) if country['latitude'] else None

-                country_obj, _ = Country.objects.update_or_create(
-                    country_code=country_code,
-                    defaults={
-                        'name': country_name,
-                        'subregion': country_subregion,
-                        'capital': country_capital,
-                        'longitude': longitude,
-                        'latitude': latitude,
-                        'insert_id': insert_id
-                    }
-                )
+                processed_country_codes.add(country_code)
+
+                if country_code in existing_countries:
+                    country_obj = existing_countries[country_code]
+                    country_obj.name = country_name
+                    country_obj.subregion = country_subregion
+                    country_obj.capital = country_capital
+                    country_obj.longitude = longitude
+                    country_obj.latitude = latitude
+                    countries_to_update.append(country_obj)
+                else:
+                    country_obj = Country(
+                        name=country_name,
+                        country_code=country_code,
+                        subregion=country_subregion,
+                        capital=country_capital,
+                        longitude=longitude,
+                        latitude=latitude
+                    )
+                    countries_to_create.append(country_obj)

                 saveCountryFlag(country_code)
-                log_memory_usage(country_code)
+                # self.stdout.write(self.style.SUCCESS(f'Country {country_name} prepared'))

                 if country['states']:
                     for state in country['states']:
-                        state_id = f"{country_code}-{state['state_code']}" if state['state_code'] else f"{country_code}-00"
-
-                        # Ensure no duplicate regions
-                        if state_id not in existing_region_ids:
-                            region_obj = Region(
-                                id=state_id,
-                                name=state['name'],
-                                country=country_obj,
-                                longitude=state['longitude'],
-                                latitude=state['latitude'],
-                                insert_id=insert_id
-                            )
-                            region_batch.append(region_obj)
-                            existing_region_ids.add(state_id)
-                            log_memory_usage(state_id)
-
-                            # Handle cities and avoid duplicates
-                            if 'cities' in state and len(state['cities']) > 0:
-                                for city in state['cities']:
-                                    city_id = f"{state_id}-{city['id']}"
-
-                                    if city_id not in existing_city_ids:
-                                        city_obj = City(
-                                            id=city_id,
-                                            name=city['name'],
-                                            region=region_obj,
-                                            longitude=city['longitude'],
-                                            latitude=city['latitude'],
-                                            insert_id=insert_id
-                                        )
-                                        city_batch.append(city_obj)
-                                        existing_city_ids.add(city_id)
-
-                        # Bulk insert regions in smaller batches
-                        if len(region_batch) >= 100:
-                            Region.objects.bulk_create(
-                                region_batch,
-                                update_conflicts=True,
-                                batch_size=100,
-                                update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
-                                unique_fields=['id']
-                            )
-                            region_batch.clear()
-
-                        # Bulk insert cities in smaller batches
-                        if len(city_batch) >= 100:
-                            City.objects.bulk_create(
-                                city_batch,
-                                update_conflicts=True,
-                                batch_size=100,
-                                update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
-                                unique_fields=['id']
-                            )
-                            city_batch.clear()
-
-            # Final insertion of any remaining regions and cities
-            if region_batch:
-                Region.objects.bulk_create(
-                    region_batch,
-                    update_conflicts=True,
-                    batch_size=100,
-                    update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
-                    unique_fields=['id']
-                )
-            if city_batch:
-                City.objects.bulk_create(
-                    city_batch,
-                    update_conflicts=True,
-                    batch_size=100,
-                    update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
-                    unique_fields=['id']
-                )
-
-            self.stdout.write(self.style.SUCCESS('Regions and cities created'))
-
-        # Clean up old data
-        Country.objects.exclude(insert_id=insert_id).delete()
-        Region.objects.exclude(insert_id=insert_id).delete()
-        City.objects.exclude(insert_id=insert_id).delete()
-
-        self.stdout.write(self.style.SUCCESS('All data imported successfully and old data cleaned up'))
+                        name = state['name']
+                        state_id = f"{country_code}-{state['state_code']}"
+                        latitude = round(float(state['latitude']), 6) if state['latitude'] else None
+                        longitude = round(float(state['longitude']), 6) if state['longitude'] else None
+
+                        # Check for duplicate regions
+                        if state_id in processed_region_ids:
+                            self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
+                            continue
+                        log_memory_usage(f"State {state_id} processing")
+
+                        processed_region_ids.add(state_id)
+
+                        if state_id in existing_regions:
+                            region_obj = existing_regions[state_id]
+                            region_obj.name = name
+                            region_obj.country = country_obj
+                            region_obj.longitude = longitude
+                            region_obj.latitude = latitude
+                            regions_to_update.append(region_obj)
+                        else:
+                            region_obj = Region(
+                                id=state_id,
+                                name=name,
+                                country=country_obj,
+                                longitude=longitude,
+                                latitude=latitude
+                            )
+                            regions_to_create.append(region_obj)
+                        # self.stdout.write(self.style.SUCCESS(f'State {state_id} prepared'))
+
+                        if 'cities' in state and len(state['cities']) > 0:
+                            for city in state['cities']:
+                                city_id = f"{state_id}-{city['id']}"
+                                city_name = city['name']
+                                latitude = round(float(city['latitude']), 6) if city['latitude'] else None
+                                longitude = round(float(city['longitude']), 6) if city['longitude'] else None
+
+                                # Check for duplicate cities
+                                if city_id in processed_city_ids:
+                                    self.stdout.write(self.style.ERROR(f'City {city_id} already processed'))
+                                    continue
+
+                                processed_city_ids.add(city_id)
+
+                                if city_id in existing_cities:
+                                    city_obj = existing_cities[city_id]
+                                    city_obj.name = city_name
+                                    city_obj.region = region_obj
+                                    city_obj.longitude = longitude
+                                    city_obj.latitude = latitude
+                                    cities_to_update.append(city_obj)
+                                else:
+                                    city_obj = City(
+                                        id=city_id,
+                                        name=city_name,
+                                        region=region_obj,
+                                        longitude=longitude,
+                                        latitude=latitude
+                                    )
+                                    cities_to_create.append(city_obj)
+                                # self.stdout.write(self.style.SUCCESS(f'City {city_id} prepared'))
+                else:
+                    state_id = f"{country_code}-00"
+                    processed_region_ids.add(state_id)
+                    if state_id in existing_regions:
+                        region_obj = existing_regions[state_id]
+                        region_obj.name = country_name
+                        region_obj.country = country_obj
+                        regions_to_update.append(region_obj)
+                    else:
+                        region_obj = Region(
+                            id=state_id,
+                            name=country_name,
+                            country=country_obj
+                        )
+                        regions_to_create.append(region_obj)
+                    # self.stdout.write(self.style.SUCCESS(f'Region {state_id} prepared for {country_name}'))
+
+            # Process in batches
+            for i in range(0, len(countries_to_create), batch_size):
+                batch = countries_to_create[i:i + batch_size]
+                Country.objects.bulk_create(batch)
+                self.stdout.write(self.style.SUCCESS(f'Processed countries batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}'))
+                log_memory_usage(f"Country batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}")
+
+            for i in range(0, len(regions_to_create), batch_size):
+                batch = regions_to_create[i:i + batch_size]
+                Region.objects.bulk_create(batch)
+                self.stdout.write(self.style.SUCCESS(f'Processed regions batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}'))
+                log_memory_usage(f"Region batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}")
+
+            for i in range(0, len(cities_to_create), batch_size):
+                batch = cities_to_create[i:i + batch_size]
+                City.objects.bulk_create(batch)
+                self.stdout.write(self.style.SUCCESS(f'Processed cities batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}'))
+                log_memory_usage(f"City batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}")
+
+            # Process updates in batches
+            for i in range(0, len(countries_to_update), batch_size):
+                batch = countries_to_update[i:i + batch_size]
+                Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])
+                self.stdout.write(self.style.SUCCESS(f'Updated countries batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}'))
+                log_memory_usage(f"Country update batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}")
+
+            for i in range(0, len(regions_to_update), batch_size):
+                batch = regions_to_update[i:i + batch_size]
+                Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])
+                self.stdout.write(self.style.SUCCESS(f'Updated regions batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}'))
+                log_memory_usage(f"Region update batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}")
+
+            for i in range(0, len(cities_to_update), batch_size):
+                batch = cities_to_update[i:i + batch_size]
+                City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])
+                self.stdout.write(self.style.SUCCESS(f'Updated cities batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}'))
+                log_memory_usage(f"City update batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}")
+
+            # Delete countries and regions that are no longer in the data
+            Country.objects.exclude(country_code__in=processed_country_codes).delete()
+            log_memory_usage("Extra Countries deleted")
+            Region.objects.exclude(id__in=processed_region_ids).delete()
+            log_memory_usage("Extra Regions deleted")
+            City.objects.exclude(id__in=processed_city_ids).delete()
+            log_memory_usage("Extra Cities deleted")
+
+        self.stdout.write(self.style.SUCCESS('All data imported successfully'))
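Taken together, the rewritten command streams the source file with ijson (ijson.items(f, 'item') yields one country object at a time instead of loading the whole JSON), diffs it against the rows already in the database, and then flushes creates, updates, and deletes in fixed-size batches. A minimal sketch of that sync pattern under the same assumptions, with a hypothetical Item model standing in for Country, Region, and City:

    from django.db import transaction
    from myapp.models import Item  # hypothetical model with a unique `key` and a `name`

    def sync_items(incoming, batch_size=250):
        """incoming yields (key, name) pairs parsed from the source file."""
        with transaction.atomic():
            # Load what already exists once, keyed for O(1) lookups.
            existing = {obj.key: obj for obj in Item.objects.iterator()}
            to_create, to_update, seen = [], [], set()

            for key, name in incoming:
                if key in seen:
                    continue  # skip duplicates in the source data
                seen.add(key)
                if key in existing:
                    obj = existing[key]
                    obj.name = name
                    to_update.append(obj)
                else:
                    to_create.append(Item(key=key, name=name))

            # Flush in fixed-size batches to bound memory and statement size.
            for i in range(0, len(to_create), batch_size):
                Item.objects.bulk_create(to_create[i:i + batch_size])
            for i in range(0, len(to_update), batch_size):
                Item.objects.bulk_update(to_update[i:i + batch_size], ['name'])

            # Drop rows that no longer appear in the source.
            Item.objects.exclude(key__in=seen).delete()

Deleting by exclusion against the set of processed keys replaces the previous insert_id bookkeeping, so stale rows are removed without rewriting a tag on every row during each import.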