1
0
Fork 0
mirror of https://github.com/seanmorley15/AdventureLog.git synced 2025-07-24 07:19:36 +02:00

fix: replace psutil with tqdm for progress tracking in country download command

This commit is contained in:
Sean Morley 2025-01-13 17:07:52 -05:00
parent 563373dd54
commit a010afcc43
2 changed files with 11 additions and 43 deletions

View file

@ -19,4 +19,4 @@ django-widget-tweaks==1.5.0
django-ical==1.9.2
icalendar==6.1.0
ijson==3.3.0
psutil==6.1.1
tqdm==4.67.1

View file

@ -3,25 +3,8 @@ from django.core.management.base import BaseCommand
import requests
from worldtravel.models import Country, Region, City
from django.db import transaction
import psutil
from tqdm import tqdm
import ijson
import resource
def limit_memory(max_memory):
    """Set the soft address-space limit (RLIMIT_AS) of the current process.

    The existing hard limit is read and passed back unchanged; only the soft
    limit is modified.

    Args:
        max_memory: Requested soft limit in bytes.

    Note:
        ``resource.setrlimit`` raises ``ValueError`` when the soft limit
        exceeds the hard limit. If the process already runs under a finite
        hard limit that is lower than ``max_memory``, the request is clamped
        to that hard limit instead of failing, since the stricter cap already
        satisfies the intent of this function.
    """
    # Only the hard limit is needed; the current soft limit is overwritten.
    _, hard = resource.getrlimit(resource.RLIMIT_AS)
    unlimited = resource.RLIM_INFINITY
    if hard != unlimited and (max_memory == unlimited or max_memory > hard):
        # An unprivileged process cannot set soft above hard.
        max_memory = hard
    resource.setrlimit(resource.RLIMIT_AS, (max_memory, hard))
# Set memory limit to 800MB. The value is passed in bytes
# (800 * 1024 * 1024) and requested as the RLIMIT_AS soft limit.
limit_memory(800 * 1024 * 1024)
def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in bytes."""
    current_pid = os.getpid()
    return psutil.Process(current_pid).memory_info().rss
def log_memory_usage(stage):
    """Print the current memory usage, labelled with the given stage name.

    Args:
        stage: Label inserted into the printed message to identify where in
            the run the measurement was taken.
    """
    # get_memory_usage() reports bytes; two divisions by 1024 give megabytes.
    usage_mb = get_memory_usage() / 1024 / 1024
    print(f"Memory usage at {stage}: {usage_mb:.2f} MB")
from django.conf import settings
@ -142,7 +125,7 @@ class Command(BaseCommand):
# Check for duplicate regions
if state_id in processed_region_ids:
self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
# self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
continue
processed_region_ids.add(state_id)
@ -164,7 +147,6 @@ class Command(BaseCommand):
)
regions_to_create.append(region_obj)
# self.stdout.write(self.style.SUCCESS(f'State {state_id} prepared'))
log_memory_usage('state')
if 'cities' in state and len(state['cities']) > 0:
for city in state['cities']:
@ -175,7 +157,7 @@ class Command(BaseCommand):
# Check for duplicate cities
if city_id in processed_city_ids:
self.stdout.write(self.style.ERROR(f'City {city_id} already processed'))
# self.stdout.write(self.style.ERROR(f'City {city_id} already processed'))
continue
processed_city_ids.add(city_id)
@ -197,7 +179,6 @@ class Command(BaseCommand):
)
cities_to_create.append(city_obj)
# self.stdout.write(self.style.SUCCESS(f'City {city_id} prepared'))
log_memory_usage('city')
else:
state_id = f"{country_code}-00"
@ -215,45 +196,32 @@ class Command(BaseCommand):
)
regions_to_create.append(region_obj)
# self.stdout.write(self.style.SUCCESS(f'Region {state_id} prepared for {country_name}'))
# Process in batches
for i in range(0, len(countries_to_create), batch_size):
for i in tqdm(range(0, len(countries_to_create), batch_size), desc="Processing countries"):
batch = countries_to_create[i:i + batch_size]
Country.objects.bulk_create(batch)
self.stdout.write(self.style.SUCCESS(f'Processed countries batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}'))
log_memory_usage('country')
for i in range(0, len(regions_to_create), batch_size):
for i in tqdm(range(0, len(regions_to_create), batch_size), desc="Processing regions"):
batch = regions_to_create[i:i + batch_size]
Region.objects.bulk_create(batch)
self.stdout.write(self.style.SUCCESS(f'Processed regions batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}'))
log_memory_usage('region')
for i in range(0, len(cities_to_create), batch_size):
for i in tqdm(range(0, len(cities_to_create), batch_size), desc="Processing cities"):
batch = cities_to_create[i:i + batch_size]
City.objects.bulk_create(batch)
self.stdout.write(self.style.SUCCESS(f'Processed cities batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}'))
log_memory_usage('city')
# Process updates in batches
for i in range(0, len(countries_to_update), batch_size):
batch = countries_to_update[i:i + batch_size]
for i in tqdm(range(0, len(countries_to_update), batch_size), desc="Updating countries"):
batch = countries_to_update[i:i + batch_size]
Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])
self.stdout.write(self.style.SUCCESS(f'Updated countries batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}'))
log_memory_usage('country')
for i in range(0, len(regions_to_update), batch_size):
for i in tqdm(range(0, len(regions_to_update), batch_size), desc="Updating regions"):
batch = regions_to_update[i:i + batch_size]
Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])
self.stdout.write(self.style.SUCCESS(f'Updated regions batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}'))
log_memory_usage('region')
for i in range(0, len(cities_to_update), batch_size):
for i in tqdm(range(0, len(cities_to_update), batch_size), desc="Updating cities"):
batch = cities_to_update[i:i + batch_size]
City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])
self.stdout.write(self.style.SUCCESS(f'Updated cities batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}'))
log_memory_usage('city')
# Delete countries and regions that are no longer in the data
Country.objects.exclude(country_code__in=processed_country_codes).delete()
Region.objects.exclude(id__in=processed_region_ids).delete()
City.objects.exclude(id__in=processed_city_ids).delete()