mirror of
https://github.com/seanmorley15/AdventureLog.git
synced 2025-08-04 12:45:17 +02:00
fix: implement memory limit and adjust batch size for country downloads
This commit is contained in:
parent
a766a07ea1
commit
563373dd54
1 changed files with 25 additions and 26 deletions
|
@ -3,8 +3,16 @@ from django.core.management.base import BaseCommand
|
||||||
import requests
|
import requests
|
||||||
from worldtravel.models import Country, Region, City
|
from worldtravel.models import Country, Region, City
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
import ijson
|
|
||||||
import psutil
|
import psutil
|
||||||
|
import ijson
|
||||||
|
import resource
|
||||||
|
|
||||||
|
def limit_memory(max_memory):
|
||||||
|
soft, hard = resource.getrlimit(resource.RLIMIT_AS)
|
||||||
|
resource.setrlimit(resource.RLIMIT_AS, (max_memory, hard))
|
||||||
|
|
||||||
|
# Set memory limit to 800MB
|
||||||
|
limit_memory(800 * 1024 * 1024)
|
||||||
|
|
||||||
def get_memory_usage():
|
def get_memory_usage():
|
||||||
process = psutil.Process(os.getpid())
|
process = psutil.Process(os.getpid())
|
||||||
|
@ -52,7 +60,7 @@ class Command(BaseCommand):
|
||||||
|
|
||||||
def handle(self, **options):
|
def handle(self, **options):
|
||||||
force = options['force']
|
force = options['force']
|
||||||
batch_size = 250
|
batch_size = 100
|
||||||
countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
|
countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
|
||||||
if not os.path.exists(countries_json_path) or force:
|
if not os.path.exists(countries_json_path) or force:
|
||||||
res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
|
res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
|
||||||
|
@ -74,19 +82,14 @@ class Command(BaseCommand):
|
||||||
self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
|
self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
|
||||||
return
|
return
|
||||||
|
|
||||||
with transaction.atomic():
|
with open(countries_json_path, 'r') as f:
|
||||||
# Process data in chunks using ijson
|
|
||||||
f = open(countries_json_path, 'rb')
|
f = open(countries_json_path, 'rb')
|
||||||
parser = ijson.items(f, 'item')
|
parser = ijson.items(f, 'item')
|
||||||
existing_countries = {}
|
|
||||||
for country in Country.objects.iterator():
|
with transaction.atomic():
|
||||||
existing_countries[country.country_code] = country
|
existing_countries = {country.country_code: country for country in Country.objects.all()}
|
||||||
existing_regions = {}
|
existing_regions = {region.id: region for region in Region.objects.all()}
|
||||||
for region in Region.objects.iterator():
|
existing_cities = {city.id: city for city in City.objects.all()}
|
||||||
existing_regions[region.id] = region
|
|
||||||
existing_cities = {}
|
|
||||||
for city in City.objects.iterator():
|
|
||||||
existing_cities[city.id] = city
|
|
||||||
|
|
||||||
countries_to_create = []
|
countries_to_create = []
|
||||||
regions_to_create = []
|
regions_to_create = []
|
||||||
|
@ -129,7 +132,6 @@ class Command(BaseCommand):
|
||||||
countries_to_create.append(country_obj)
|
countries_to_create.append(country_obj)
|
||||||
|
|
||||||
saveCountryFlag(country_code)
|
saveCountryFlag(country_code)
|
||||||
# self.stdout.write(self.style.SUCCESS(f'Country {country_name} prepared'))
|
|
||||||
|
|
||||||
if country['states']:
|
if country['states']:
|
||||||
for state in country['states']:
|
for state in country['states']:
|
||||||
|
@ -142,7 +144,6 @@ class Command(BaseCommand):
|
||||||
if state_id in processed_region_ids:
|
if state_id in processed_region_ids:
|
||||||
self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
|
self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
|
||||||
continue
|
continue
|
||||||
log_memory_usage(f"State {state_id} processing")
|
|
||||||
|
|
||||||
processed_region_ids.add(state_id)
|
processed_region_ids.add(state_id)
|
||||||
|
|
||||||
|
@ -163,6 +164,7 @@ class Command(BaseCommand):
|
||||||
)
|
)
|
||||||
regions_to_create.append(region_obj)
|
regions_to_create.append(region_obj)
|
||||||
# self.stdout.write(self.style.SUCCESS(f'State {state_id} prepared'))
|
# self.stdout.write(self.style.SUCCESS(f'State {state_id} prepared'))
|
||||||
|
log_memory_usage('state')
|
||||||
|
|
||||||
if 'cities' in state and len(state['cities']) > 0:
|
if 'cities' in state and len(state['cities']) > 0:
|
||||||
for city in state['cities']:
|
for city in state['cities']:
|
||||||
|
@ -195,6 +197,7 @@ class Command(BaseCommand):
|
||||||
)
|
)
|
||||||
cities_to_create.append(city_obj)
|
cities_to_create.append(city_obj)
|
||||||
# self.stdout.write(self.style.SUCCESS(f'City {city_id} prepared'))
|
# self.stdout.write(self.style.SUCCESS(f'City {city_id} prepared'))
|
||||||
|
log_memory_usage('city')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
state_id = f"{country_code}-00"
|
state_id = f"{country_code}-00"
|
||||||
|
@ -217,46 +220,42 @@ class Command(BaseCommand):
|
||||||
batch = countries_to_create[i:i + batch_size]
|
batch = countries_to_create[i:i + batch_size]
|
||||||
Country.objects.bulk_create(batch)
|
Country.objects.bulk_create(batch)
|
||||||
self.stdout.write(self.style.SUCCESS(f'Processed countries batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}'))
|
self.stdout.write(self.style.SUCCESS(f'Processed countries batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}'))
|
||||||
log_memory_usage(f"Country batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}")
|
log_memory_usage('country')
|
||||||
|
|
||||||
for i in range(0, len(regions_to_create), batch_size):
|
for i in range(0, len(regions_to_create), batch_size):
|
||||||
batch = regions_to_create[i:i + batch_size]
|
batch = regions_to_create[i:i + batch_size]
|
||||||
Region.objects.bulk_create(batch)
|
Region.objects.bulk_create(batch)
|
||||||
self.stdout.write(self.style.SUCCESS(f'Processed regions batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}'))
|
self.stdout.write(self.style.SUCCESS(f'Processed regions batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}'))
|
||||||
log_memory_usage(f"Region batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}")
|
log_memory_usage('region')
|
||||||
|
|
||||||
for i in range(0, len(cities_to_create), batch_size):
|
for i in range(0, len(cities_to_create), batch_size):
|
||||||
batch = cities_to_create[i:i + batch_size]
|
batch = cities_to_create[i:i + batch_size]
|
||||||
City.objects.bulk_create(batch)
|
City.objects.bulk_create(batch)
|
||||||
self.stdout.write(self.style.SUCCESS(f'Processed cities batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}'))
|
self.stdout.write(self.style.SUCCESS(f'Processed cities batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}'))
|
||||||
log_memory_usage(f"City batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}")
|
log_memory_usage('city')
|
||||||
|
|
||||||
# Process updates in batches
|
# Process updates in batches
|
||||||
for i in range(0, len(countries_to_update), batch_size):
|
for i in range(0, len(countries_to_update), batch_size):
|
||||||
batch = countries_to_update[i:i + batch_size]
|
batch = countries_to_update[i:i + batch_size]
|
||||||
Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])
|
Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])
|
||||||
self.stdout.write(self.style.SUCCESS(f'Updated countries batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}'))
|
self.stdout.write(self.style.SUCCESS(f'Updated countries batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}'))
|
||||||
log_memory_usage(f"Country update batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}")
|
log_memory_usage('country')
|
||||||
|
|
||||||
for i in range(0, len(regions_to_update), batch_size):
|
for i in range(0, len(regions_to_update), batch_size):
|
||||||
batch = regions_to_update[i:i + batch_size]
|
batch = regions_to_update[i:i + batch_size]
|
||||||
Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])
|
Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])
|
||||||
self.stdout.write(self.style.SUCCESS(f'Updated regions batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}'))
|
self.stdout.write(self.style.SUCCESS(f'Updated regions batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}'))
|
||||||
log_memory_usage(f"Region update batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}")
|
log_memory_usage('region')
|
||||||
|
|
||||||
for i in range(0, len(cities_to_update), batch_size):
|
for i in range(0, len(cities_to_update), batch_size):
|
||||||
batch = cities_to_update[i:i + batch_size]
|
batch = cities_to_update[i:i + batch_size]
|
||||||
City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])
|
City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])
|
||||||
self.stdout.write(self.style.SUCCESS(f'Updated cities batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}'))
|
self.stdout.write(self.style.SUCCESS(f'Updated cities batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}'))
|
||||||
log_memory_usage(f"City update batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}")
|
log_memory_usage('city')
|
||||||
|
|
||||||
# Delete countries and regions that are no longer in the data
|
# Delete countries and regions that are no longer in the data
|
||||||
Country.objects.exclude(country_code__in=processed_country_codes).delete()
|
Country.objects.exclude(country_code__in=processed_country_codes).delete()
|
||||||
log_memory_usage("Extra Countries deleted")
|
|
||||||
Region.objects.exclude(id__in=processed_region_ids).delete()
|
Region.objects.exclude(id__in=processed_region_ids).delete()
|
||||||
log_memory_usage("Extra Regions deleted")
|
|
||||||
City.objects.exclude(id__in=processed_city_ids).delete()
|
City.objects.exclude(id__in=processed_city_ids).delete()
|
||||||
log_memory_usage("Extra Cities deleted")
|
|
||||||
|
|
||||||
|
|
||||||
self.stdout.write(self.style.SUCCESS('All data imported successfully'))
|
self.stdout.write(self.style.SUCCESS('All data imported successfully'))
|
Loading…
Add table
Add a link
Reference in a new issue