1
0
Fork 0
mirror of https://github.com/seanmorley15/AdventureLog.git synced 2025-07-20 21:39:37 +02:00

fix: add insert_id field to City, Country, and Region models

This commit is contained in:
Sean Morley 2025-01-13 13:01:36 -05:00
parent 53d500b7d5
commit ac37bc98c4
3 changed files with 136 additions and 151 deletions

View file

@ -1,26 +1,31 @@
import os import os
from django.core.management.base import BaseCommand
import requests import requests
import uuid
from django.core.management.base import BaseCommand
from worldtravel.models import Country, Region, City from worldtravel.models import Country, Region, City
from django.db import transaction from django.db import transaction
import ijson import ijson
from django.conf import settings from django.conf import settings
import psutil
def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in bytes."""
    # Look up this very process by PID and read its RSS counter directly.
    return psutil.Process(os.getpid()).memory_info().rss
def log_memory_usage(stage):
    """Print the current process memory usage in MB, labelled with ``stage``.

    ``stage`` is interpolated verbatim into the printed message; the byte
    count comes from ``get_memory_usage()`` and is shown with two decimals.
    """
    # Two successive divisions by 1024 convert bytes -> KB -> MB.
    usage_mb = get_memory_usage() / 1024 / 1024
    print(f"Memory usage at {stage}: {usage_mb:.2f} MB")
COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION
media_root = settings.MEDIA_ROOT media_root = settings.MEDIA_ROOT
def saveCountryFlag(country_code): def saveCountryFlag(country_code):
# For standards, use the lowercase country_code
country_code = country_code.lower() country_code = country_code.lower()
flags_dir = os.path.join(media_root, 'flags') flags_dir = os.path.join(media_root, 'flags')
# Check if the flags directory exists, if not, create it
if not os.path.exists(flags_dir): if not os.path.exists(flags_dir):
os.makedirs(flags_dir) os.makedirs(flags_dir)
# Check if the flag already exists in the media folder
flag_path = os.path.join(flags_dir, f'{country_code}.png') flag_path = os.path.join(flags_dir, f'{country_code}.png')
if os.path.exists(flag_path): if os.path.exists(flag_path):
print(f'Flag for {country_code} already exists') print(f'Flag for {country_code} already exists')
@ -41,8 +46,8 @@ class Command(BaseCommand):
parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file') parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file')
def handle(self, **options): def handle(self, **options):
log_memory_usage("start")
force = options['force'] force = options['force']
batch_size = 250
countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json') countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
if not os.path.exists(countries_json_path) or force: if not os.path.exists(countries_json_path) or force:
res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json') res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
@ -63,25 +68,17 @@ class Command(BaseCommand):
else: else:
self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.')) self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
return return
insert_id = uuid.uuid4()
with transaction.atomic(): with transaction.atomic():
# Process data in chunks using ijson
f = open(countries_json_path, 'rb') f = open(countries_json_path, 'rb')
parser = ijson.items(f, 'item') parser = ijson.items(f, 'item')
existing_countries = {country.country_code: country for country in Country.objects.all()}
existing_regions = {region.id: region for region in Region.objects.all()}
existing_cities = {city.id: city for city in City.objects.all()}
countries_to_create = [] region_batch = []
regions_to_create = [] city_batch = []
countries_to_update = [] existing_region_ids = set()
regions_to_update = [] existing_city_ids = set()
cities_to_create = []
cities_to_update = []
processed_country_codes = set()
processed_region_ids = set()
processed_city_ids = set()
for country in parser: for country in parser:
country_code = country['iso2'] country_code = country['iso2']
@ -91,145 +88,102 @@ class Command(BaseCommand):
longitude = round(float(country['longitude']), 6) if country['longitude'] else None longitude = round(float(country['longitude']), 6) if country['longitude'] else None
latitude = round(float(country['latitude']), 6) if country['latitude'] else None latitude = round(float(country['latitude']), 6) if country['latitude'] else None
processed_country_codes.add(country_code) country_obj, _ = Country.objects.update_or_create(
country_code=country_code,
if country_code in existing_countries: defaults={
country_obj = existing_countries[country_code] 'name': country_name,
country_obj.name = country_name 'subregion': country_subregion,
country_obj.subregion = country_subregion 'capital': country_capital,
country_obj.capital = country_capital 'longitude': longitude,
country_obj.longitude = longitude 'latitude': latitude,
country_obj.latitude = latitude 'insert_id': insert_id
countries_to_update.append(country_obj) }
else: )
country_obj = Country(
name=country_name,
country_code=country_code,
subregion=country_subregion,
capital=country_capital,
longitude=longitude,
latitude=latitude
)
countries_to_create.append(country_obj)
saveCountryFlag(country_code) saveCountryFlag(country_code)
# self.stdout.write(self.style.SUCCESS(f'Country {country_name} prepared')) log_memory_usage(country_code)
if country['states']: if country['states']:
for state in country['states']: for state in country['states']:
name = state['name'] state_id = f"{country_code}-{state['state_code']}" if state['state_code'] else f"{country_code}-00"
state_id = f"{country_code}-{state['state_code']}"
latitude = round(float(state['latitude']), 6) if state['latitude'] else None # Ensure no duplicate regions
longitude = round(float(state['longitude']), 6) if state['longitude'] else None if state_id not in existing_region_ids:
# Check for duplicate regions
if state_id in processed_region_ids:
self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
continue
processed_region_ids.add(state_id)
if state_id in existing_regions:
region_obj = existing_regions[state_id]
region_obj.name = name
region_obj.country = country_obj
region_obj.longitude = longitude
region_obj.latitude = latitude
regions_to_update.append(region_obj)
else:
region_obj = Region(
id=state_id,
name=name,
country=country_obj,
longitude=longitude,
latitude=latitude
)
regions_to_create.append(region_obj)
# self.stdout.write(self.style.SUCCESS(f'State {state_id} prepared'))
if 'cities' in state and len(state['cities']) > 0:
for city in state['cities']:
city_id = f"{state_id}-{city['id']}"
city_name = city['name']
latitude = round(float(city['latitude']), 6) if city['latitude'] else None
longitude = round(float(city['longitude']), 6) if city['longitude'] else None
# Check for duplicate cities
if city_id in processed_city_ids:
self.stdout.write(self.style.ERROR(f'City {city_id} already processed'))
continue
processed_city_ids.add(city_id)
if city_id in existing_cities:
city_obj = existing_cities[city_id]
city_obj.name = city_name
city_obj.region = region_obj
city_obj.longitude = longitude
city_obj.latitude = latitude
cities_to_update.append(city_obj)
else:
city_obj = City(
id=city_id,
name=city_name,
region=region_obj,
longitude=longitude,
latitude=latitude
)
cities_to_create.append(city_obj)
# self.stdout.write(self.style.SUCCESS(f'City {city_id} prepared'))
else:
state_id = f"{country_code}-00"
processed_region_ids.add(state_id)
if state_id in existing_regions:
region_obj = existing_regions[state_id]
region_obj.name = country_name
region_obj.country = country_obj
regions_to_update.append(region_obj)
else:
region_obj = Region( region_obj = Region(
id=state_id, id=state_id,
name=country_name, name=state['name'],
country=country_obj country=country_obj,
longitude=state['longitude'],
latitude=state['latitude'],
insert_id=insert_id
) )
regions_to_create.append(region_obj) region_batch.append(region_obj)
# self.stdout.write(self.style.SUCCESS(f'Region {state_id} prepared for {country_name}')) existing_region_ids.add(state_id)
# Process in batches log_memory_usage(state_id)
for i in range(0, len(countries_to_create), batch_size):
batch = countries_to_create[i:i + batch_size]
Country.objects.bulk_create(batch)
self.stdout.write(self.style.SUCCESS(f'Processed countries batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}'))
for i in range(0, len(regions_to_create), batch_size): # Handle cities and avoid duplicates
batch = regions_to_create[i:i + batch_size] if 'cities' in state and len(state['cities']) > 0:
Region.objects.bulk_create(batch) for city in state['cities']:
self.stdout.write(self.style.SUCCESS(f'Processed regions batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}')) city_id = f"{state_id}-{city['id']}"
if city_id not in existing_city_ids:
city_obj = City(
id=city_id,
name=city['name'],
region=region_obj,
longitude=city['longitude'],
latitude=city['latitude'],
insert_id=insert_id
)
city_batch.append(city_obj)
existing_city_ids.add(city_id)
for i in range(0, len(cities_to_create), batch_size): # Bulk insert regions in smaller batches
batch = cities_to_create[i:i + batch_size] if len(region_batch) >= 100:
City.objects.bulk_create(batch) Region.objects.bulk_create(
self.stdout.write(self.style.SUCCESS(f'Processed cities batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}')) region_batch,
update_conflicts=True,
batch_size=100,
update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
region_batch.clear()
# Process updates in batches # Bulk insert cities in smaller batches
for i in range(0, len(countries_to_update), batch_size): if len(city_batch) >= 100:
batch = countries_to_update[i:i + batch_size] City.objects.bulk_create(
Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude']) city_batch,
self.stdout.write(self.style.SUCCESS(f'Updated countries batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}')) update_conflicts=True,
batch_size=100,
update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
city_batch.clear()
for i in range(0, len(regions_to_update), batch_size): # Final insertion of any remaining regions and cities
batch = regions_to_update[i:i + batch_size] if region_batch:
Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude']) Region.objects.bulk_create(
self.stdout.write(self.style.SUCCESS(f'Updated regions batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}')) region_batch,
update_conflicts=True,
batch_size=100,
update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
for i in range(0, len(cities_to_update), batch_size): if city_batch:
batch = cities_to_update[i:i + batch_size] City.objects.bulk_create(
City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude']) city_batch,
self.stdout.write(self.style.SUCCESS(f'Updated cities batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}')) update_conflicts=True,
batch_size=100,
update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
# Delete countries and regions that are no longer in the data self.stdout.write(self.style.SUCCESS('Regions and cities created'))
Country.objects.exclude(country_code__in=processed_country_codes).delete()
Region.objects.exclude(id__in=processed_region_ids).delete()
City.objects.exclude(id__in=processed_city_ids).delete()
self.stdout.write(self.style.SUCCESS('All data imported successfully')) # Clean up old data
Country.objects.exclude(insert_id=insert_id).delete()
Region.objects.exclude(insert_id=insert_id).delete()
City.objects.exclude(insert_id=insert_id).delete()
self.stdout.write(self.style.SUCCESS('All data imported successfully and old data cleaned up'))

View file

@ -0,0 +1,28 @@
# Generated by Django 5.0.8 on 2025-01-13 17:50
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add an optional, nullable UUID ``insert_id`` field to City, Country and Region."""

    dependencies = [
        ('worldtravel', '0014_alter_visitedcity_options'),
    ]

    # The same field definition is added to each model, in the order
    # city -> country -> region.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='insert_id',
            field=models.UUIDField(blank=True, null=True),
        )
        for target_model in ('city', 'country', 'region')
    ]

View file

@ -17,6 +17,7 @@ class Country(models.Model):
capital = models.CharField(max_length=100, blank=True, null=True) capital = models.CharField(max_length=100, blank=True, null=True)
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True) longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True) latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
insert_id = models.UUIDField(unique=False, blank=True, null=True)
class Meta: class Meta:
verbose_name = "Country" verbose_name = "Country"
@ -31,6 +32,7 @@ class Region(models.Model):
country = models.ForeignKey(Country, on_delete=models.CASCADE) country = models.ForeignKey(Country, on_delete=models.CASCADE)
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True) longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True) latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
insert_id = models.UUIDField(unique=False, blank=True, null=True)
def __str__(self): def __str__(self):
return self.name return self.name
@ -41,6 +43,7 @@ class City(models.Model):
region = models.ForeignKey(Region, on_delete=models.CASCADE) region = models.ForeignKey(Region, on_delete=models.CASCADE)
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True) longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True) latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
insert_id = models.UUIDField(unique=False, blank=True, null=True)
class Meta: class Meta:
verbose_name_plural = "Cities" verbose_name_plural = "Cities"