1
0
Fork 0
mirror of https://github.com/seanmorley15/AdventureLog.git synced 2025-07-19 21:09:37 +02:00

fix: add insert_id field to City, Country, and Region models

This commit is contained in:
Sean Morley 2025-01-13 13:01:36 -05:00
parent 53d500b7d5
commit ac37bc98c4
3 changed files with 136 additions and 151 deletions

View file

@ -1,26 +1,31 @@
import os
from django.core.management.base import BaseCommand
import requests
import uuid
from django.core.management.base import BaseCommand
from worldtravel.models import Country, Region, City
from django.db import transaction
import ijson
from django.conf import settings
import psutil
def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in bytes."""
    # Look up this process by its own PID and read the RSS field directly.
    return psutil.Process(os.getpid()).memory_info().rss
def log_memory_usage(stage):
    """Print the current process memory usage, in MB, labelled with *stage*.

    Args:
        stage: Label interpolated into the printed line to identify the
            point in the run at which the measurement was taken.
    """
    # get_memory_usage() reports bytes; two divisions by 1024 convert to MB.
    megabytes = get_memory_usage() / 1024 / 1024
    print(f"Memory usage at {stage}: {megabytes:.2f} MB")
COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION
media_root = settings.MEDIA_ROOT
def saveCountryFlag(country_code):
# For standards, use the lowercase country_code
country_code = country_code.lower()
flags_dir = os.path.join(media_root, 'flags')
# Check if the flags directory exists, if not, create it
if not os.path.exists(flags_dir):
os.makedirs(flags_dir)
# Check if the flag already exists in the media folder
flag_path = os.path.join(flags_dir, f'{country_code}.png')
if os.path.exists(flag_path):
print(f'Flag for {country_code} already exists')
@ -41,8 +46,8 @@ class Command(BaseCommand):
parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file')
def handle(self, **options):
log_memory_usage("start")
force = options['force']
batch_size = 250
countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
if not os.path.exists(countries_json_path) or force:
res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
@ -63,25 +68,17 @@ class Command(BaseCommand):
else:
self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
return
insert_id = uuid.uuid4()
with transaction.atomic():
# Process data in chunks using ijson
f = open(countries_json_path, 'rb')
parser = ijson.items(f, 'item')
existing_countries = {country.country_code: country for country in Country.objects.all()}
existing_regions = {region.id: region for region in Region.objects.all()}
existing_cities = {city.id: city for city in City.objects.all()}
countries_to_create = []
regions_to_create = []
countries_to_update = []
regions_to_update = []
cities_to_create = []
cities_to_update = []
processed_country_codes = set()
processed_region_ids = set()
processed_city_ids = set()
region_batch = []
city_batch = []
existing_region_ids = set()
existing_city_ids = set()
for country in parser:
country_code = country['iso2']
@ -91,145 +88,102 @@ class Command(BaseCommand):
longitude = round(float(country['longitude']), 6) if country['longitude'] else None
latitude = round(float(country['latitude']), 6) if country['latitude'] else None
processed_country_codes.add(country_code)
if country_code in existing_countries:
country_obj = existing_countries[country_code]
country_obj.name = country_name
country_obj.subregion = country_subregion
country_obj.capital = country_capital
country_obj.longitude = longitude
country_obj.latitude = latitude
countries_to_update.append(country_obj)
else:
country_obj = Country(
name=country_name,
country_code=country_code,
subregion=country_subregion,
capital=country_capital,
longitude=longitude,
latitude=latitude
)
countries_to_create.append(country_obj)
country_obj, _ = Country.objects.update_or_create(
country_code=country_code,
defaults={
'name': country_name,
'subregion': country_subregion,
'capital': country_capital,
'longitude': longitude,
'latitude': latitude,
'insert_id': insert_id
}
)
saveCountryFlag(country_code)
# self.stdout.write(self.style.SUCCESS(f'Country {country_name} prepared'))
log_memory_usage(country_code)
if country['states']:
for state in country['states']:
name = state['name']
state_id = f"{country_code}-{state['state_code']}"
latitude = round(float(state['latitude']), 6) if state['latitude'] else None
longitude = round(float(state['longitude']), 6) if state['longitude'] else None
# Check for duplicate regions
if state_id in processed_region_ids:
self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
continue
processed_region_ids.add(state_id)
if state_id in existing_regions:
region_obj = existing_regions[state_id]
region_obj.name = name
region_obj.country = country_obj
region_obj.longitude = longitude
region_obj.latitude = latitude
regions_to_update.append(region_obj)
else:
region_obj = Region(
id=state_id,
name=name,
country=country_obj,
longitude=longitude,
latitude=latitude
)
regions_to_create.append(region_obj)
# self.stdout.write(self.style.SUCCESS(f'State {state_id} prepared'))
if 'cities' in state and len(state['cities']) > 0:
for city in state['cities']:
city_id = f"{state_id}-{city['id']}"
city_name = city['name']
latitude = round(float(city['latitude']), 6) if city['latitude'] else None
longitude = round(float(city['longitude']), 6) if city['longitude'] else None
# Check for duplicate cities
if city_id in processed_city_ids:
self.stdout.write(self.style.ERROR(f'City {city_id} already processed'))
continue
processed_city_ids.add(city_id)
if city_id in existing_cities:
city_obj = existing_cities[city_id]
city_obj.name = city_name
city_obj.region = region_obj
city_obj.longitude = longitude
city_obj.latitude = latitude
cities_to_update.append(city_obj)
else:
city_obj = City(
id=city_id,
name=city_name,
region=region_obj,
longitude=longitude,
latitude=latitude
)
cities_to_create.append(city_obj)
# self.stdout.write(self.style.SUCCESS(f'City {city_id} prepared'))
else:
state_id = f"{country_code}-00"
processed_region_ids.add(state_id)
if state_id in existing_regions:
region_obj = existing_regions[state_id]
region_obj.name = country_name
region_obj.country = country_obj
regions_to_update.append(region_obj)
else:
if country['states']:
for state in country['states']:
state_id = f"{country_code}-{state['state_code']}" if state['state_code'] else f"{country_code}-00"
# Ensure no duplicate regions
if state_id not in existing_region_ids:
region_obj = Region(
id=state_id,
name=country_name,
country=country_obj
name=state['name'],
country=country_obj,
longitude=state['longitude'],
latitude=state['latitude'],
insert_id=insert_id
)
regions_to_create.append(region_obj)
# self.stdout.write(self.style.SUCCESS(f'Region {state_id} prepared for {country_name}'))
# Process in batches
for i in range(0, len(countries_to_create), batch_size):
batch = countries_to_create[i:i + batch_size]
Country.objects.bulk_create(batch)
self.stdout.write(self.style.SUCCESS(f'Processed countries batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}'))
region_batch.append(region_obj)
existing_region_ids.add(state_id)
log_memory_usage(state_id)
for i in range(0, len(regions_to_create), batch_size):
batch = regions_to_create[i:i + batch_size]
Region.objects.bulk_create(batch)
self.stdout.write(self.style.SUCCESS(f'Processed regions batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}'))
# Handle cities and avoid duplicates
if 'cities' in state and len(state['cities']) > 0:
for city in state['cities']:
city_id = f"{state_id}-{city['id']}"
if city_id not in existing_city_ids:
city_obj = City(
id=city_id,
name=city['name'],
region=region_obj,
longitude=city['longitude'],
latitude=city['latitude'],
insert_id=insert_id
)
city_batch.append(city_obj)
existing_city_ids.add(city_id)
for i in range(0, len(cities_to_create), batch_size):
batch = cities_to_create[i:i + batch_size]
City.objects.bulk_create(batch)
self.stdout.write(self.style.SUCCESS(f'Processed cities batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}'))
# Bulk insert regions in smaller batches
if len(region_batch) >= 100:
Region.objects.bulk_create(
region_batch,
update_conflicts=True,
batch_size=100,
update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
region_batch.clear()
# Process updates in batches
for i in range(0, len(countries_to_update), batch_size):
batch = countries_to_update[i:i + batch_size]
Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])
self.stdout.write(self.style.SUCCESS(f'Updated countries batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}'))
# Bulk insert cities in smaller batches
if len(city_batch) >= 100:
City.objects.bulk_create(
city_batch,
update_conflicts=True,
batch_size=100,
update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
city_batch.clear()
for i in range(0, len(regions_to_update), batch_size):
batch = regions_to_update[i:i + batch_size]
Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])
self.stdout.write(self.style.SUCCESS(f'Updated regions batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}'))
# Final insertion of any remaining regions and cities
if region_batch:
Region.objects.bulk_create(
region_batch,
update_conflicts=True,
batch_size=100,
update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
for i in range(0, len(cities_to_update), batch_size):
batch = cities_to_update[i:i + batch_size]
City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])
self.stdout.write(self.style.SUCCESS(f'Updated cities batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}'))
if city_batch:
City.objects.bulk_create(
city_batch,
update_conflicts=True,
batch_size=100,
update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
unique_fields=['id']
)
# Delete countries and regions that are no longer in the data
Country.objects.exclude(country_code__in=processed_country_codes).delete()
Region.objects.exclude(id__in=processed_region_ids).delete()
City.objects.exclude(id__in=processed_city_ids).delete()
self.stdout.write(self.style.SUCCESS('Regions and cities created'))
self.stdout.write(self.style.SUCCESS('All data imported successfully'))
# Clean up old data
Country.objects.exclude(insert_id=insert_id).delete()
Region.objects.exclude(insert_id=insert_id).delete()
City.objects.exclude(insert_id=insert_id).delete()
self.stdout.write(self.style.SUCCESS('All data imported successfully and old data cleaned up'))

View file

@ -0,0 +1,28 @@
# Generated by Django 5.0.8 on 2025-01-13 17:50
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a nullable, blank-able ``insert_id`` UUID field to three models.

    The field is added to ``city``, ``country`` and ``region`` with the same
    definition on each.
    """

    dependencies = [('worldtravel', '0014_alter_visitedcity_options')]

    # One AddField per model, emitted in the order city, country, region.
    # A fresh UUIDField instance is built for every operation.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='insert_id',
            field=models.UUIDField(blank=True, null=True),
        )
        for target_model in ('city', 'country', 'region')
    ]

View file

@ -17,6 +17,7 @@ class Country(models.Model):
capital = models.CharField(max_length=100, blank=True, null=True)
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
insert_id = models.UUIDField(unique=False, blank=True, null=True)
class Meta:
verbose_name = "Country"
@ -31,6 +32,7 @@ class Region(models.Model):
country = models.ForeignKey(Country, on_delete=models.CASCADE)
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
insert_id = models.UUIDField(unique=False, blank=True, null=True)
def __str__(self):
return self.name
@ -41,6 +43,7 @@ class City(models.Model):
region = models.ForeignKey(Region, on_delete=models.CASCADE)
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
insert_id = models.UUIDField(unique=False, blank=True, null=True)
class Meta:
verbose_name_plural = "Cities"