mirror of
https://github.com/seanmorley15/AdventureLog.git
synced 2025-07-20 21:39:37 +02:00
fix: add insert_id field to City, Country, and Region models
This commit is contained in:
parent
53d500b7d5
commit
ac37bc98c4
3 changed files with 136 additions and 151 deletions
|
@ -1,26 +1,31 @@
|
||||||
import os
|
import os
|
||||||
from django.core.management.base import BaseCommand
|
|
||||||
import requests
|
import requests
|
||||||
|
import uuid
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
from worldtravel.models import Country, Region, City
|
from worldtravel.models import Country, Region, City
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
import ijson
|
import ijson
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in bytes."""
    # Look the process up by PID so the reading is for this interpreter.
    current_process = psutil.Process(os.getpid())
    return current_process.memory_info().rss
|
||||||
|
|
||||||
|
def log_memory_usage(stage):
    """Print the current process memory usage in MB, labelled with ``stage``.

    ``stage`` is interpolated into the message as-is, so any printable
    value (a phase name, a country code, a region id) can be used.
    """
    # get_memory_usage() reports bytes; two divisions by 1024 convert to MB.
    megabytes = get_memory_usage() / 1024 / 1024
    print(f"Memory usage at {stage}: {megabytes:.2f} MB")
|
||||||
|
|
||||||
COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION
|
COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION
|
||||||
|
|
||||||
media_root = settings.MEDIA_ROOT
|
media_root = settings.MEDIA_ROOT
|
||||||
|
|
||||||
def saveCountryFlag(country_code):
|
def saveCountryFlag(country_code):
|
||||||
# For standards, use the lowercase country_code
|
|
||||||
country_code = country_code.lower()
|
country_code = country_code.lower()
|
||||||
flags_dir = os.path.join(media_root, 'flags')
|
flags_dir = os.path.join(media_root, 'flags')
|
||||||
|
|
||||||
# Check if the flags directory exists, if not, create it
|
|
||||||
if not os.path.exists(flags_dir):
|
if not os.path.exists(flags_dir):
|
||||||
os.makedirs(flags_dir)
|
os.makedirs(flags_dir)
|
||||||
|
|
||||||
# Check if the flag already exists in the media folder
|
|
||||||
flag_path = os.path.join(flags_dir, f'{country_code}.png')
|
flag_path = os.path.join(flags_dir, f'{country_code}.png')
|
||||||
if os.path.exists(flag_path):
|
if os.path.exists(flag_path):
|
||||||
print(f'Flag for {country_code} already exists')
|
print(f'Flag for {country_code} already exists')
|
||||||
|
@ -41,8 +46,8 @@ class Command(BaseCommand):
|
||||||
parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file')
|
parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file')
|
||||||
|
|
||||||
def handle(self, **options):
|
def handle(self, **options):
|
||||||
|
log_memory_usage("start")
|
||||||
force = options['force']
|
force = options['force']
|
||||||
batch_size = 250
|
|
||||||
countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
|
countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
|
||||||
if not os.path.exists(countries_json_path) or force:
|
if not os.path.exists(countries_json_path) or force:
|
||||||
res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
|
res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
|
||||||
|
@ -63,25 +68,17 @@ class Command(BaseCommand):
|
||||||
else:
|
else:
|
||||||
self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
|
self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
insert_id = uuid.uuid4()
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
# Process data in chunks using ijson
|
|
||||||
f = open(countries_json_path, 'rb')
|
f = open(countries_json_path, 'rb')
|
||||||
parser = ijson.items(f, 'item')
|
parser = ijson.items(f, 'item')
|
||||||
existing_countries = {country.country_code: country for country in Country.objects.all()}
|
|
||||||
existing_regions = {region.id: region for region in Region.objects.all()}
|
|
||||||
existing_cities = {city.id: city for city in City.objects.all()}
|
|
||||||
|
|
||||||
countries_to_create = []
|
region_batch = []
|
||||||
regions_to_create = []
|
city_batch = []
|
||||||
countries_to_update = []
|
existing_region_ids = set()
|
||||||
regions_to_update = []
|
existing_city_ids = set()
|
||||||
cities_to_create = []
|
|
||||||
cities_to_update = []
|
|
||||||
|
|
||||||
processed_country_codes = set()
|
|
||||||
processed_region_ids = set()
|
|
||||||
processed_city_ids = set()
|
|
||||||
|
|
||||||
for country in parser:
|
for country in parser:
|
||||||
country_code = country['iso2']
|
country_code = country['iso2']
|
||||||
|
@ -91,145 +88,102 @@ class Command(BaseCommand):
|
||||||
longitude = round(float(country['longitude']), 6) if country['longitude'] else None
|
longitude = round(float(country['longitude']), 6) if country['longitude'] else None
|
||||||
latitude = round(float(country['latitude']), 6) if country['latitude'] else None
|
latitude = round(float(country['latitude']), 6) if country['latitude'] else None
|
||||||
|
|
||||||
processed_country_codes.add(country_code)
|
country_obj, _ = Country.objects.update_or_create(
|
||||||
|
country_code=country_code,
|
||||||
if country_code in existing_countries:
|
defaults={
|
||||||
country_obj = existing_countries[country_code]
|
'name': country_name,
|
||||||
country_obj.name = country_name
|
'subregion': country_subregion,
|
||||||
country_obj.subregion = country_subregion
|
'capital': country_capital,
|
||||||
country_obj.capital = country_capital
|
'longitude': longitude,
|
||||||
country_obj.longitude = longitude
|
'latitude': latitude,
|
||||||
country_obj.latitude = latitude
|
'insert_id': insert_id
|
||||||
countries_to_update.append(country_obj)
|
}
|
||||||
else:
|
)
|
||||||
country_obj = Country(
|
|
||||||
name=country_name,
|
|
||||||
country_code=country_code,
|
|
||||||
subregion=country_subregion,
|
|
||||||
capital=country_capital,
|
|
||||||
longitude=longitude,
|
|
||||||
latitude=latitude
|
|
||||||
)
|
|
||||||
countries_to_create.append(country_obj)
|
|
||||||
|
|
||||||
saveCountryFlag(country_code)
|
saveCountryFlag(country_code)
|
||||||
# self.stdout.write(self.style.SUCCESS(f'Country {country_name} prepared'))
|
log_memory_usage(country_code)
|
||||||
|
|
||||||
if country['states']:
|
if country['states']:
|
||||||
for state in country['states']:
|
for state in country['states']:
|
||||||
name = state['name']
|
state_id = f"{country_code}-{state['state_code']}" if state['state_code'] else f"{country_code}-00"
|
||||||
state_id = f"{country_code}-{state['state_code']}"
|
|
||||||
latitude = round(float(state['latitude']), 6) if state['latitude'] else None
|
# Ensure no duplicate regions
|
||||||
longitude = round(float(state['longitude']), 6) if state['longitude'] else None
|
if state_id not in existing_region_ids:
|
||||||
|
|
||||||
# Check for duplicate regions
|
|
||||||
if state_id in processed_region_ids:
|
|
||||||
self.stdout.write(self.style.ERROR(f'State {state_id} already processed'))
|
|
||||||
continue
|
|
||||||
|
|
||||||
processed_region_ids.add(state_id)
|
|
||||||
|
|
||||||
if state_id in existing_regions:
|
|
||||||
region_obj = existing_regions[state_id]
|
|
||||||
region_obj.name = name
|
|
||||||
region_obj.country = country_obj
|
|
||||||
region_obj.longitude = longitude
|
|
||||||
region_obj.latitude = latitude
|
|
||||||
regions_to_update.append(region_obj)
|
|
||||||
else:
|
|
||||||
region_obj = Region(
|
|
||||||
id=state_id,
|
|
||||||
name=name,
|
|
||||||
country=country_obj,
|
|
||||||
longitude=longitude,
|
|
||||||
latitude=latitude
|
|
||||||
)
|
|
||||||
regions_to_create.append(region_obj)
|
|
||||||
# self.stdout.write(self.style.SUCCESS(f'State {state_id} prepared'))
|
|
||||||
|
|
||||||
if 'cities' in state and len(state['cities']) > 0:
|
|
||||||
for city in state['cities']:
|
|
||||||
city_id = f"{state_id}-{city['id']}"
|
|
||||||
city_name = city['name']
|
|
||||||
latitude = round(float(city['latitude']), 6) if city['latitude'] else None
|
|
||||||
longitude = round(float(city['longitude']), 6) if city['longitude'] else None
|
|
||||||
|
|
||||||
# Check for duplicate cities
|
|
||||||
if city_id in processed_city_ids:
|
|
||||||
self.stdout.write(self.style.ERROR(f'City {city_id} already processed'))
|
|
||||||
continue
|
|
||||||
|
|
||||||
processed_city_ids.add(city_id)
|
|
||||||
|
|
||||||
if city_id in existing_cities:
|
|
||||||
city_obj = existing_cities[city_id]
|
|
||||||
city_obj.name = city_name
|
|
||||||
city_obj.region = region_obj
|
|
||||||
city_obj.longitude = longitude
|
|
||||||
city_obj.latitude = latitude
|
|
||||||
cities_to_update.append(city_obj)
|
|
||||||
else:
|
|
||||||
city_obj = City(
|
|
||||||
id=city_id,
|
|
||||||
name=city_name,
|
|
||||||
region=region_obj,
|
|
||||||
longitude=longitude,
|
|
||||||
latitude=latitude
|
|
||||||
)
|
|
||||||
cities_to_create.append(city_obj)
|
|
||||||
# self.stdout.write(self.style.SUCCESS(f'City {city_id} prepared'))
|
|
||||||
|
|
||||||
else:
|
|
||||||
state_id = f"{country_code}-00"
|
|
||||||
processed_region_ids.add(state_id)
|
|
||||||
if state_id in existing_regions:
|
|
||||||
region_obj = existing_regions[state_id]
|
|
||||||
region_obj.name = country_name
|
|
||||||
region_obj.country = country_obj
|
|
||||||
regions_to_update.append(region_obj)
|
|
||||||
else:
|
|
||||||
region_obj = Region(
|
region_obj = Region(
|
||||||
id=state_id,
|
id=state_id,
|
||||||
name=country_name,
|
name=state['name'],
|
||||||
country=country_obj
|
country=country_obj,
|
||||||
|
longitude=state['longitude'],
|
||||||
|
latitude=state['latitude'],
|
||||||
|
insert_id=insert_id
|
||||||
)
|
)
|
||||||
regions_to_create.append(region_obj)
|
region_batch.append(region_obj)
|
||||||
# self.stdout.write(self.style.SUCCESS(f'Region {state_id} prepared for {country_name}'))
|
existing_region_ids.add(state_id)
|
||||||
# Process in batches
|
log_memory_usage(state_id)
|
||||||
for i in range(0, len(countries_to_create), batch_size):
|
|
||||||
batch = countries_to_create[i:i + batch_size]
|
|
||||||
Country.objects.bulk_create(batch)
|
|
||||||
self.stdout.write(self.style.SUCCESS(f'Processed countries batch {i//batch_size + 1}/{(len(countries_to_create)-1)//batch_size + 1}'))
|
|
||||||
|
|
||||||
for i in range(0, len(regions_to_create), batch_size):
|
# Handle cities and avoid duplicates
|
||||||
batch = regions_to_create[i:i + batch_size]
|
if 'cities' in state and len(state['cities']) > 0:
|
||||||
Region.objects.bulk_create(batch)
|
for city in state['cities']:
|
||||||
self.stdout.write(self.style.SUCCESS(f'Processed regions batch {i//batch_size + 1}/{(len(regions_to_create)-1)//batch_size + 1}'))
|
city_id = f"{state_id}-{city['id']}"
|
||||||
|
|
||||||
|
if city_id not in existing_city_ids:
|
||||||
|
city_obj = City(
|
||||||
|
id=city_id,
|
||||||
|
name=city['name'],
|
||||||
|
region=region_obj,
|
||||||
|
longitude=city['longitude'],
|
||||||
|
latitude=city['latitude'],
|
||||||
|
insert_id=insert_id
|
||||||
|
)
|
||||||
|
city_batch.append(city_obj)
|
||||||
|
existing_city_ids.add(city_id)
|
||||||
|
|
||||||
for i in range(0, len(cities_to_create), batch_size):
|
# Bulk insert regions in smaller batches
|
||||||
batch = cities_to_create[i:i + batch_size]
|
if len(region_batch) >= 100:
|
||||||
City.objects.bulk_create(batch)
|
Region.objects.bulk_create(
|
||||||
self.stdout.write(self.style.SUCCESS(f'Processed cities batch {i//batch_size + 1}/{(len(cities_to_create)-1)//batch_size + 1}'))
|
region_batch,
|
||||||
|
update_conflicts=True,
|
||||||
|
batch_size=100,
|
||||||
|
update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
|
||||||
|
unique_fields=['id']
|
||||||
|
)
|
||||||
|
region_batch.clear()
|
||||||
|
|
||||||
# Process updates in batches
|
# Bulk insert cities in smaller batches
|
||||||
for i in range(0, len(countries_to_update), batch_size):
|
if len(city_batch) >= 100:
|
||||||
batch = countries_to_update[i:i + batch_size]
|
City.objects.bulk_create(
|
||||||
Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])
|
city_batch,
|
||||||
self.stdout.write(self.style.SUCCESS(f'Updated countries batch {i//batch_size + 1}/{(len(countries_to_update)-1)//batch_size + 1}'))
|
update_conflicts=True,
|
||||||
|
batch_size=100,
|
||||||
|
update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
|
||||||
|
unique_fields=['id']
|
||||||
|
)
|
||||||
|
city_batch.clear()
|
||||||
|
|
||||||
for i in range(0, len(regions_to_update), batch_size):
|
# Final insertion of any remaining regions and cities
|
||||||
batch = regions_to_update[i:i + batch_size]
|
if region_batch:
|
||||||
Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])
|
Region.objects.bulk_create(
|
||||||
self.stdout.write(self.style.SUCCESS(f'Updated regions batch {i//batch_size + 1}/{(len(regions_to_update)-1)//batch_size + 1}'))
|
region_batch,
|
||||||
|
update_conflicts=True,
|
||||||
|
batch_size=100,
|
||||||
|
update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
|
||||||
|
unique_fields=['id']
|
||||||
|
)
|
||||||
|
|
||||||
for i in range(0, len(cities_to_update), batch_size):
|
if city_batch:
|
||||||
batch = cities_to_update[i:i + batch_size]
|
City.objects.bulk_create(
|
||||||
City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])
|
city_batch,
|
||||||
self.stdout.write(self.style.SUCCESS(f'Updated cities batch {i//batch_size + 1}/{(len(cities_to_update)-1)//batch_size + 1}'))
|
update_conflicts=True,
|
||||||
|
batch_size=100,
|
||||||
|
update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
|
||||||
|
unique_fields=['id']
|
||||||
|
)
|
||||||
|
|
||||||
# Delete countries and regions that are no longer in the data
|
self.stdout.write(self.style.SUCCESS('Regions and cities created'))
|
||||||
Country.objects.exclude(country_code__in=processed_country_codes).delete()
|
|
||||||
Region.objects.exclude(id__in=processed_region_ids).delete()
|
|
||||||
City.objects.exclude(id__in=processed_city_ids).delete()
|
|
||||||
|
|
||||||
self.stdout.write(self.style.SUCCESS('All data imported successfully'))
|
# Clean up old data
|
||||||
|
Country.objects.exclude(insert_id=insert_id).delete()
|
||||||
|
Region.objects.exclude(insert_id=insert_id).delete()
|
||||||
|
City.objects.exclude(insert_id=insert_id).delete()
|
||||||
|
|
||||||
|
self.stdout.write(self.style.SUCCESS('All data imported successfully and old data cleaned up'))
|
|
@ -0,0 +1,28 @@
|
||||||
|
# Generated by Django 5.0.8 on 2025-01-13 17:50
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add a nullable ``insert_id`` UUID field to City, Country and Region."""

    dependencies = [('worldtravel', '0014_alter_visitedcity_options')]

    # The same field definition is added to each model, in the original
    # order: city, country, region.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='insert_id',
            field=models.UUIDField(blank=True, null=True),
        )
        for target_model in ('city', 'country', 'region')
    ]
|
|
@ -17,6 +17,7 @@ class Country(models.Model):
|
||||||
capital = models.CharField(max_length=100, blank=True, null=True)
|
capital = models.CharField(max_length=100, blank=True, null=True)
|
||||||
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
||||||
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
||||||
|
insert_id = models.UUIDField(unique=False, blank=True, null=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
verbose_name = "Country"
|
verbose_name = "Country"
|
||||||
|
@ -31,6 +32,7 @@ class Region(models.Model):
|
||||||
country = models.ForeignKey(Country, on_delete=models.CASCADE)
|
country = models.ForeignKey(Country, on_delete=models.CASCADE)
|
||||||
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
||||||
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
||||||
|
insert_id = models.UUIDField(unique=False, blank=True, null=True)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
@ -41,6 +43,7 @@ class City(models.Model):
|
||||||
region = models.ForeignKey(Region, on_delete=models.CASCADE)
|
region = models.ForeignKey(Region, on_delete=models.CASCADE)
|
||||||
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
longitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
||||||
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
latitude = models.DecimalField(max_digits=9, decimal_places=6, null=True, blank=True)
|
||||||
|
insert_id = models.UUIDField(unique=False, blank=True, null=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
verbose_name_plural = "Cities"
|
verbose_name_plural = "Cities"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue