AdventureLog/backend/server/worldtravel/management/commands/download-countries.py

import os
import requests
import uuid
from django.core.management.base import BaseCommand
from worldtravel.models import Country, Region, City
from django.db import transaction
import ijson
from django.conf import settings
import psutil


def get_memory_usage():
    process = psutil.Process(os.getpid())
    memory_info = process.memory_info()
    return memory_info.rss  # in bytes


def log_memory_usage(stage):
    memory_usage = get_memory_usage()
    print(f"Memory usage at {stage}: {memory_usage / 1024 / 1024:.2f} MB")
COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION
media_root = settings.MEDIA_ROOT
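

# Download a country's flag from flagcdn.com into MEDIA_ROOT/flags/<code>.png,
# skipping codes whose flag has already been saved.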
def saveCountryFlag(country_code):
    country_code = country_code.lower()
    flags_dir = os.path.join(media_root, 'flags')

    if not os.path.exists(flags_dir):
        os.makedirs(flags_dir)

    flag_path = os.path.join(flags_dir, f'{country_code}.png')
    if os.path.exists(flag_path):
        print(f'Flag for {country_code} already exists')
        return

    res = requests.get(f'https://flagcdn.com/h240/{country_code}.png'.lower())
    if res.status_code == 200:
        with open(flag_path, 'wb') as f:
            f.write(res.content)
        print(f'Flag for {country_code} downloaded')
    else:
        print(f'Error downloading flag for {country_code}')
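

# Management command that streams the countries+states+cities JSON with ijson
# and upserts Country, Region and City rows, then removes rows not touched by
# the current run.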
class Command(BaseCommand):
    help = 'Imports the world travel data'

    def add_arguments(self, parser):
        parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file')
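
    # Typically invoked as `python manage.py download-countries` (the command
    # name follows this module's filename); pass --force to re-download the JSON.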
    def handle(self, **options):
        log_memory_usage("start")
        force = options['force']
        countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')
        if not os.path.exists(countries_json_path) or force:
            res = requests.get(f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json')
            if res.status_code == 200:
                with open(countries_json_path, 'w') as f:
                    f.write(res.text)
                self.stdout.write(self.style.SUCCESS('countries+regions+states.json downloaded successfully'))
            else:
                self.stdout.write(self.style.ERROR('Error downloading countries+regions+states.json'))
                return
        elif not os.path.isfile(countries_json_path):
            self.stdout.write(self.style.ERROR('countries+regions+states.json is not a file'))
            return
        elif os.path.getsize(countries_json_path) == 0:
            self.stdout.write(self.style.ERROR('countries+regions+states.json is empty'))
        elif Country.objects.count() == 0 or Region.objects.count() == 0 or City.objects.count() == 0:
            self.stdout.write(self.style.WARNING('Some region data is missing. Re-importing all data.'))
        else:
            self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
            return
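
        # Each run is tagged with a fresh UUID; rows still carrying an older
        # insert_id after the import are treated as stale and deleted below.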
        insert_id = uuid.uuid4()
        with transaction.atomic():
            f = open(countries_json_path, 'rb')
            parser = ijson.items(f, 'item')

            region_batch = []
            city_batch = []
            existing_region_ids = set()
            existing_city_ids = set()

            for country in parser:
                country_code = country['iso2']
                country_name = country['name']
                country_subregion = country['subregion']
                country_capital = country['capital']
                longitude = round(float(country['longitude']), 6) if country['longitude'] else None
                latitude = round(float(country['latitude']), 6) if country['latitude'] else None

                country_obj, _ = Country.objects.update_or_create(
                    country_code=country_code,
                    defaults={
                        'name': country_name,
                        'subregion': country_subregion,
                        'capital': country_capital,
                        'longitude': longitude,
                        'latitude': latitude,
                        'insert_id': insert_id
                    }
                )

                saveCountryFlag(country_code)
                log_memory_usage(country_code)
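
                # Region ids are '<iso2>-<state_code>' and city ids append the dataset's
                # numeric city id (e.g. 'US-CA' and 'US-CA-12345', numbers illustrative);
                # both are buffered and flushed in batches of 100 to bound memory use.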
                if country['states']:
                    for state in country['states']:
                        state_id = f"{country_code}-{state['state_code']}" if state['state_code'] else f"{country_code}-00"

                        # Ensure no duplicate regions
                        if state_id not in existing_region_ids:
                            region_obj = Region(
                                id=state_id,
                                name=state['name'],
                                country=country_obj,
                                longitude=state['longitude'],
                                latitude=state['latitude'],
                                insert_id=insert_id
                            )
                            region_batch.append(region_obj)
                            existing_region_ids.add(state_id)
                            log_memory_usage(state_id)

                        # Handle cities and avoid duplicates
                        if 'cities' in state and len(state['cities']) > 0:
                            for city in state['cities']:
                                city_id = f"{state_id}-{city['id']}"

                                if city_id not in existing_city_ids:
                                    city_obj = City(
                                        id=city_id,
                                        name=city['name'],
                                        region=region_obj,
                                        longitude=city['longitude'],
                                        latitude=city['latitude'],
                                        insert_id=insert_id
                                    )
                                    city_batch.append(city_obj)
                                    existing_city_ids.add(city_id)
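
                        # bulk_create with update_conflicts performs an upsert keyed on id,
                        # so re-running the command updates rows in place instead of failing
                        # on duplicates (requires Django 4.1+ and a backend that supports it,
                        # e.g. PostgreSQL).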
                        # Bulk insert regions in smaller batches
                        if len(region_batch) >= 100:
                            Region.objects.bulk_create(
                                region_batch,
                                update_conflicts=True,
                                batch_size=100,
                                update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
                                unique_fields=['id']
                            )
                            region_batch.clear()

                        # Bulk insert cities in smaller batches
                        if len(city_batch) >= 100:
                            City.objects.bulk_create(
                                city_batch,
                                update_conflicts=True,
                                batch_size=100,
                                update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
                                unique_fields=['id']
                            )
                            city_batch.clear()
            # Final insertion of any remaining regions and cities
            if region_batch:
                Region.objects.bulk_create(
                    region_batch,
                    update_conflicts=True,
                    batch_size=100,
                    update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
                    unique_fields=['id']
                )

            if city_batch:
                City.objects.bulk_create(
                    city_batch,
                    update_conflicts=True,
                    batch_size=100,
                    update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
                    unique_fields=['id']
                )

            self.stdout.write(self.style.SUCCESS('Regions and cities created'))

            # Clean up old data
            Country.objects.exclude(insert_id=insert_id).delete()
            Region.objects.exclude(insert_id=insert_id).delete()
            City.objects.exclude(insert_id=insert_id).delete()

            self.stdout.write(self.style.SUCCESS('All data imported successfully and old data cleaned up'))