2024-09-10 23:00:13 -04:00
|
|
|
import os
|
|
|
|
import requests
|
2025-01-13 13:01:36 -05:00
|
|
|
import uuid
|
|
|
|
from django.core.management.base import BaseCommand
|
2025-01-09 11:11:02 -05:00
|
|
|
from worldtravel.models import Country, Region, City
|
2024-09-10 23:00:13 -04:00
|
|
|
from django.db import transaction
|
2025-01-13 11:23:57 -05:00
|
|
|
import ijson
|
2024-09-10 23:00:13 -04:00
|
|
|
from django.conf import settings
|
2025-01-13 13:01:36 -05:00
|
|
|
import psutil
|
2024-09-11 16:08:10 -04:00
|
|
|
|
2025-01-13 13:01:36 -05:00
|
|
|
def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in bytes."""
    current_process = psutil.Process(os.getpid())
    return current_process.memory_info().rss
|
2025-01-13 11:23:57 -05:00
|
|
|
|
2025-01-13 13:01:36 -05:00
|
|
|
def log_memory_usage(stage):
    """Print the current process memory usage, labelled with ``stage``.

    The byte count returned by ``get_memory_usage()`` is converted to
    megabytes and printed with two decimal places.

    Args:
        stage: Label interpolated into the printed message.
    """
    usage_mb = get_memory_usage() / 1024 / 1024
    print(f"Memory usage at {stage}: {usage_mb:.2f} MB")
|
|
|
|
|
|
|
|
# Dataset version read from Django settings; it is interpolated into both the
# download URL and the local file name of the countries JSON file.
COUNTRY_REGION_JSON_VERSION = settings.COUNTRY_REGION_JSON_VERSION

# Media directory read from Django settings; flag images are stored in a
# "flags" sub-directory beneath it.
media_root = settings.MEDIA_ROOT
|
|
|
|
|
|
|
|
def saveCountryFlag(country_code):
    """Download the flag image for ``country_code`` into ``<media_root>/flags``.

    The code is lower-cased and used both as the file name (``<code>.png``)
    and in the flagcdn.com URL. If the file already exists, nothing is
    downloaded. Network failures and non-200 responses are reported with
    ``print`` instead of being raised, so a missing flag does not abort the
    caller.

    Args:
        country_code: Country code string; matched case-insensitively.
    """
    country_code = country_code.lower()

    flags_dir = os.path.join(media_root, 'flags')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(flags_dir, exist_ok=True)

    flag_path = os.path.join(flags_dir, f'{country_code}.png')
    if os.path.exists(flag_path):
        print(f'Flag for {country_code} already exists')
        return

    try:
        # Without a timeout a single stalled connection would block forever.
        res = requests.get(
            f'https://flagcdn.com/h240/{country_code}.png',
            timeout=30,
        )
    except requests.RequestException:
        # Treat transport errors like a non-200 response: report and move on.
        print(f'Error downloading flag for {country_code}')
        return

    if res.status_code == 200:
        with open(flag_path, 'wb') as f:
            f.write(res.content)
        print(f'Flag for {country_code} downloaded')
    else:
        print(f'Error downloading flag for {country_code}')
|
|
|
|
|
|
|
|
class Command(BaseCommand):
    """Import countries, regions and cities from the versioned JSON dataset.

    The dataset is downloaded into ``settings.MEDIA_ROOT`` when it is missing
    or when ``--force`` is given. Every row written during a run is tagged
    with a fresh ``insert_id``; rows that do not carry that id afterwards are
    deleted, so the tables end up mirroring the dataset.
    """

    help = 'Imports the world travel data'

    # Number of objects accumulated before a bulk_create is issued, and the
    # batch_size passed to bulk_create itself.
    BATCH_SIZE = 100

    # Seconds allowed for connecting / for each read while downloading.
    DOWNLOAD_TIMEOUT = 60

    def add_arguments(self, parser):
        """Register the ``--force`` flag that forces a fresh download."""
        parser.add_argument('--force', action='store_true', help='Force download the countries+regions+states.json file')

    def handle(self, **options):
        """Run the import.

        Args:
            **options: Parsed command options; ``options['force']`` is read.
        """
        log_memory_usage("start")

        force = options['force']
        countries_json_path = os.path.join(settings.MEDIA_ROOT, f'countries+regions+states-{COUNTRY_REGION_JSON_VERSION}.json')

        if not self._prepare_dataset(countries_json_path, force):
            return

        insert_id = uuid.uuid4()

        with transaction.atomic():
            # ijson yields one top-level array item (a country) at a time.
            # The context manager guarantees the file handle is closed even
            # if the import raises.
            with open(countries_json_path, 'rb') as f:
                self._import_countries(ijson.items(f, 'item'), insert_id)

            self.stdout.write(self.style.SUCCESS('Regions and cities created'))

            # Clean up old data: anything not tagged by this run is stale.
            Country.objects.exclude(insert_id=insert_id).delete()
            Region.objects.exclude(insert_id=insert_id).delete()
            City.objects.exclude(insert_id=insert_id).delete()

            self.stdout.write(self.style.SUCCESS('All data imported successfully and old data cleaned up'))

    def _prepare_dataset(self, countries_json_path, force):
        """Make sure the dataset file is usable; return True if import should run."""
        if not os.path.exists(countries_json_path) or force:
            return self._download_dataset(countries_json_path)

        if not os.path.isfile(countries_json_path):
            self.stdout.write(self.style.ERROR('countries+regions+states.json is not a file'))
            return False

        if os.path.getsize(countries_json_path) == 0:
            self.stdout.write(self.style.ERROR('countries+regions+states.json is empty'))
            # Stop here: importing from an empty file cannot succeed.
            return False

        if not Country.objects.exists() or not Region.objects.exists() or not City.objects.exists():
            self.stdout.write(self.style.WARNING('Some region data is missing. Re-importing all data.'))
            return True

        self.stdout.write(self.style.SUCCESS('Latest country, region, and state data already downloaded.'))
        return False

    def _download_dataset(self, countries_json_path):
        """Download the dataset to ``countries_json_path``; return True on success."""
        url = f'https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/{COUNTRY_REGION_JSON_VERSION}/json/countries%2Bstates%2Bcities.json'
        try:
            res = requests.get(url, timeout=self.DOWNLOAD_TIMEOUT)
        except requests.RequestException:
            self.stdout.write(self.style.ERROR('Error downloading countries+regions+states.json'))
            return False

        if res.status_code != 200:
            self.stdout.write(self.style.ERROR('Error downloading countries+regions+states.json'))
            return False

        # Write the raw bytes: the file is later read in binary mode, so it
        # must not be re-encoded with the platform's default text encoding.
        with open(countries_json_path, 'wb') as f:
            f.write(res.content)
        self.stdout.write(self.style.SUCCESS('countries+regions+states.json downloaded successfully'))
        return True

    def _import_countries(self, countries, insert_id):
        """Upsert every country, region and city yielded by ``countries``.

        Args:
            countries: Iterable of country dicts; each is read for the keys
                ``iso2``, ``name``, ``subregion``, ``capital``, ``longitude``,
                ``latitude`` and ``states``.
            insert_id: Value stored in ``insert_id`` on every written row.
        """
        region_batch = []
        city_batch = []
        # Maps region id -> Region so that a repeated id reuses the matching
        # object instead of whichever region happened to be created last.
        regions_by_id = {}
        existing_city_ids = set()

        for country in countries:
            country_code = country['iso2']
            longitude = round(float(country['longitude']), 6) if country['longitude'] else None
            latitude = round(float(country['latitude']), 6) if country['latitude'] else None

            country_obj, _ = Country.objects.update_or_create(
                country_code=country_code,
                defaults={
                    'name': country['name'],
                    'subregion': country['subregion'],
                    'capital': country['capital'],
                    'longitude': longitude,
                    'latitude': latitude,
                    'insert_id': insert_id,
                },
            )

            saveCountryFlag(country_code)
            log_memory_usage(country_code)

            for state in country['states'] or []:
                # States without a code share the "<country>-00" fallback id.
                state_id = f"{country_code}-{state['state_code']}" if state['state_code'] else f"{country_code}-00"

                # Ensure no duplicate regions
                region_obj = regions_by_id.get(state_id)
                if region_obj is None:
                    region_obj = Region(
                        id=state_id,
                        name=state['name'],
                        country=country_obj,
                        longitude=state['longitude'],
                        latitude=state['latitude'],
                        insert_id=insert_id,
                    )
                    regions_by_id[state_id] = region_obj
                    region_batch.append(region_obj)
                    log_memory_usage(state_id)

                # Handle cities and avoid duplicates
                for city in state.get('cities') or []:
                    city_id = f"{state_id}-{city['id']}"
                    if city_id in existing_city_ids:
                        continue
                    existing_city_ids.add(city_id)
                    city_batch.append(City(
                        id=city_id,
                        name=city['name'],
                        region=region_obj,
                        longitude=city['longitude'],
                        latitude=city['latitude'],
                        insert_id=insert_id,
                    ))

                # Bulk insert regions in smaller batches
                if len(region_batch) >= self.BATCH_SIZE:
                    self._flush_regions(region_batch)

                # Bulk insert cities in smaller batches
                if len(city_batch) >= self.BATCH_SIZE:
                    self._flush_cities(region_batch, city_batch)

        # Final insertion of any remaining regions and cities
        self._flush_cities(region_batch, city_batch)

    def _flush_regions(self, region_batch):
        """Upsert the pending regions and empty the batch in place."""
        if not region_batch:
            return
        Region.objects.bulk_create(
            region_batch,
            update_conflicts=True,
            batch_size=self.BATCH_SIZE,
            update_fields=['name', 'country', 'longitude', 'latitude', 'insert_id'],
            unique_fields=['id'],
        )
        region_batch.clear()

    def _flush_cities(self, region_batch, city_batch):
        """Upsert pending regions, then pending cities; empty both batches."""
        # Regions go first so a city row never references a region that has
        # not been written yet.
        self._flush_regions(region_batch)
        if not city_batch:
            return
        City.objects.bulk_create(
            city_batch,
            update_conflicts=True,
            batch_size=self.BATCH_SIZE,
            update_fields=['name', 'region', 'longitude', 'latitude', 'insert_id'],
            unique_fields=['id'],
        )
        city_batch.clear()
|