1
0
Fork 0
mirror of https://github.com/seanmorley15/AdventureLog.git synced 2025-07-24 15:29:36 +02:00
AdventureLog/backend/server/worldtravel/management/commands/download-countries.py

275 lines
13 KiB
Python
Raw Normal View History

2024-09-10 23:00:13 -04:00
import os

import ijson
import requests
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from tqdm import tqdm

from worldtravel.models import Country, Region, City

# Base URL of the AdventureLog CDN, read from the Django settings.
ADVENTURELOG_CDN_URL = settings.ADVENTURELOG_CDN_URL

# Django MEDIA_ROOT; downloaded flags and the data file are stored below it.
media_root = settings.MEDIA_ROOT
def saveCountryFlag(country_code):
    """Download the flag PNG for ``country_code`` into ``<media_root>/flags``.

    The flag is stored as ``<lowercased country code>.png``. If that file is
    already present nothing is downloaded. The outcome (already exists,
    downloaded, or download error) is reported with ``print``.

    Args:
        country_code: Country code used to build both the CDN path and the
            local file name; it is lowercased before use.

    Returns:
        None.
    """
    # For standards, use the lowercase country_code
    country_code = country_code.lower()
    flags_dir = os.path.join(media_root, 'flags')

    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(flags_dir, exist_ok=True)

    # Skip the download when the flag is already in the media folder.
    flag_path = os.path.join(flags_dir, f'{country_code}.png')
    if os.path.exists(flag_path):
        print(f'Flag for {country_code} already exists')
        return

    # Only the country code is lowercased. The configured CDN base URL is used
    # verbatim (as the other CDN requests in this file do) because URL paths
    # are case-sensitive and lowercasing the whole URL could break it.
    res = requests.get(f'{ADVENTURELOG_CDN_URL}/data/flags/{country_code}.png')
    if res.status_code == 200:
        with open(flag_path, 'wb') as f:
            f.write(res.content)
        print(f'Flag for {country_code} downloaded')
    else:
        print(f'Error downloading flag for {country_code}')
class Command(BaseCommand):
    """Import countries, regions and cities from the AdventureLog CDN.

    The command compares the CDN's ``version.json`` with the locally stored
    version, and when they differ (or ``--force`` is given) downloads
    ``countries_states_cities.json``, refreshes the flag images and
    synchronises the ``Country``, ``Region`` and ``City`` tables with it.
    """

    help = 'Imports the world travel data'

    def add_arguments(self, parser):
        """Register the ``--force`` command line flag."""
        parser.add_argument('--force', action='store_true', help='Force re-download of AdventureLog setup content from the CDN')

    @staticmethod
    def _coord(value):
        """Return ``value`` as a float rounded to 6 decimals, or None if it is falsy."""
        return round(float(value), 6) if value else None

    @staticmethod
    def _batches(items, batch_size, desc):
        """Yield consecutive ``batch_size`` slices of ``items`` behind a tqdm progress bar."""
        for start in tqdm(range(0, len(items), batch_size), desc=desc):
            yield items[start:start + batch_size]

    def handle(self, **options):
        """Run the import.

        Steps, in order:
          1. Fetch the CDN version and stop early when the local data is
             current and ``--force`` was not given.
          2. Delete the cached flags and data file, then download a fresh
             ``countries_states_cities.json``.
          3. Inside one database transaction, create or update every country,
             region and city found in the file and delete rows that no longer
             appear in it.
          4. Record the new version locally. This happens only after the
             import succeeded, so a failed run is retried next time.

        Args:
            **options: Parsed command options; ``options['force']`` is read.
        """
        force = options['force']
        batch_size = 100
        current_version_json = os.path.join(settings.MEDIA_ROOT, 'data_version.json')

        cdn_version_json = requests.get(f'{ADVENTURELOG_CDN_URL}/data/version.json')
        if cdn_version_json.status_code != 200:
            self.stdout.write(self.style.ERROR('Error downloading version.json'))
            return

        cdn_version = cdn_version_json.json().get('version')
        if cdn_version is None:
            # Without a version there is nothing to compare against or to store.
            self.stdout.write(self.style.ERROR('version.json does not contain a version'))
            return

        if os.path.exists(current_version_json):
            with open(current_version_json, 'r') as f:
                local_version = f.read().strip()
            self.stdout.write(self.style.SUCCESS(f'Local version: {local_version}'))
        else:
            local_version = None

        if not force and local_version == cdn_version:
            self.stdout.write(self.style.SUCCESS('Data is already up-to-date. Run with --force to re-download'))
            return

        self.stdout.write(self.style.SUCCESS('Fetching latest data from the AdventureLog CDN located at: ' + ADVENTURELOG_CDN_URL))

        # Delete the existing flags
        flags_dir = os.path.join(media_root, 'flags')
        if os.path.exists(flags_dir):
            for file in os.listdir(flags_dir):
                os.remove(os.path.join(flags_dir, file))

        # Delete the existing countries, regions, and cities json file
        countries_json_path = os.path.join(media_root, 'countries_states_cities.json')
        if os.path.exists(countries_json_path):
            os.remove(countries_json_path)
            self.stdout.write(self.style.SUCCESS('countries_states_cities.json deleted successfully'))

        # Download the latest countries, regions, and cities json file
        res = requests.get(f'{ADVENTURELOG_CDN_URL}/data/countries_states_cities.json')
        if res.status_code != 200:
            self.stdout.write(self.style.ERROR('Error downloading countries_states_cities.json'))
            return
        # Store the raw bytes: the file is read back in binary mode below, so
        # no decode/encode round trip through a guessed or locale encoding.
        with open(countries_json_path, 'wb') as f:
            f.write(res.content)
        self.stdout.write(self.style.SUCCESS('countries_states_cities.json downloaded successfully'))

        # A single binary handle, closed by the context manager once the
        # streamed parse (and therefore the whole import) has finished.
        with open(countries_json_path, 'rb') as f:
            parser = ijson.items(f, 'item')

            with transaction.atomic():
                existing_countries = {country.country_code: country for country in Country.objects.all()}
                existing_regions = {region.id: region for region in Region.objects.all()}
                existing_cities = {city.id: city for city in City.objects.all()}

                countries_to_create = []
                regions_to_create = []
                countries_to_update = []
                regions_to_update = []
                cities_to_create = []
                cities_to_update = []

                # Keys seen in the file; rows not in these sets are deleted at the end.
                processed_country_codes = set()
                processed_region_ids = set()
                processed_city_ids = set()

                for country in parser:
                    country_code = country['iso2']
                    country_name = country['name']
                    country_subregion = country['subregion']
                    country_capital = country['capital']
                    longitude = self._coord(country['longitude'])
                    latitude = self._coord(country['latitude'])

                    processed_country_codes.add(country_code)

                    if country_code in existing_countries:
                        country_obj = existing_countries[country_code]
                        country_obj.name = country_name
                        country_obj.subregion = country_subregion
                        country_obj.capital = country_capital
                        country_obj.longitude = longitude
                        country_obj.latitude = latitude
                        countries_to_update.append(country_obj)
                    else:
                        country_obj = Country(
                            name=country_name,
                            country_code=country_code,
                            subregion=country_subregion,
                            capital=country_capital,
                            longitude=longitude,
                            latitude=latitude
                        )
                        countries_to_create.append(country_obj)

                    saveCountryFlag(country_code)

                    if country['states']:
                        for state in country['states']:
                            name = state['name']
                            state_id = f"{country_code}-{state['state_code']}"
                            latitude = self._coord(state['latitude'])
                            longitude = self._coord(state['longitude'])

                            # Check for duplicate regions
                            if state_id in processed_region_ids:
                                continue
                            processed_region_ids.add(state_id)

                            if state_id in existing_regions:
                                region_obj = existing_regions[state_id]
                                region_obj.name = name
                                region_obj.country = country_obj
                                region_obj.longitude = longitude
                                region_obj.latitude = latitude
                                regions_to_update.append(region_obj)
                            else:
                                region_obj = Region(
                                    id=state_id,
                                    name=name,
                                    country=country_obj,
                                    longitude=longitude,
                                    latitude=latitude
                                )
                                regions_to_create.append(region_obj)

                            cities = state.get('cities')
                            if cities:
                                for city in cities:
                                    city_id = f"{state_id}-{city['id']}"
                                    city_name = city['name']
                                    latitude = self._coord(city['latitude'])
                                    longitude = self._coord(city['longitude'])

                                    # Check for duplicate cities
                                    if city_id in processed_city_ids:
                                        continue
                                    processed_city_ids.add(city_id)

                                    if city_id in existing_cities:
                                        city_obj = existing_cities[city_id]
                                        city_obj.name = city_name
                                        city_obj.region = region_obj
                                        city_obj.longitude = longitude
                                        city_obj.latitude = latitude
                                        cities_to_update.append(city_obj)
                                    else:
                                        city_obj = City(
                                            id=city_id,
                                            name=city_name,
                                            region=region_obj,
                                            longitude=longitude,
                                            latitude=latitude
                                        )
                                        cities_to_create.append(city_obj)
                    else:
                        # A country without states gets one placeholder region
                        # named after the country, with the id "<code>-00".
                        state_id = f"{country_code}-00"
                        processed_region_ids.add(state_id)
                        if state_id in existing_regions:
                            region_obj = existing_regions[state_id]
                            region_obj.name = country_name
                            region_obj.country = country_obj
                            regions_to_update.append(region_obj)
                        else:
                            region_obj = Region(
                                id=state_id,
                                name=country_name,
                                country=country_obj
                            )
                            regions_to_create.append(region_obj)

                # Creates run parents first: countries, then regions, then cities.
                for batch in self._batches(countries_to_create, batch_size, "Processing countries"):
                    Country.objects.bulk_create(batch)
                for batch in self._batches(regions_to_create, batch_size, "Processing regions"):
                    Region.objects.bulk_create(batch)
                for batch in self._batches(cities_to_create, batch_size, "Processing cities"):
                    City.objects.bulk_create(batch)

                # Process updates in batches (one pass per model).
                for batch in self._batches(countries_to_update, batch_size, "Updating countries"):
                    Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])
                for batch in self._batches(regions_to_update, batch_size, "Updating regions"):
                    Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])
                for batch in self._batches(cities_to_update, batch_size, "Updating cities"):
                    City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])

                # Remove rows that no longer appear in the downloaded file.
                Country.objects.exclude(country_code__in=processed_country_codes).delete()
                Region.objects.exclude(id__in=processed_region_ids).delete()
                City.objects.exclude(id__in=processed_city_ids).delete()

        # Record the version only now that the import has been committed, so a
        # failed download or import is not mistaken for up-to-date data later.
        with open(current_version_json, 'w') as f:
            f.write(cdn_version)
        self.stdout.write(self.style.SUCCESS('Version updated successfully to ' + cdn_version))

        self.stdout.write(self.style.SUCCESS('All data imported successfully'))