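# NOTE: the lines below are residue from the repository web page this file
# was copied from; they are kept as comments so the module stays valid Python.
# 1
# 0
# Fork 0
# mirror of https://github.com/seanmorley15/AdventureLog.git synced 2025-07-19 04:49:37 +02:00
# AdventureLog/backend/server/worldtravel/management/commands/download-countries.py
#
# 275 lines
# No EOL
# 13 KiB
# Python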

import os
from django.core.management.base import BaseCommand
import requests
from worldtravel.models import Country, Region, City
from django.db import transaction
from tqdm import tqdm
import ijson
from django.conf import settings
# Base URL of the AdventureLog CDN; version.json, the flag images and
# countries_states_cities.json are all requested relative to it.
ADVENTURELOG_CDN_URL = settings.ADVENTURELOG_CDN_URL
# Local media directory; the flags folder and the downloaded
# countries_states_cities.json are stored underneath it.
media_root = settings.MEDIA_ROOT
def saveCountryFlag(country_code):
    """Download the flag image for a country into ``<media_root>/flags``.

    The flag is requested from
    ``{ADVENTURELOG_CDN_URL}/data/flags/<code>.png`` and stored as
    ``<code>.png``, where ``<code>`` is the lowercased ``country_code``.
    When the file is already present nothing is downloaded. Progress and
    HTTP failures are reported with ``print``; the function returns ``None``.

    Args:
        country_code: Country code used to name the flag file (any case).

    Raises:
        requests.RequestException: If the request fails at the network
            level or does not answer within the timeout.
    """
    # For standards, use the lowercase country_code
    country_code = country_code.lower()
    flags_dir = os.path.join(media_root, 'flags')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(flags_dir, exist_ok=True)

    # Check if the flag already exists in the media folder
    flag_path = os.path.join(flags_dir, f'{country_code}.png')
    if os.path.exists(flag_path):
        print(f'Flag for {country_code} already exists')
        return

    # Only the country code is lowercased. The CDN base URL is configuration
    # and may contain a case-sensitive path, so it must be used verbatim.
    flag_url = f'{ADVENTURELOG_CDN_URL}/data/flags/{country_code}.png'
    # A timeout keeps a stalled CDN connection from hanging the caller forever.
    res = requests.get(flag_url, timeout=30)
    if res.status_code == 200:
        with open(flag_path, 'wb') as f:
            f.write(res.content)
        print(f'Flag for {country_code} downloaded')
    else:
        print(f'Error downloading flag for {country_code}')
class Command(BaseCommand):
    """Sync Country, Region and City rows with the data published on the CDN.

    The command compares the CDN's ``version.json`` with the locally stored
    version marker, re-downloads the flags and ``countries_states_cities.json``
    when they differ (or when ``--force`` is given), and then creates, updates
    and deletes database rows so they match the downloaded file.
    """

    help = 'Imports the world travel data'

    def add_arguments(self, parser):
        """Register the ``--force`` flag on the command's argument parser."""
        parser.add_argument('--force', action='store_true', help='Force re-download of AdventureLog setup content from the CDN')

    @staticmethod
    def _to_coordinate(value):
        """Return ``value`` as a float rounded to 6 decimals, or None if falsy."""
        return round(float(value), 6) if value else None

    def handle(self, *args, **options):
        """Download the latest world data and import it into the database.

        Args:
            *args: Unused positional arguments passed by Django.
            **options: Parsed command options; ``options['force']`` forces a
                re-download even when the local version matches the CDN.

        Returns:
            None. Progress and errors are written to ``self.stdout``.

        Raises:
            requests.RequestException: If a CDN request fails at the network
                level or does not answer within the timeout.
        """
        force = options['force']
        batch_size = 100
        # Without a timeout a stalled CDN connection would hang the command.
        request_timeout = 30
        # Despite the name, this file holds the bare version string, not JSON.
        current_version_json = os.path.join(settings.MEDIA_ROOT, 'data_version.json')

        cdn_version_json = requests.get(f'{ADVENTURELOG_CDN_URL}/data/version.json', timeout=request_timeout)
        if cdn_version_json.status_code != 200:
            self.stdout.write(self.style.ERROR('Error downloading version.json'))
            return

        cdn_version = cdn_version_json.json().get('version')
        if cdn_version is None:
            # A payload without a version cannot be compared or stored.
            self.stdout.write(self.style.ERROR('Error downloading version.json'))
            return
        # The marker file stores text, so compare and write the version as str.
        cdn_version = str(cdn_version)

        if os.path.exists(current_version_json):
            with open(current_version_json, 'r') as f:
                local_version = f.read().strip()
            self.stdout.write(self.style.SUCCESS(f'Local version: {local_version}'))
        else:
            local_version = None

        if not force and local_version == cdn_version:
            self.stdout.write(self.style.SUCCESS('Data is already up-to-date. Run with --force to re-download'))
            return

        self.stdout.write(self.style.SUCCESS('Fetching latest data from the AdventureLog CDN located at: ' + ADVENTURELOG_CDN_URL))

        # Delete the existing flags
        flags_dir = os.path.join(media_root, 'flags')
        if os.path.exists(flags_dir):
            for file in os.listdir(flags_dir):
                os.remove(os.path.join(flags_dir, file))

        # Delete the existing countries, regions, and cities json files
        countries_json_path = os.path.join(media_root, 'countries_states_cities.json')
        if os.path.exists(countries_json_path):
            os.remove(countries_json_path)
            self.stdout.write(self.style.SUCCESS('countries_states_cities.json deleted successfully'))

        # Download the latest countries, regions, and cities json file
        res = requests.get(f'{ADVENTURELOG_CDN_URL}/data/countries_states_cities.json', timeout=request_timeout)
        if res.status_code != 200:
            self.stdout.write(self.style.ERROR('Error downloading countries_states_cities.json'))
            return
        # Store the raw bytes: the file is parsed in binary mode below, and
        # re-encoding res.text with the locale encoding could corrupt or
        # reject non-ASCII place names.
        with open(countries_json_path, 'wb') as f:
            f.write(res.content)
        self.stdout.write(self.style.SUCCESS('countries_states_cities.json downloaded successfully'))

        # A single binary handle, closed by the context manager.
        with open(countries_json_path, 'rb') as f:
            # Stream the elements of the top-level JSON array one at a time.
            parser = ijson.items(f, 'item')

            with transaction.atomic():
                existing_countries = {country.country_code: country for country in Country.objects.all()}
                existing_regions = {region.id: region for region in Region.objects.all()}
                existing_cities = {city.id: city for city in City.objects.all()}

                countries_to_create = []
                regions_to_create = []
                countries_to_update = []
                regions_to_update = []
                cities_to_create = []
                cities_to_update = []

                # Keys seen in the file; rows outside these sets are deleted
                # at the end of the import.
                processed_country_codes = set()
                processed_region_ids = set()
                processed_city_ids = set()

                for country in parser:
                    country_code = country['iso2']
                    country_name = country['name']
                    country_subregion = country['subregion']
                    country_capital = country['capital']
                    longitude = self._to_coordinate(country['longitude'])
                    latitude = self._to_coordinate(country['latitude'])

                    processed_country_codes.add(country_code)

                    if country_code in existing_countries:
                        country_obj = existing_countries[country_code]
                        country_obj.name = country_name
                        country_obj.subregion = country_subregion
                        country_obj.capital = country_capital
                        country_obj.longitude = longitude
                        country_obj.latitude = latitude
                        countries_to_update.append(country_obj)
                    else:
                        country_obj = Country(
                            name=country_name,
                            country_code=country_code,
                            subregion=country_subregion,
                            capital=country_capital,
                            longitude=longitude,
                            latitude=latitude
                        )
                        countries_to_create.append(country_obj)

                    # Flags were wiped above, so fetch one for every country.
                    saveCountryFlag(country_code)

                    if country['states']:
                        for state in country['states']:
                            name = state['name']
                            state_id = f"{country_code}-{state['state_code']}"
                            latitude = self._to_coordinate(state['latitude'])
                            longitude = self._to_coordinate(state['longitude'])

                            # Check for duplicate regions
                            if state_id in processed_region_ids:
                                continue

                            processed_region_ids.add(state_id)

                            if state_id in existing_regions:
                                region_obj = existing_regions[state_id]
                                region_obj.name = name
                                region_obj.country = country_obj
                                region_obj.longitude = longitude
                                region_obj.latitude = latitude
                                regions_to_update.append(region_obj)
                            else:
                                region_obj = Region(
                                    id=state_id,
                                    name=name,
                                    country=country_obj,
                                    longitude=longitude,
                                    latitude=latitude
                                )
                                regions_to_create.append(region_obj)

                            if 'cities' in state and len(state['cities']) > 0:
                                for city in state['cities']:
                                    city_id = f"{state_id}-{city['id']}"
                                    city_name = city['name']
                                    latitude = self._to_coordinate(city['latitude'])
                                    longitude = self._to_coordinate(city['longitude'])

                                    # Check for duplicate cities
                                    if city_id in processed_city_ids:
                                        continue

                                    processed_city_ids.add(city_id)

                                    if city_id in existing_cities:
                                        city_obj = existing_cities[city_id]
                                        city_obj.name = city_name
                                        city_obj.region = region_obj
                                        city_obj.longitude = longitude
                                        city_obj.latitude = latitude
                                        cities_to_update.append(city_obj)
                                    else:
                                        city_obj = City(
                                            id=city_id,
                                            name=city_name,
                                            region=region_obj,
                                            longitude=longitude,
                                            latitude=latitude
                                        )
                                        cities_to_create.append(city_obj)
                    else:
                        # Countries without states get one placeholder region
                        # named after the country itself.
                        state_id = f"{country_code}-00"
                        processed_region_ids.add(state_id)
                        if state_id in existing_regions:
                            region_obj = existing_regions[state_id]
                            region_obj.name = country_name
                            region_obj.country = country_obj
                            regions_to_update.append(region_obj)
                        else:
                            region_obj = Region(
                                id=state_id,
                                name=country_name,
                                country=country_obj
                            )
                            regions_to_create.append(region_obj)

                # Create parents before children: countries, regions, cities.
                for i in tqdm(range(0, len(countries_to_create), batch_size), desc="Processing countries"):
                    batch = countries_to_create[i:i + batch_size]
                    Country.objects.bulk_create(batch)

                for i in tqdm(range(0, len(regions_to_create), batch_size), desc="Processing regions"):
                    batch = regions_to_create[i:i + batch_size]
                    Region.objects.bulk_create(batch)

                for i in tqdm(range(0, len(cities_to_create), batch_size), desc="Processing cities"):
                    batch = cities_to_create[i:i + batch_size]
                    City.objects.bulk_create(batch)

                # Process updates in batches
                for i in tqdm(range(0, len(countries_to_update), batch_size), desc="Updating countries"):
                    batch = countries_to_update[i:i + batch_size]
                    Country.objects.bulk_update(batch, ['name', 'subregion', 'capital', 'longitude', 'latitude'])

                for i in tqdm(range(0, len(regions_to_update), batch_size), desc="Updating regions"):
                    batch = regions_to_update[i:i + batch_size]
                    Region.objects.bulk_update(batch, ['name', 'country', 'longitude', 'latitude'])

                for i in tqdm(range(0, len(cities_to_update), batch_size), desc="Updating cities"):
                    batch = cities_to_update[i:i + batch_size]
                    City.objects.bulk_update(batch, ['name', 'region', 'longitude', 'latitude'])

                # Remove rows that no longer appear in the downloaded file.
                Country.objects.exclude(country_code__in=processed_country_codes).delete()
                Region.objects.exclude(id__in=processed_region_ids).delete()
                City.objects.exclude(id__in=processed_city_ids).delete()

        self.stdout.write(self.style.SUCCESS('All data imported successfully'))

        # Record the new version only after the import has been committed, so
        # a failed download or import is retried on the next run instead of
        # being reported as already up-to-date.
        with open(current_version_json, 'w') as f:
            f.write(cdn_version)
        self.stdout.write(self.style.SUCCESS('Version updated successfully to ' + cdn_version))