
Commit

Revert to passing logger to import modules
mvaaltola committed Jan 21, 2022
1 parent 0f25506 commit c72bd48
Showing 9 changed files with 102 additions and 127 deletions.
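The change restored here is the same across the touched files: each importer class receives a logging.Logger through its constructor and logs via self.logger, instead of every module fetching a shared logging.getLogger("import") at import time. A minimal sketch of the injected-logger pattern (the class, slug and handler setup below are illustrative, not code from the repository):

import logging

class ExampleImporter:
    """Illustrative importer; the real classes also take city, bbox, URLs, etc."""

    def __init__(self, slug: str, logger: logging.Logger):
        self.slug = slug
        self.logger = logger  # injected by the caller, one logger per city

    def run(self):
        self.logger.info(f"--- Importing data for {self.slug} ---")

# the caller creates one logger per city and hands it to every importer it builds
logging.basicConfig(level=logging.INFO)
ExampleImporter("helsinki", logging.getLogger("helsinki")).run()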
2 changes: 1 addition & 1 deletion export.py
@@ -35,7 +35,7 @@
delete = args.get("delete", False)

# log each city separately
logger = create_logger("export", slug)
logger = create_logger(slug)

sql_url = get_connection_url(dbname='geoviz')
engine = create_engine(sql_url)
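Both export.py and import.py now call create_logger(slug) with only the city slug. The helper's implementation is not part of this diff; a hypothetical sketch of what such a per-city logger factory could look like (the file name, format and level are assumptions):

import logging

def create_logger(slug: str) -> logging.Logger:
    """Hypothetical per-city logger factory; the real helper lives elsewhere in the repo."""
    logger = logging.getLogger(slug)
    if not logger.handlers:  # don't stack duplicate handlers on repeated calls
        handler = logging.FileHandler(f"{slug}.log")
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger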
18 changes: 9 additions & 9 deletions import.py
@@ -61,7 +61,7 @@
delete = args.get("delete", False)

# log each city separately
logger = create_logger("import", slug)
logger = create_logger(slug)
logger.info(f"--- Importing datasets {datasets} for {city} ---")

if osmnames_url:
@@ -149,43 +149,43 @@ def mark_imported(dataset: str):

if "osm" in datasets:
logger.info(f"--- Importing OSM data for {city} ---")
osm_importer = OsmImporter({"slug": slug,
"bbox": ", ".join([str(coord) for coord in bbox])})
osm_bbox = ", ".join([str(coord) for coord in bbox])
osm_importer = OsmImporter({"slug": slug, "bbox": osm_bbox}, logger)
osm_importer.run()
mark_imported("osm")

if "flickr" in datasets:
logger.info(f"--- Importing Flickr data for {city} ---")
flick_importer = FlickrImporter(slug=slug, bbox=bbox)
flick_importer = FlickrImporter(slug, bbox, logger)
flick_importer.run()
mark_imported("flickr")

if "gtfs" in datasets:
# GTFS importer uses the provided URL or, failing that, default values for some cities
if gtfs_url:
logger.info(f"--- Importing GTFS data from {gtfs_url} ---")
gtfs_importer = GTFSImporter(slug=slug, url=gtfs_url, city=city, bbox=bbox)
gtfs_importer = GTFSImporter(slug, city, logger, gtfs_url, bbox)
else:
logger.info(f"--- Importing GTFS data for {city} ---")
gtfs_importer = GTFSImporter(slug=slug, city=city, bbox=bbox)
gtfs_importer = GTFSImporter(slug, city, logger, bbox=bbox)
gtfs_importer.run()
mark_imported("gtfs")

if "access" in datasets:
logger.info(f"--- Importing OSM walkability & accessibility data for {city} ---")
accessibility_importer = AccessibilityImporter(slug=slug, bbox=bbox)
accessibility_importer = AccessibilityImporter(slug, bbox, logger)
accessibility_importer.run()
mark_imported("access")

if "ookla" in datasets:
logger.info(f"--- Importing Ookla speedtest data for {city} ---")
ookla_importer = OoklaImporter(slug=slug, city=city, bbox=bbox)
ookla_importer = OoklaImporter(slug, city, bbox, logger)
ookla_importer.run()
mark_imported("ookla")

if "kontur" in datasets:
logger.info(f"--- Importing Kontur population data for {city} ---")
kontur_importer = KonturImporter(slug=slug, city=city, bbox=bbox)
kontur_importer = KonturImporter(slug, city, bbox, logger)
kontur_importer.run()
mark_imported("kontur")

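Note the two GTFSImporter calls above: in the new signature from scripts/import_gtfs.py (slug, city, logger, url="", bbox=None), url precedes bbox, so the call without a feed URL has to pass bbox by keyword. A self-contained sketch of the two call shapes with placeholder values:

import logging
from typing import List

class GTFSImporter:
    # signature mirrored from scripts/import_gtfs.py below; body reduced to a stub
    def __init__(self, slug: str, city: str, logger: logging.Logger,
                 url: str = "", bbox: List[float] = None):
        self.slug, self.city, self.logger, self.url, self.bbox = slug, city, logger, url, bbox

logger = logging.getLogger("helsinki")
bbox = [24.5, 60.0, 25.5, 60.5]  # illustrative (min_lon, min_lat, max_lon, max_lat)
with_url = GTFSImporter("helsinki", "Helsinki", logger, "https://example.com/feed.zip", bbox)
without_url = GTFSImporter("helsinki", "Helsinki", logger, bbox=bbox)  # bbox must be a keyword here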
34 changes: 17 additions & 17 deletions scripts/import_flickr.py
@@ -1,6 +1,7 @@
import argparse
import datetime
import logging
from logging import Logger
import os
import sys
from flickrapi import FlickrAPI, FlickrError
@@ -17,11 +18,9 @@
sys.path.insert(0, "..")
from models import FlickrPoint

logger = logging.getLogger("import")


class FlickrImporter:
def __init__(self, slug: str, bbox: List[float]):
def __init__(self, slug: str, bbox: List[float], logger: Logger):
"""Sets the initial parameters, connects to flickr and database"""
if not slug:
raise AssertionError("You must specify the city name.")
@@ -33,6 +32,7 @@ def __init__(self, slug: str, bbox: List[float]):
start_date = end_date - datetime.timedelta(days=3*365)
# List for api request parameter tuples
self.start_params = [(total_bbox, start_date, end_date)]
self.logger = logger

# List for photos
self.photos = []
@@ -65,7 +65,7 @@ def run(self):

# Save the photo locations
flickr_points = {}
logger.info(f"Found {len(self.photos)} Flickr photos, importing...")
self.logger.info(f"Found {len(self.photos)} Flickr photos, importing...")
for point in self.photos:
pid = point.pop("id")
geom = from_shape(
@@ -74,9 +74,9 @@
)
# Use dict, since the json may contain the same image twice!
if pid in flickr_points:
logger.info(f"Image {pid} found twice, overwriting")
self.logger.info(f"Image {pid} found twice, overwriting")
flickr_points[pid] = FlickrPoint(point_id=pid, properties=point, geom=geom)
logger.info(f"Saving {len(flickr_points)} flickr points...")
self.logger.info(f"Saving {len(flickr_points)} flickr points...")
self.session.bulk_save_objects(flickr_points.values())
self.session.commit()

@@ -109,9 +109,9 @@ def loop(self, params_list: list):
f"{self.max_date.day:02} 00:00:00"
)
# Print info
logger.info(f"\nbbox: {self.bbox}")
logger.info(f" From: {self.min_date_str}")
logger.info(f" To: {self.max_date_str}")
self.logger.info(f"\nbbox: {self.bbox}")
self.logger.info(f" From: {self.min_date_str}")
self.logger.info(f" To: {self.max_date_str}")

# Start reading photos from page 1
page = 1
@@ -134,15 +134,15 @@ def loop(self, params_list: list):
self.photos += photos_to_add["photo"]
# Stop when photos from every page have been added
if page >= photos_to_add["pages"]:
logger.info(f" {photos_to_add['total']} photos added")
self.logger.info(f" {photos_to_add['total']} photos added")
break
# Move on to next page
page += 1

# See if any new params had to be created
if len(new_params) > 0:
# Loop with the new params
logger.info("\n\nSwitching to a new parameter list")
self.logger.info("\n\nSwitching to a new parameter list")
self.loop(new_params)

def flickr_query(self, page):
@@ -175,13 +175,13 @@ def flickr_query(self, page):
page=page,
)
except FlickrError as e:
logger.warning(f"Flickr API returned an error: {e}. Trying again.")
self.logger.warning(f"Flickr API returned an error: {e}. Trying again.")
self.q_count += 1
continue
break

self.q_count += 1
logger.info(f" queries: {self.q_count}")
self.logger.info(f" queries: {self.q_count}")
return result["photos"]

def add_new_params(self, new_params: list):
@@ -192,13 +192,13 @@ def add_new_params(self, new_params: list):
small for dividing.
"""

logger.info(" Too much data, trying with new parameters")
self.logger.info(" Too much data, trying with new parameters")
# Divide bbox if possible
if (
(self.bbox[2] - self.bbox[0] > 1e-4) and
(self.bbox[3] - self.bbox[1] > 1e-4)
):
logger.info(" Bbox big enough to divide, dividing bbox")
self.logger.info(" Bbox big enough to divide, dividing bbox")
# Divide bbox to 4, add new bboxes to new params
middle_lon = (self.bbox[0] + self.bbox[2]) / 2
middle_lat = (self.bbox[1] + self.bbox[3]) / 2
@@ -221,7 +221,7 @@

# If bbox too small, divide time extent instead
else:
logger.info(" Bbox too small to divide, dividing time extent")
self.logger.info(" Bbox too small to divide, dividing time extent")
# Divide time extent to 2, add new dates to params_list
mid_date = self.min_date + (self.max_date - self.min_date) / 2
new_params.append((self.bbox, self.min_date, mid_date))
@@ -238,5 +238,5 @@ def add_new_params(self, new_params: list):
arg_slug = slugify(arg_city)
arg_bbox = args["bbox"]
arg_bbox = list(map(float, arg_bbox.split(", ")))
importer = FlickrImporter(slug=arg_slug, bbox=arg_bbox)
importer = FlickrImporter(slug=arg_slug, bbox=arg_bbox, logger=logging.getLogger("import"))
importer.run()
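The add_new_params method above handles Flickr's result limits by splitting an overloaded query: it divides the bbox into four quadrants, or halves the time extent once the bbox is too small to divide. A self-contained sketch of the bbox split (the coordinates are illustrative; bbox order is min_lon, min_lat, max_lon, max_lat as elsewhere in these importers):

from typing import List

def split_bbox(bbox: List[float]) -> List[List[float]]:
    """Split a (min_lon, min_lat, max_lon, max_lat) box into four quadrants."""
    mid_lon = (bbox[0] + bbox[2]) / 2
    mid_lat = (bbox[1] + bbox[3]) / 2
    return [
        [bbox[0], bbox[1], mid_lon, mid_lat],  # south-west
        [mid_lon, bbox[1], bbox[2], mid_lat],  # south-east
        [bbox[0], mid_lat, mid_lon, bbox[3]],  # north-west
        [mid_lon, mid_lat, bbox[2], bbox[3]],  # north-east
    ]

print(split_bbox([24.5, 60.0, 25.5, 60.5]))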
28 changes: 14 additions & 14 deletions scripts/import_gtfs.py
@@ -1,5 +1,6 @@
import argparse
import logging
from logging import Logger
import os
import sys
import requests
@@ -38,16 +38,15 @@

DATA_PATH = "data"

logger = logging.getLogger("import")


class GTFSImporter(object):
def __init__(self, slug: str, city: str, url: str = "", bbox: List[float] = None):
def __init__(self, slug: str, city: str, logger: Logger, url: str = "", bbox: List[float] = None):
if not city or not slug:
raise AssertionError("You must specify the city name.")
self.city = city
# optional bbox allows filtering gtfs layer
self.bbox = bbox
self.logger = logger
if url:
self.url = url
else:
@@ -64,7 +64,7 @@ def __init__(self, slug: str, city: str, url: str = "", bbox: List[float] = None

def run(self):
if not self.url:
logger.error(f"GTFS data not found for {self.city}, skipping.")
self.logger.error(f"GTFS data not found for {self.city}, skipping.")
return
# data should be stored one directory level above importers
filename = os.path.join(
@@ -73,13 +73,13 @@ def run(self):
f"{self.city}.gtfs.zip"
)
if os.path.isfile(filename):
logger.info("Found saved gtfs zip...")
self.logger.info("Found saved gtfs zip...")
else:
logger.info("Downloading gtfs zip...")
self.logger.info("Downloading gtfs zip...")
response = requests.get(self.url, allow_redirects=True)
open(filename, 'wb').write(response.content)

logger.info("Loading gtfs zip...")
self.logger.info("Loading gtfs zip...")
routes, stops, stop_times, trips, shapes = import_gtfs(filename)
# TODO: delete file after reading, we don't want to keep caching them all?
# This is the only large dataset we download separately. or is gtfs data valuable?
@@ -88,14 +88,14 @@ def run(self):
# luckily, we have nifty bbox filtering available for geodataframes
# https://geopandas.org/docs/user_guide/indexing.html
if self.bbox:
logger.info("Filtering gtfs data with bbox...")
logger.info(self.bbox)
self.logger.info("Filtering gtfs data with bbox...")
self.logger.info(self.bbox)
stops = stops.cx[self.bbox[0]:self.bbox[2], self.bbox[1]:self.bbox[3]]
stop_times = stop_times.cx[self.bbox[0]:self.bbox[2], self.bbox[1]:self.bbox[3]]

# only calculate average daily frequency for all stops for now
cutoffs = [0, 24]
logger.info("Calculating stop frequencies...")
self.logger.info("Calculating stop frequencies...")
stop_frequencies = stops_freq(stop_times, stops, cutoffs)
# only consider outbound departures for now
outbound_frequencies = stop_frequencies.loc[
@@ -106,15 +106,15 @@ def run(self):
if not outbound_frequencies:
outbound_frequencies = stop_frequencies.to_dict(orient="records")
stops_to_save = {}
logger.info(f"Found {len(outbound_frequencies)} GTFS stops, importing...")
self.logger.info(f"Found {len(outbound_frequencies)} GTFS stops, importing...")
for stop in outbound_frequencies:
stop_id = stop.pop("stop_id")
geom = from_shape(stop.pop("geometry"), srid=4326)
# use dict, since the json may contain the same stop twice!
if stop_id in stops_to_save:
logger.info(f"Stop {stop_id} found twice, overwriting")
self.logger.info(f"Stop {stop_id} found twice, overwriting")
stops_to_save[stop_id] = GTFSStop(stop_id=stop_id, properties=stop, geom=geom)
logger.info(f"Saving {len(stops_to_save)} GTFS stops...")
self.logger.info(f"Saving {len(stops_to_save)} GTFS stops...")
self.session.bulk_save_objects(stops_to_save.values())
self.session.commit()

@@ -127,5 +127,5 @@ def run(self):
arg_city = args.get("city", None)
arg_slug = slugify(arg_city)
arg_url = args.get("url", None)
importer = GTFSImporter(slug=arg_slug, city=arg_city, url=arg_url)
importer = GTFSImporter(arg_slug, arg_city, logging.getLogger("import"), arg_url)
importer.run()
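The bbox filtering in the GTFS importer relies on geopandas' .cx coordinate indexer, which slices a GeoDataFrame by bounding box. A small illustration with made-up stops (requires geopandas and shapely; the coordinates are placeholders):

import geopandas as gpd
from shapely.geometry import Point

stops = gpd.GeoDataFrame(
    {"stop_id": ["inside", "outside"]},
    geometry=[Point(24.9, 60.2), Point(30.0, 65.0)],
    crs="EPSG:4326",
)
bbox = [24.5, 60.0, 25.5, 60.5]  # minx, miny, maxx, maxy
clipped = stops.cx[bbox[0]:bbox[2], bbox[1]:bbox[3]]
print(clipped)  # only the stop inside the bbox remains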
24 changes: 12 additions & 12 deletions scripts/import_kontur.py
@@ -1,5 +1,6 @@
import argparse
import logging
from logging import Logger

import fiona
import gzip
@@ -22,19 +23,18 @@

DATA_PATH = "data"

logger = logging.getLogger("import")


class KonturImporter(object):

def __init__(self, slug: str, city: str, bbox: List[float]):
def __init__(self, slug: str, city: str, bbox: List[float], logger: Logger):
if not city or not slug:
raise AssertionError("You must specify the city name.")
# BBOX (minx, miny, maxx, maxy)
self.bbox = bbox
self.city = city
self.download_url = "https://adhoc.kontur.io/data/"
self.download_name = "kontur_population_20200928.gpkg"
self.logger = logger

# data should be stored one directory level above importers
self.unzipped_file = os.path.join(
@@ -58,23 +58,23 @@ def __init__(self, slug: str, city: str, bbox: List[float]):

def run(self):
if os.path.isfile(self.download_file):
logger.info("Found saved Kontur data...")
self.logger.info("Found saved Kontur data...")
else:
logger.info("Downloading Kontur data...")
logger.info(f"{self.download_url}{self.download_name}.gz")
self.logger.info("Downloading Kontur data...")
self.logger.info(f"{self.download_url}{self.download_name}.gz")
with requests.get(f"{self.download_url}{self.download_name}.gz", stream=True) as request:
with open(self.download_file, 'wb') as file:
shutil.copyfileobj(request.raw, file)
if not os.path.isfile(self.unzipped_file):
logger.info("Extracting gz...")
self.logger.info("Extracting gz...")
with gzip.open(self.download_file, 'rb') as gzip_file:
with open(self.unzipped_file, 'wb') as out_file:
shutil.copyfileobj(gzip_file, out_file)

if not os.path.isfile(self.city_file):
if not os.path.isdir(f"{self.unzipped_file}_extracts"):
os.mkdir(f"{self.unzipped_file}_extracts")
logger.info(f"Extracting {self.city} from Kontur data...")
self.logger.info(f"Extracting {self.city} from Kontur data...")
gdal.UseExceptions()
# this does the same as ogr2ogr
# https://gdal.org/python/osgeo.gdal-module.html#VectorTranslateOptions
@@ -89,8 +89,8 @@ def run(self):
# https://gdal.org/api/python_gotchas.html#saving-and-closing-datasets-datasources
del city_data
else:
logger.info(f"Found geopackage for {self.city}...")
logger.info(f"Reading Kontur data for {self.city}...")
self.logger.info(f"Found geopackage for {self.city}...")
self.logger.info(f"Reading Kontur data for {self.city}...")
points_to_save = {}
for layer_name in fiona.listlayers(self.city_file):
with fiona.open(self.city_file, layer=layer_name) as source:
Expand All @@ -107,7 +107,7 @@ def run(self):
points_to_save[hex_id] = KonturPoint(
hex_id=hex_id, properties=properties, geom=geom
)
logger.info(f"Saving {len(points_to_save)} Kontur points...")
self.logger.info(f"Saving {len(points_to_save)} Kontur points...")
self.session.bulk_save_objects(points_to_save.values())
self.session.commit()

@@ -121,5 +121,5 @@ def run(self):
arg_slug = slugify(arg_city)
arg_bbox = args["bbox"]
arg_bbox = list(map(float, arg_bbox.split(", ")))
importer = KonturImporter(slug=arg_slug, city=arg_city, bbox=arg_bbox)
importer = KonturImporter(arg_slug, arg_city, arg_bbox, logging.getLogger("import"))
importer.run()
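The per-city extract in the Kontur importer is done with gdal.VectorTranslate, the Python binding's equivalent of ogr2ogr. A hedged sketch of that call with illustrative paths and bounds (spatFilter takes minx, miny, maxx, maxy; the output file name is an assumption):

from osgeo import gdal

gdal.UseExceptions()
city_data = gdal.VectorTranslate(
    "data/kontur_population_20200928.gpkg_extracts/copenhagen.gpkg",  # illustrative paths
    "data/kontur_population_20200928.gpkg",
    format="GPKG",
    spatFilter=[12.4, 55.5, 12.7, 55.8],
)
del city_data  # releasing the handle flushes and closes the output dataset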
