From 2fa64de4c23c8cd36dfa3b57b16d75803ae7a5c2 Mon Sep 17 00:00:00 2001 From: Jonas Jaszkowic Date: Wed, 16 Oct 2024 09:49:14 +0200 Subject: [PATCH] feat: improve docs + increase statement timeout (#136) * feat: improve docs * fix: deps * feat: increase statement timeout --- README.md | 32 ++++++++++++++--------------- harvester/requirements-mac.txt | 24 ++++++++++++++++++++++ harvester/sample.env | 10 +++++---- harvester/src/mapbox_tree_update.py | 3 +++ 4 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 harvester/requirements-mac.txt diff --git a/README.md b/README.md index 73d921ab..b225342c 100644 --- a/README.md +++ b/README.md @@ -11,44 +11,40 @@ - Gather precipitation data from DWD's radolan data set, for the region of Berlin and connect to the giessdenkiez.de postgres DB - Uploads trees combined with weather data to Mapbox and uses its API to create vector tiles for use on mobile devices - Generates CSV and GeoJSON files that contain trees locations and weather data (grid) and uploads them to a Supabase Storage bucket +- Fetch historical weather data via BrightSky API: https://brightsky.dev/ -## Pre-Install +## Development environment -I am using venv to setup a virtual python environment for separating dependencies: +It is recommended to use Python virtual environments to manage and separate Python dependencies: ``` python -m venv REPO_DIRECTORY ``` -## Install +## Install dependencies ``` pip install -r requirements.txt ``` -I had some trouble installing psycopg2 on MacOS, there is a problem with the ssl-lib linking. Following install resolved the issue: +or (if you are on MacOS) ``` -env LDFLAGS='-L/usr/local/lib -L/usr/local/opt/openssl/lib -L/usr/local/opt/readline/lib' pip install psycopg2 +pip install -r requirements-mac.txt ``` -### GDAL - -As some of python's gdal bindings are not as good as the command line tool, i had to use the original. Therefore, `gdal` needs to be installed. 
GDAL is a dependency in requirements.txt, but sometimes this does not work. Then GDAL needs to be installed manually. Afterwards, make sure the command line calls for `gdalwarp` and `gdal_polygonize.py` are working. - -#### Linux - -Here is a good explanation on how to install gdal on linux: https://mothergeo-py.readthedocs.io/en/latest/development/how-to/gdal-ubuntu-pkg.html - -#### Mac +### Dependency troubleshooting -For mac we can use `brew install gdal`. +- If installing `psycopg2` on MacOS fails, there might be a problem with the ssl-lib linking. The following install command resolved the issue: +``` +env LDFLAGS='-L/usr/local/lib -L/usr/local/opt/openssl/lib -L/usr/local/opt/readline/lib' pip install psycopg2 +``` -The current python binding of gdal is fixed to GDAL==2.4.2. If you get another gdal (`ogrinfo --version`), make sure to upgrade to your version: `pip install GDAL==VERSION_FROM_PREVIOUS_COMMAND` +- The project uses the command line tool of GDAL (because the Python bindings are hard to install without dependency conflicts). The GDAL dependency is not listed in the requirements file and must therefore be installed manually on the system: For Mac, use `brew install gdal`: https://formulae.brew.sh/formula/gdal For Linux, follow https://mothergeo-py.readthedocs.io/en/latest/development/how-to/gdal-ubuntu-pkg.html ### Configuration -Copy the `sample.env` file and rename to `.env` then update the parameters, most importantly the database connection parameters. 
+Copy the `sample.env` file and rename to `.env` then update all variables: ``` PG_SERVER=localhost @@ -66,6 +62,8 @@ MAPBOXLAYERNAME=your_mapbox_layer_name SKIP_MAPBOX=False LIMIT_DAYS=30 SURROUNDING_SHAPE_FILE=./assets/buffer.shp +WEATHER_HARVEST_LAT=52.520008 +WEATHER_HARVEST_LNG=13.404954 ``` ## Running diff --git a/harvester/requirements-mac.txt b/harvester/requirements-mac.txt new file mode 100644 index 00000000..7df3937c --- /dev/null +++ b/harvester/requirements-mac.txt @@ -0,0 +1,24 @@ +attrs==23.2.0 +boto3==1.34.90 +botocore==1.34.90 +click==8.1.7 +click-plugins==1.1.1 +cligj==0.7.2 +DateTime==5.4 +docutils==0.20.1 +Fiona==1.9.4.post1 +geopandas==0.14.3 +jmespath==1.0.1 +munch==4.0.0 +numpy==1.26.4 +pandas==2.2.0 +psycopg2==2.9.9 +python-dateutil==2.8.2 +python-dotenv==1.0.1 +pytz==2024.1 +shapely==2.0.3 +six==1.16.0 +urllib3==2.0.7 +zope.interface==6.2 +requests==2.31.0 +tqdm==4.66.2 \ No newline at end of file diff --git a/harvester/sample.env b/harvester/sample.env index 676cdb80..4d99aecf 100644 --- a/harvester/sample.env +++ b/harvester/sample.env @@ -1,15 +1,17 @@ PG_SERVER=localhost PG_PORT=54322 PG_USER=postgres -PG_PASS=postsgres +PG_PASS=postgres PG_DB=postgres SUPABASE_URL=http://127.0.0.1:54321 -SUPABASE_SERVICE_ROLE=eyJh... +SUPABASE_SERVICE_ROLE_KEY=eyJh... 
SUPABASE_BUCKET_NAME=data_assets MAPBOXUSERNAME=your_mapbox_username -MAPBOXTOKEN=your_mapbox +MAPBOXTOKEN=your_mapbox_access_token MAPBOXTILESET=your_mapbox_tileset_id MAPBOXLAYERNAME=your_mapbox_layer_name SKIP_MAPBOX=False LIMIT_DAYS=30 -SURROUNDING_SHAPE_FILE=./assets/buffer.shp \ No newline at end of file +SURROUNDING_SHAPE_FILE=./assets/buffer.shp +WEATHER_HARVEST_LAT=52.520008 +WEATHER_HARVEST_LNG=13.404954 \ No newline at end of file diff --git a/harvester/src/mapbox_tree_update.py b/harvester/src/mapbox_tree_update.py index f4cf8068..dadb5fa1 100644 --- a/harvester/src/mapbox_tree_update.py +++ b/harvester/src/mapbox_tree_update.py @@ -53,6 +53,9 @@ def generate_trees_csv(temp_dir, db_conn): current_year = datetime.now().year with db_conn.cursor() as cur: + # Set statement timeout to 5 minutes because the following query can take a long time + cur.execute("SET LOCAL statement_timeout = '5min';") + # Fetch all trees from database cur.execute( # WARNING: The coordinates in the database columns lat and lng are mislabeled! They mean the opposite.