Skip to content

Commit

Permalink
Fixes h3 tables having partial/missing regions.
Browse files Browse the repository at this point in the history
With the latest dataset updates, the nodata handling changed, so the results of raster to h3 can have different cardinalities. The join assumed that all dataframes had the same index, so it kept only the indexes of the first dataframe in the list.
  • Loading branch information
BielStela committed Jan 17, 2025
1 parent bd1ca16 commit b8e3515
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
8 changes: 7 additions & 1 deletion data/h3_data_importer/delete_h3_tables.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""Script to delete dangling h3 tables that are no longer used.
The deletion criterion is whether the table is referenced anywhere else.
"""

import logging

import click
Expand All @@ -13,6 +18,7 @@
@click.option("--drop-contextuals", is_flag=True)
@click.option("--dry-run", is_flag=True)
def main(drop_contextuals: bool, dry_run: bool):
"""Delete dangling h3 tables that are no longer used"""
with psycopg.connect(get_connection_info()) as conn:
with conn.cursor() as cursor:
# find all the tables that start with h3_grid*
Expand Down Expand Up @@ -51,7 +57,7 @@ def main(drop_contextuals: bool, dry_run: bool):
"""DELETE FROM contextual_layer
WHERE id = ANY(%s);
""",
(list(ctx[0] for ctx in contextuals_to_drop),),
([ctx[0] for ctx in contextuals_to_drop],),
)
log.info(f"Deleted contextual layers {', '.join(str(ctx[0]) for ctx in contextuals_to_drop)}")
else:
Expand Down
6 changes: 2 additions & 4 deletions data/h3_data_importer/raster_folder_to_h3_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def raster_to_h3(reference_raster: Path, h3_resolution: int, raster_file: Path)
with rio.open(reference_raster) as ref:
check_srs(ref, raster)
check_transform(ref, raster)

h3 = h3ronpy.raster.raster_to_dataframe(
raster.read(1),
transform=raster.transform,
Expand Down Expand Up @@ -261,12 +260,11 @@ def main(folder: Path, table: str, data_type: str, dataset: str, year: int, h3_r
with multiprocessing.Pool(thread_count) as pool:
h3s = pool.map(partial_raster_to_h3, raster_files)
log.info(f"Joining H3 data of each raster into single dataframe for table {table}")
df = h3s[0]
df: pd.DataFrame = h3s[0]
with click.progressbar(h3s[1:], label="Joining H3 dataframes") as pbar:
for h3df in pbar:
df = df.join(h3df)
df = df.join(h3df, how="outer")
del h3df

# Part 2: Ingest h3 index into the database
to_the_db(df, table, data_type, dataset, year, h3_res)

Expand Down

0 comments on commit b8e3515

Please sign in to comment.