Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for exporting to duckdb (via parquet) #157

Merged
merged 15 commits into from
Oct 9, 2024
Merged
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Option to export to DuckDB database [#94](https://github.com/kraina-ai/quackosm/issues/94) (implemented by [@mwip](https://github.com/mwip))

## [0.11.0] - 2024-09-24

### Changed
Expand Down
6 changes: 6 additions & 0 deletions quackosm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
"""

from quackosm.functions import (
convert_geometry_to_duckdb,
convert_geometry_to_geodataframe,
convert_geometry_to_parquet,
convert_osm_extract_to_duckdb,
convert_osm_extract_to_geodataframe,
convert_osm_extract_to_parquet,
convert_pbf_to_duckdb,
convert_pbf_to_geodataframe,
convert_pbf_to_parquet,
)
Expand All @@ -23,8 +26,11 @@
__all__ = [
"PbfFileReader",
"convert_pbf_to_parquet",
"convert_pbf_to_duckdb",
"convert_geometry_to_parquet",
"convert_geometry_to_duckdb",
"convert_osm_extract_to_parquet",
"convert_osm_extract_to_duckdb",
"convert_pbf_to_geodataframe",
"convert_geometry_to_geodataframe",
"convert_osm_extract_to_geodataframe",
Expand Down
129 changes: 119 additions & 10 deletions quackosm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,11 +500,31 @@
"--output",
"-o",
help=(
"Path where to save final geoparquet file. If not provided, it will be generated"
"Path where to save final result file. If not provided, it will be generated"
" automatically based on the input pbf file name."
" Can be [bold green].parquet[/bold green] or"
" [bold green].db[/bold green] or [bold green].duckdb[/bold green] extension."
),
),
] = None,
duckdb: Annotated[
bool,
typer.Option(
"--duckdb",
help=(
"Export to duckdb database. If not provided, data can still be exported if"
" [bold bright_cyan]output[/bold bright_cyan] has [bold green].db[/bold green]"
" or [bold green].duckdb[/bold green] extension."
),
),
] = False,
duckdb_table_name: Annotated[
Optional[str],
typer.Option(
"--duckdb-table-name",
help="Table name which the data will be imported into in the DuckDB database.",
),
] = "quackosm",
ignore_cache: Annotated[
bool,
typer.Option(
Expand Down Expand Up @@ -687,11 +707,21 @@
verbosity_mode = "silent"

logging.disable(logging.CRITICAL)
if pbf_file:

is_duckdb = (result_file_path and result_file_path.suffix in (".duckdb", ".db")) or duckdb

pbf_file_parquet = pbf_file and not is_duckdb
pbf_file_duckdb = pbf_file and is_duckdb
osm_extract_parquet = osm_extract_query and not is_duckdb
osm_extract_duckdb = osm_extract_query and is_duckdb
geometry_parquet = not pbf_file and not osm_extract_query and not is_duckdb
geometry_duckdb = not pbf_file and not osm_extract_query and is_duckdb

if pbf_file_parquet:
from quackosm.functions import convert_pbf_to_parquet

geoparquet_path = convert_pbf_to_parquet(
pbf_path=pbf_file,
result_path = convert_pbf_to_parquet(
pbf_path=cast(str, pbf_file),
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
keep_all_tags=keep_all_tags,
geometry_filter=geometry_filter_value,
Expand All @@ -708,13 +738,34 @@
save_as_wkt=wkt_result,
verbosity_mode=verbosity_mode,
)
elif osm_extract_query:
elif pbf_file_duckdb:
from quackosm.functions import convert_pbf_to_duckdb

result_path = convert_pbf_to_duckdb(
pbf_path=cast(str, pbf_file),
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
keep_all_tags=keep_all_tags,
geometry_filter=geometry_filter_value,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
working_directory=working_directory,
result_file_path=result_file_path,
osm_way_polygon_features_config=(
json.loads(Path(osm_way_polygon_features_config).read_text())
if osm_way_polygon_features_config
else None
),
filter_osm_ids=filter_osm_ids, # type: ignore
duckdb_table_name=duckdb_table_name or "quackosm",
verbosity_mode=verbosity_mode,
)
elif osm_extract_parquet:
from quackosm._exceptions import OsmExtractSearchError
from quackosm.functions import convert_osm_extract_to_parquet

try:
geoparquet_path = convert_osm_extract_to_parquet(
osm_extract_query=osm_extract_query,
result_path = convert_osm_extract_to_parquet(
osm_extract_query=cast(str, osm_extract_query),
osm_extract_source=osm_extract_source,
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
keep_all_tags=keep_all_tags,
Expand All @@ -738,10 +789,64 @@
err_console = Console(stderr=True)
err_console.print(ex)
raise typer.Exit(code=1) from None
else:
elif osm_extract_duckdb:
from quackosm._exceptions import OsmExtractSearchError
from quackosm.functions import convert_osm_extract_to_duckdb

try:
result_path = convert_osm_extract_to_duckdb(
osm_extract_query=cast(str, osm_extract_query),
osm_extract_source=osm_extract_source,
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
keep_all_tags=keep_all_tags,
geometry_filter=geometry_filter_value,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
working_directory=working_directory,
result_file_path=result_file_path,
osm_way_polygon_features_config=(
json.loads(Path(osm_way_polygon_features_config).read_text())
if osm_way_polygon_features_config
else None
),
filter_osm_ids=filter_osm_ids, # type: ignore
duckdb_table_name=duckdb_table_name or "quackosm",
save_as_wkt=wkt_result,
verbosity_mode=verbosity_mode,
)
except OsmExtractSearchError as ex:
from rich.console import Console

Check warning on line 818 in quackosm/cli.py

View check run for this annotation

Codecov / codecov/patch

quackosm/cli.py#L818

Added line #L818 was not covered by tests

err_console = Console(stderr=True)
err_console.print(ex)
raise typer.Exit(code=1) from None

Check warning on line 822 in quackosm/cli.py

View check run for this annotation

Codecov / codecov/patch

quackosm/cli.py#L820-L822

Added lines #L820 - L822 were not covered by tests
elif geometry_parquet:
from quackosm.functions import convert_geometry_to_parquet

geoparquet_path = convert_geometry_to_parquet(
result_path = convert_geometry_to_parquet(
geometry_filter=geometry_filter_value,
osm_extract_source=osm_extract_source,
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
working_directory=working_directory,
result_file_path=result_file_path,
osm_way_polygon_features_config=(
json.loads(Path(osm_way_polygon_features_config).read_text())
if osm_way_polygon_features_config
else None
),
filter_osm_ids=filter_osm_ids, # type: ignore
save_as_wkt=wkt_result,
verbosity_mode=verbosity_mode,
geometry_coverage_iou_threshold=geometry_coverage_iou_threshold,
allow_uncovered_geometry=allow_uncovered_geometry,
)
elif geometry_duckdb:
from quackosm.functions import convert_geometry_to_duckdb

Check warning on line 847 in quackosm/cli.py

View check run for this annotation

Codecov / codecov/patch

quackosm/cli.py#L846-L847

Added lines #L846 - L847 were not covered by tests

result_path = convert_geometry_to_duckdb(

Check warning on line 849 in quackosm/cli.py

View check run for this annotation

Codecov / codecov/patch

quackosm/cli.py#L849

Added line #L849 was not covered by tests
geometry_filter=geometry_filter_value,
osm_extract_source=osm_extract_source,
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
Expand All @@ -756,9 +861,13 @@
else None
),
filter_osm_ids=filter_osm_ids, # type: ignore
duckdb_table_name=duckdb_table_name or "quackosm",
save_as_wkt=wkt_result,
verbosity_mode=verbosity_mode,
geometry_coverage_iou_threshold=geometry_coverage_iou_threshold,
allow_uncovered_geometry=allow_uncovered_geometry,
)
typer.secho(geoparquet_path, fg="green")
else:
raise RuntimeError("Unknown operation mode")

Check warning on line 871 in quackosm/cli.py

View check run for this annotation

Codecov / codecov/patch

quackosm/cli.py#L871

Added line #L871 was not covered by tests

typer.secho(result_path, fg="green")
Loading
Loading