Skip to content

Commit

Permalink
feat: add option to keep all tags while filtering (#26)
Browse files Browse the repository at this point in the history
* feat: modified CLI parameters and added option to pass JSON OSM tags filter

* chore: change typos in the CLI docs

* chore: added changelog entry

* chore: added new cli tests for grouping filters

* feat: modify the CLI help image

* chore: added keep_all_tags parameter with a test

* feat: add keep_all_tags logic

* chore: add new test case

* chore: add test for wrong geo file

* docs: add an example with keep_all_tags parameter
  • Loading branch information
RaczeQ authored Jan 16, 2024
1 parent 482e309 commit 33c5fae
Show file tree
Hide file tree
Showing 9 changed files with 436 additions and 90 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Option to pass an OSM tags filter in the form of a JSON file to the CLI
- Option to keep all tags when filtering with the OSM tags [#25](https://github.com/kraina-ai/quackosm/issues/25)

### Changed

- Logic for the `explode_tags` parameter when filtering based on tags while still keeping all of the tags

### Fixed

- Typos in the CLI docs

## [0.3.2] - 2024-01-10

### Added
Expand Down
Binary file modified docs/assets/images/cli_help.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33 changes: 32 additions & 1 deletion examples/pbf_file_reader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@
"reykjavik_gdf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To select only buildings, we will use the same filter format as the `osmnx` library: a dictionary with keys representing tag keys and values that can be a bool, a string or a list of strings.\n",
"\n",
"By default, `QuackOSM` will return only the tags that are present in the passed filter.\n",
"\n",
"In this example we will select all the buildings using the `{ \"building\": True }` filter, and only the `building` tag values will be present in the result."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -113,6 +124,26 @@
"features_relation.count(\"feature_id\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Keeping all the tags while filtering the data\n",
"\n",
"To keep all of the tags present in the source data, we can use the `keep_all_tags` parameter. That way we will still return only buildings, but with all of the tags attached. \n",
"\n",
"By default, all of those tags will be kept in a single column as a `dict`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reader.get_features_gdf(\"iceland.osm.pbf\", keep_all_tags=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -328,7 +359,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.7"
},
"vscode": {
"interpreter": {
Expand Down
213 changes: 153 additions & 60 deletions quackosm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
import pathlib
from typing import Annotated, Optional, Union, cast

import click
import geopandas as gpd
import typer
from shapely import from_geojson, from_wkt
from shapely.geometry.base import BaseGeometry

from quackosm import __app_name__, __version__
from quackosm._osm_tags_filters import GroupedOsmTagsFilter, OsmTagsFilter
from quackosm._typing import is_expected_type
from quackosm.functions import convert_pbf_to_gpq

app = typer.Typer(context_settings={"help_option_names": ["-h", "--help"]}, rich_markup_mode="rich")
Expand Down Expand Up @@ -38,47 +39,92 @@ def _empty_path_callback(ctx: typer.Context, value: pathlib.Path) -> Optional[pa
return value


def _wkt_callback(value: str) -> BaseGeometry:
if not value:
return None
try:
return from_wkt(value)
except Exception:
raise typer.BadParameter("Cannot parse provided WKT") from None
class WktGeometryParser(click.ParamType): # type: ignore
"""Parser for geometry in WKT form."""

name = "TEXT (WKT)"

def _geojson_callback(value: str) -> BaseGeometry:
if not value:
return None
try:
return from_geojson(value)
except Exception:
raise typer.BadParameter("Cannot parse provided GeoJSON") from None
def convert(self, value, param, ctx): # type: ignore
"""Convert parameter value."""
if not value:
return None
try:
return from_wkt(value)
except Exception:
raise typer.BadParameter("Cannot parse provided WKT") from None


def _geo_file_callback(value: str) -> BaseGeometry:
if not value:
return None
class GeoJsonGeometryParser(click.ParamType): # type: ignore
"""Parser for geometry in GeoJSON form."""

if not pathlib.Path(value).exists():
raise typer.BadParameter("Cannot parse provided geo file")
name = "TEXT (GeoJSON)"

try:
gdf = gpd.read_file(value)
return gdf.unary_union
except Exception:
raise typer.BadParameter("Cannot parse provided geo file") from None
def convert(self, value, param, ctx): # type: ignore
"""Convert parameter value."""
if not value:
return None
try:
return from_geojson(value)
except Exception:
raise typer.BadParameter("Cannot parse provided GeoJSON") from None


def parse_tags_filter(value: str) -> Optional[Union[OsmTagsFilter, GroupedOsmTagsFilter]]:
"""Parse provided cli agrument to tags filter."""
if not value:
return None
try:
parsed_dict = json.loads(value)
class GeoFileGeometryParser(click.ParamType): # type: ignore
"""Parser for geometry in geo file form."""

name = "PATH"

def convert(self, value, param, ctx): # type: ignore
"""Convert parameter value."""
if not value:
return None

value = _path_callback(ctx=ctx, value=value)

try:
gdf = gpd.read_file(value)
return gdf.unary_union
except Exception:
raise typer.BadParameter("Cannot parse provided geo file") from None


class OsmTagsFilterJsonParser(click.ParamType): # type: ignore
"""Parser for OSM tags filter in JSON form."""

name = "TEXT (JSON)"

def convert(self, value, param, ctx): # type: ignore
"""Convert parameter value."""
if not value:
return None
try:
parsed_dict = json.loads(value)
except Exception:
raise typer.BadParameter("Cannot parse provided OSM tags filter") from None

if not is_expected_type(parsed_dict, OsmTagsFilter) and not is_expected_type(
parsed_dict, GroupedOsmTagsFilter
):
raise typer.BadParameter(
"Provided OSM tags filter is not in a required format."
) from None

return cast(Union[OsmTagsFilter, GroupedOsmTagsFilter], parsed_dict)
except Exception:
raise typer.BadParameter("Cannot parse provided OSM tags filter") from None


class OsmTagsFilterFileParser(OsmTagsFilterJsonParser):
"""Parser for OSM tags filter in file form."""

name = "PATH"

def convert(self, value, param, ctx): # type: ignore
"""Convert parameter value."""
if not value:
return None

value = _path_callback(ctx=ctx, value=value)

return super().convert(pathlib.Path(value).read_text(), param, ctx) # type: ignore


def _filter_osm_ids_callback(value: list[str]) -> list[str]:
Expand All @@ -102,49 +148,87 @@ def main(
osm_tags_filter: Annotated[
Optional[str],
typer.Option(
parser=parse_tags_filter,
help=(
"OSM tags used to filter the data. Can the the form of flat or grouped dict "
"OSM tags used to filter the data in the "
"[bold dark_orange]JSON text[/bold dark_orange] form."
" Can take the form of a flat or grouped dict "
"(look: [bold green]OsmTagsFilter[/bold green]"
" and [bold green]GroupedOsmTagsFilter[/bold green])."
" Cannot be used together with"
" [bold bright_cyan]osm-tags-filter-file[/bold bright_cyan]."
),
click_type=OsmTagsFilterJsonParser(),
),
] = None,
osm_tags_filter_file: Annotated[
Optional[str],
typer.Option(
help=(
"OSM tags used to filter the data in the "
"[bold dark_orange]JSON file[/bold dark_orange] form."
" Can take the form of a flat or grouped dict "
"(look: [bold green]OsmTagsFilter[/bold green]"
" and [bold green]GroupedOsmTagsFilter[/bold green])."
" Cannot be used together with"
" [bold bright_cyan]osm-tags-filter[/bold bright_cyan]."
),
click_type=OsmTagsFilterFileParser(),
),
] = None,
keep_all_tags: Annotated[
bool,
typer.Option(
"--keep-all-tags/",
"--all-tags/",
help=(
"Whether to keep all tags while filtering with OSM tags."
" Doesn't work when there is no OSM tags filter applied"
" ([bold bright_cyan]osm-tags-filter[/bold bright_cyan]"
" or [bold bright_cyan]osm-tags-filter-file[/bold bright_cyan])."
" Will override grouping if [bold green]GroupedOsmTagsFilter[/bold green]"
" has been passed as a filter."
),
show_default=False,
),
] = False,
geom_filter_wkt: Annotated[
Optional[str],
typer.Option(
help=(
"Geometry to use as filter in [bold green]WKT[/bold green] format."
"Geometry to use as a filter in the [bold dark_orange]WKT[/bold dark_orange]"
" format."
" Cannot be used together with"
" [bold dark_orange]geom-filter-geojson[/bold dark_orange] or"
" [bold dark_orange]geom-filter-file[/bold dark_orange]."
" [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
" [bold bright_cyan]geom-filter-file[/bold bright_cyan]."
),
parser=_wkt_callback,
click_type=WktGeometryParser(),
),
] = None,
geom_filter_geojson: Annotated[
Optional[str],
typer.Option(
help=(
"Geometry to use as filter in [bold green]GeoJSON[/bold green] format."
"Geometry to use as a filter in the [bold dark_orange]GeoJSON[/bold dark_orange]"
" format."
" Cannot be used used together with"
" [bold dark_orange]geom-filter-wkt[/bold dark_orange] or"
" [bold dark_orange]geom-filter-file[/bold dark_orange]."
" [bold bright_cyan]geom-filter-wkt[/bold bright_cyan] or"
" [bold bright_cyan]geom-filter-file[/bold bright_cyan]."
),
parser=_geojson_callback,
click_type=GeoJsonGeometryParser(),
),
] = None,
geom_filter_file: Annotated[
Optional[str],
typer.Option(
help=(
"Geometry to use as filter in [bold green]file[/bold green] format - any that can"
" be opened by GeoPandas. Fill return unary_union of the file."
"Geometry to use as a filter in the"
" [bold dark_orange]file[/bold dark_orange] format - any that can be opened by"
" GeoPandas. Will return the unary union of the geometries in the file."
" Cannot be used together with"
" [bold dark_orange]geom-filter-wkt[/bold dark_orange] or"
" [bold dark_orange]geom-filter-geojson[/bold dark_orange]."
" [bold bright_cyan]geom-filter-wkt[/bold bright_cyan] or"
" [bold bright_cyan]geom-filter-geojson[/bold bright_cyan]."
),
parser=_geo_file_callback,
click_type=GeoFileGeometryParser(),
),
] = None,
explode_tags: Annotated[
Expand All @@ -153,12 +237,17 @@ def main(
"--explode-tags/--compact-tags",
"--explode/--compact",
help=(
"Whether to split tags into columns based on OSM tag keys. "
"If [bold violet]None[/bold violet], will be set based on "
"[bold dark_orange]osm-tags-filter[/bold dark_orange] parameter. "
"If no tags filter is provided, then explode_tags will set to [bold"
" red]False[/bold red], "
"if there is tags filter it will set to [bold green]True[/bold green]."
"Whether to split tags into columns based on the OSM tag keys."
" If [bold violet]None[/bold violet], it will be set based on"
" the [bold bright_cyan]osm-tags-filter[/bold bright_cyan]"
"/[bold bright_cyan]osm-tags-filter-file[/bold bright_cyan]"
" and [bold bright_cyan]keep-all-tags[/bold bright_cyan] parameters."
" If there is a tags filter applied without"
" [bold bright_cyan]keep-all-tags[/bold bright_cyan] then it'll be set to"
" [bold bright_cyan]explode-tags[/bold bright_cyan]"
" ([bold green]True[/bold green])."
" Otherwise it'll be set to [bold magenta]compact-tags[/bold magenta]"
" ([bold red]False[/bold red])."
),
show_default=None,
),
Expand All @@ -169,8 +258,8 @@ def main(
"--output",
"-o",
help=(
"Path where to save final geoparquet file. If not provided, will be generated"
" automatically based on input pbf file name."
"Path where to save final geoparquet file. If not provided, it will be generated"
" automatically based on the input pbf file name."
),
),
] = None,
Expand Down Expand Up @@ -202,7 +291,7 @@ def main(
"Config where alternative OSM way polygon features config is defined."
" Will determine how to parse way features based on tags."
" Option is intended for experienced users. It's recommended to disable"
" cache ([bold dark_orange]no-cache[/bold dark_orange]) when using this option,"
" cache ([bold bright_cyan]no-cache[/bold bright_cyan]) when using this option,"
" since file names don't contain information what config file has been used"
" for file generation."
),
Expand All @@ -215,8 +304,8 @@ def main(
"--filter-osm-id",
"--filter",
help=(
"List of OSM features ids to read from the file."
"Have to be in the form of 'node/<id>', 'way/<id>' or 'relation/<id>'."
"List of OSM features IDs to read from the file."
" Have to be in the form of 'node/<id>', 'way/<id>' or 'relation/<id>'."
),
callback=_filter_osm_ids_callback,
),
Expand Down Expand Up @@ -244,9 +333,13 @@ def main(
if more_than_one_geometry_provided:
raise typer.BadParameter("Provided more than one geometry for filtering")

if osm_tags_filter is not None and osm_tags_filter_file is not None:
raise typer.BadParameter("Provided more than one osm tags filter parameter")

geoparquet_path = convert_pbf_to_gpq(
pbf_path=pbf_file,
tags_filter=osm_tags_filter, # type: ignore
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
keep_all_tags=keep_all_tags,
geometry_filter=geom_filter_wkt or geom_filter_geojson or geom_filter_file,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
Expand Down
Loading

0 comments on commit 33c5fae

Please sign in to comment.