Skip to content

Commit

Permalink
feat: add keep_all_tags logic
Browse files (browse the repository at this point in the history)
  • Loading branch information
RaczeQ committed Jan 16, 2024
1 parent e881efc commit 782838e
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 9 deletions.
1 change: 1 addition & 0 deletions quackosm/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -343,6 +343,7 @@ def main(
geoparquet_path = convert_pbf_to_gpq(
pbf_path=pbf_file,
tags_filter=osm_tags_filter or osm_tags_filter_file, # type: ignore
keep_all_tags=keep_all_tags,
geometry_filter=geom_filter_wkt or geom_filter_geojson or geom_filter_file,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
Expand Down
33 changes: 24 additions & 9 deletions quackosm/pbf_file_reader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -180,12 +180,13 @@ def get_features_gdf(
filter_osm_ids = []

if explode_tags is None:
explode_tags = self.tags_filter is not None
explode_tags = self.tags_filter is not None and not keep_all_tags

parsed_geoparquet_files = []
for file_path in file_paths:
parsed_geoparquet_file = self.convert_pbf_to_gpq(
file_path,
keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
filter_osm_ids=filter_osm_ids,
Expand Down Expand Up @@ -242,7 +243,7 @@ def convert_pbf_to_gpq(
filter_osm_ids = []

if explode_tags is None:
explode_tags = self.tags_filter is not None
explode_tags = self.tags_filter is not None and not keep_all_tags

with tempfile.TemporaryDirectory(dir=self.working_directory.resolve()) as self.tmp_dir_name:
self.tmp_dir_path = Path(self.tmp_dir_name)
Expand All @@ -251,12 +252,14 @@ def convert_pbf_to_gpq(
result_file_path = result_file_path or self._generate_geoparquet_result_file_path(
pbf_path,
filter_osm_ids=filter_osm_ids,
keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
)
parsed_geoparquet_file = self._parse_pbf_file(
pbf_path=pbf_path,
result_file_path=Path(result_file_path),
filter_osm_ids=filter_osm_ids,
keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
ignore_cache=ignore_cache,
)
Expand Down Expand Up @@ -289,6 +292,7 @@ def _parse_pbf_file(
pbf_path: Union[str, Path],
result_file_path: Path,
filter_osm_ids: list[str],
keep_all_tags: bool = False,
explode_tags: bool = True,
ignore_cache: bool = False,
) -> Path:
Expand Down Expand Up @@ -384,6 +388,7 @@ def _parse_pbf_file(
self._concatenate_results_to_geoparquet(
parsed_geometries=parsed_geometries,
save_file_path=result_file_path,
keep_all_tags=keep_all_tags,
explode_tags=explode_tags,
)

Expand All @@ -392,16 +397,18 @@ def _parse_pbf_file(
def _generate_geoparquet_result_file_path(
self,
pbf_file_path: Union[str, Path],
keep_all_tags: bool,
explode_tags: bool,
filter_osm_ids: list[str],
) -> Path:
pbf_file_name = Path(pbf_file_path).name.removesuffix(".osm.pbf")

osm_filter_tags_hash_part = "nofilter"
if self.tags_filter is not None:
keep_all_tags_part = "" if not keep_all_tags else "_alltags"
h = hashlib.new("sha256")
h.update(json.dumps(self.tags_filter).encode())
osm_filter_tags_hash_part = h.hexdigest()
osm_filter_tags_hash_part = f"{h.hexdigest()}{keep_all_tags_part}"

clipping_geometry_hash_part = "noclip"
if self.geometry_filter is not None:
Expand Down Expand Up @@ -1393,11 +1400,14 @@ def _concatenate_results_to_geoparquet(
self,
parsed_geometries: "duckdb.DuckDBPyRelation",
save_file_path: Path,
keep_all_tags: bool,
explode_tags: bool,
) -> None:
select_clauses = [
"feature_id",
*self._generate_osm_tags_sql_select(parsed_geometries, explode_tags),
*self._generate_osm_tags_sql_select(
parsed_geometries, keep_all_tags=keep_all_tags, explode_tags=explode_tags
),
"ST_GeomFromWKB(geometry_wkb) AS geometry",
]

Expand Down Expand Up @@ -1524,15 +1534,20 @@ def _concatenate_results_to_geoparquet(
)

def _generate_osm_tags_sql_select(
self, parsed_geometries: "duckdb.DuckDBPyRelation", explode_tags: bool
self, parsed_geometries: "duckdb.DuckDBPyRelation", keep_all_tags: bool, explode_tags: bool
) -> list[str]:
"""Prepare features filter clauses based on tags filter."""
osm_tag_keys_select_clauses = []

# TODO: elif keep other tags
if not self.merged_tags_filter and not explode_tags:
no_tags_filter = not self.merged_tags_filter
tags_filter_and_keep_all_tags = self.merged_tags_filter and keep_all_tags
keep_tags_compact = not explode_tags

if (no_tags_filter and keep_tags_compact) or (
tags_filter_and_keep_all_tags and keep_tags_compact
):
osm_tag_keys_select_clauses = ["tags"]
elif not self.merged_tags_filter and explode_tags:
elif (no_tags_filter and explode_tags) or (tags_filter_and_keep_all_tags and explode_tags):
osm_tag_keys = set()
found_tag_keys = [row[0] for row in self.connection.sql(f"""
SELECT DISTINCT UNNEST(map_keys(tags)) tag_key
Expand All @@ -1543,7 +1558,7 @@ def _generate_osm_tags_sql_select(
f"list_extract(map_extract(tags, '{osm_tag_key}'), 1) as \"{osm_tag_key}\""
for osm_tag_key in sorted(list(osm_tag_keys))
]
elif self.merged_tags_filter and not explode_tags:
elif self.merged_tags_filter and keep_tags_compact:
filter_tag_clauses = []
for filter_tag_key, filter_tag_value in self.merged_tags_filter.items():
if isinstance(filter_tag_value, bool) and filter_tag_value:
Expand Down
36 changes: 36 additions & 0 deletions tests/base/test_cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -181,6 +181,42 @@ def test_basic_run(monaco_pbf_file_path: str) -> None:
],
"files/monaco_nofilter_noclip_compact_c740a1597e53ae8c5e98c5119eaa1893ddc177161afe8642addcbe54a6dc089d.geoparquet",
) # type: ignore
@P.case(
"Keep all tags",
[
"--keep-all-tags",
],
"files/monaco_nofilter_noclip_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter with keep all tags",
[
"--keep-all-tags",
"--osm-tags-filter",
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
],
"files/monaco_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_noclip_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter with keep all tags compact",
[
"--keep-all-tags",
"--osm-tags-filter",
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
"--compact",
],
"files/monaco_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_noclip_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter with keep all tags exploded",
[
"--keep-all-tags",
"--osm-tags-filter",
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
"--explode",
],
"files/monaco_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_noclip_exploded.geoparquet",
) # type: ignore
@P.case(
"OSM way polygon config",
["--osm-way-polygon-config", osm_way_config_file_path()],
Expand Down

0 comments on commit 782838e

Please sign in to comment.