Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Options for untagged entities #103

Merged
merged 3 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/osm2rdf/config/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ struct Config {
bool addWayNodeSpatialMetadata = false;
bool skipWikiLinks = false;

// Per-entity-type switches for entities that carry no tags. Each defaults to
// true; Config::fromArgs() sets one to false when the matching
// --no-untagged-{nodes,ways,relations,areas} switch is given, and the handlers
// then return early for entities of that type whose tag list is empty.
bool addUntaggedNodes = true;
bool addUntaggedWays = true;
bool addUntaggedRelations = true;
bool addUntaggedAreas = true;

int numThreads = std::thread::hardware_concurrency();

// Default settings for data
Expand Down
32 changes: 32 additions & 0 deletions include/osm2rdf/config/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,38 @@ const static inline std::string NO_WAY_GEOM_RELATIONS_OPTION_LONG =
const static inline std::string NO_WAY_GEOM_RELATIONS_OPTION_HELP =
"Do not dump way geometric relations";

// Untagged nodes: the info line Config::getInfo() appends when
// addUntaggedNodes is false, followed by the short name (left empty), long
// name and help text registered for the switch in Config::fromArgs().
const static inline std::string NO_UNTAGGED_NODES_INFO =
"Do not output untagged nodes";
const static inline std::string NO_UNTAGGED_NODES_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_NODES_OPTION_LONG =
"no-untagged-nodes";
const static inline std::string NO_UNTAGGED_NODES_OPTION_HELP =
"Do not output untagged nodes";

// Untagged ways: the info line Config::getInfo() appends when
// addUntaggedWays is false, followed by the short name (left empty), long
// name and help text registered for the switch in Config::fromArgs().
const static inline std::string NO_UNTAGGED_WAYS_INFO =
"Do not output untagged ways";
const static inline std::string NO_UNTAGGED_WAYS_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_WAYS_OPTION_LONG =
"no-untagged-ways";
const static inline std::string NO_UNTAGGED_WAYS_OPTION_HELP =
"Do not output untagged ways";

// Untagged relations: the info line Config::getInfo() appends when
// addUntaggedRelations is false, followed by the short name (left empty),
// long name and help text registered for the switch in Config::fromArgs().
const static inline std::string NO_UNTAGGED_RELATIONS_INFO =
"Do not output untagged relations";
const static inline std::string NO_UNTAGGED_RELATIONS_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_RELATIONS_OPTION_LONG =
"no-untagged-relations";
const static inline std::string NO_UNTAGGED_RELATIONS_OPTION_HELP =
"Do not output untagged relations";

// Untagged areas: the info line Config::getInfo() appends when
// addUntaggedAreas is false, followed by the short name (left empty), long
// name and help text registered for the switch in Config::fromArgs().
const static inline std::string NO_UNTAGGED_AREAS_INFO =
"Do not output untagged areas";
const static inline std::string NO_UNTAGGED_AREAS_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_AREAS_OPTION_LONG =
"no-untagged-areas";
const static inline std::string NO_UNTAGGED_AREAS_OPTION_HELP =
"Do not output untagged areas";

const static inline std::string ADD_AREA_WAY_LINESTRINGS_INFO =
"Adding linestrings for ways which form areas";
const static inline std::string ADD_AREA_WAY_LINESTRINGS_OPTION_SHORT = "";
Expand Down
3 changes: 3 additions & 0 deletions include/osm2rdf/osm/CountHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace osm2rdf::osm {

class CountHandler : public osmium::handler::Handler {
public:
CountHandler(const osm2rdf::config::Config& config) : _config(config) {};
void node(const osmium::Node& node);
void relation(const osmium::Relation& relation);
void way(const osmium::Way& way);
Expand All @@ -45,6 +46,8 @@ class CountHandler : public osmium::handler::Handler {
bool _firstPassDone = false;
size_t _minId = std::numeric_limits<size_t>::max();
size_t _maxId = 0;

osm2rdf::config::Config _config;
};
}

Expand Down
69 changes: 57 additions & 12 deletions src/config/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
// You should have received a copy of the GNU General Public License
// along with osm2rdf. If not, see <https://www.gnu.org/licenses/>.

#include "osm2rdf/config/Config.h"

#include <filesystem>
#include <iostream>
#include <string>

#include "osm2rdf/config/Config.h"

#if defined(_OPENMP)
#include "omp.h"
#endif
Expand Down Expand Up @@ -87,6 +87,22 @@ std::string osm2rdf::config::Config::getInfo(std::string_view prefix) const {
<< osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_INFO;
}
}
if (!addUntaggedNodes) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_NODES_INFO;
}
if (!addUntaggedWays) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_WAYS_INFO;
}
if (!addUntaggedRelations) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_INFO;
}
if (!addUntaggedAreas) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_AREAS_INFO;
}
if (simplifyWKT > 0) {
oss << "\n" << prefix << osm2rdf::config::constants::SIMPLIFY_WKT_INFO;
oss << "\n"
Expand Down Expand Up @@ -145,13 +161,12 @@ std::string osm2rdf::config::Config::getInfo(std::string_view prefix) const {
}
}
oss << "\n" << prefix << osm2rdf::config::constants::SECTION_MISCELLANEOUS;
oss << "\n"
<< prefix << "Num Threads: " << numThreads;
oss << "\n" << prefix << "Num Threads: " << numThreads;

if (!storeLocations.empty()) {
oss << "\n"
<< prefix << osm2rdf::config::constants::STORE_LOCATIONS_INFO
<< " " << storeLocations;
<< prefix << osm2rdf::config::constants::STORE_LOCATIONS_INFO << " "
<< storeLocations;
}

if (writeRDFStatistics) {
Expand Down Expand Up @@ -265,6 +280,30 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_LONG,
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_HELP);

auto noUntaggedNodesOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_HELP);

auto noUntaggedWaysOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_HELP);

auto noUntaggedRelationsOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_HELP);

auto noUntaggedAreasOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_HELP);

auto addWayMetadataOp = parser.add<popl::Switch>(
osm2rdf::config::constants::ADD_WAY_METADATA_OPTION_SHORT,
osm2rdf::config::constants::ADD_WAY_METADATA_OPTION_LONG,
Expand All @@ -288,11 +327,10 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
osm2rdf::config::constants::AUX_GEO_FILES_OPTION_LONG,
osm2rdf::config::constants::AUX_GEO_FILES_OPTION_HELP);

auto numThreadsOp =
parser.add<popl::Value<int>, popl::Attribute::advanced>(
osm2rdf::config::constants::NUM_THREADS_OPTION_SHORT,
osm2rdf::config::constants::NUM_THREADS_OPTION_LONG,
osm2rdf::config::constants::NUM_THREADS_OPTION_HELP, numThreads);
auto numThreadsOp = parser.add<popl::Value<int>, popl::Attribute::advanced>(
osm2rdf::config::constants::NUM_THREADS_OPTION_SHORT,
osm2rdf::config::constants::NUM_THREADS_OPTION_LONG,
osm2rdf::config::constants::NUM_THREADS_OPTION_HELP, numThreads);

auto semicolonTagKeysOp =
parser.add<popl::Value<std::string>, popl::Attribute::advanced>(
Expand Down Expand Up @@ -444,6 +482,11 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
wktDeviation = wktDeviationOp->value();
wktPrecision = wktPrecisionOp->value();

addUntaggedNodes = !noUntaggedNodesOp->is_set();
addUntaggedWays = !noUntaggedWaysOp->is_set();
addUntaggedRelations = !noUntaggedRelationsOp->is_set();
addUntaggedAreas = !noUntaggedAreasOp->is_set();

addWayNodeOrder |= addWayNodeSpatialMetadata;

if (semicolonTagKeysOp->is_set()) {
Expand All @@ -464,7 +507,9 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
// Output
output = outputOp->value();
outputFormat = outputFormatOp->value();
outputCompress = outputCompressOp->value() == "none" ? NONE : (outputCompressOp->value() == "gz" ? GZ : BZ2);
outputCompress = outputCompressOp->value() == "none"
? NONE
: (outputCompressOp->value() == "gz" ? GZ : BZ2);
outputKeepFiles = outputKeepFilesOp->is_set();
if (output.empty()) {
outputCompress = NONE;
Expand Down
28 changes: 11 additions & 17 deletions src/osm/CountHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,52 +16,46 @@
// You should have received a copy of the GNU General Public License
// along with osm2rdf. If not, see <https://www.gnu.org/licenses/>.

#include "osm2rdf/osm/CountHandler.h"

#include <iostream>

#include "osm2rdf/osm/CountHandler.h"

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::prepare_for_lookup() {
_firstPassDone = true;
}
void osm2rdf::osm::CountHandler::prepare_for_lookup() { _firstPassDone = true; }

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::node(const osmium::Node& node){
void osm2rdf::osm::CountHandler::node(const osmium::Node& node) {
if (node.positive_id() < _minId) _minId = node.positive_id();
if (node.positive_id() > _maxId) _maxId = node.positive_id();
if (_firstPassDone) {
if (_firstPassDone || (!_config.addUntaggedNodes && node.tags().empty())) {
return;
}
_numNodes++;
}

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::relation(const osmium::Relation&) {
if (_firstPassDone) {
void osm2rdf::osm::CountHandler::relation(const osmium::Relation& rel) {
if (_firstPassDone || (!_config.addUntaggedRelations && rel.tags().empty())) {
return;
}
_numRelations++;
}

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::way(const osmium::Way&) {
if (_firstPassDone) {
void osm2rdf::osm::CountHandler::way(const osmium::Way& way) {
if (_firstPassDone || (!_config.addUntaggedWays && way.tags().empty())) {
return;
}
_numWays++;
}

// ____________________________________________________________________________
size_t osm2rdf::osm::CountHandler::numNodes() const {
return _numNodes;
}
size_t osm2rdf::osm::CountHandler::numNodes() const { return _numNodes; }

// ____________________________________________________________________________
size_t osm2rdf::osm::CountHandler::numRelations() const {
return _numRelations;
}

// ____________________________________________________________________________
size_t osm2rdf::osm::CountHandler::numWays() const {
return _numWays;
}
size_t osm2rdf::osm::CountHandler::numWays() const { return _numWays; }
21 changes: 20 additions & 1 deletion src/osm/OsmiumHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void osm2rdf::osm::OsmiumHandler<W>::handle() {
assembler_config.create_empty_areas = false;
osmium::area::MultipolygonManager<osmium::area::Assembler> mp_manager{
assembler_config};
osm2rdf::osm::CountHandler countHandler;
osm2rdf::osm::CountHandler countHandler(_config);

// read relations for areas
{
Expand Down Expand Up @@ -163,6 +163,11 @@ void osm2rdf::osm::OsmiumHandler<W>::handle() {
template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::area(const osmium::Area& area) {
_areasSeen++;

if (!_config.addUntaggedAreas && area.tags().empty()) {
return;
}

try {
auto osmArea = osm2rdf::osm::Area(area);
#pragma omp task
Expand All @@ -187,6 +192,10 @@ template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::node(const osmium::Node& node) {
_nodesSeen++;

if (!_config.addUntaggedNodes && node.tags().empty()) {
return;
}

try {
const auto& osmNode = osm2rdf::osm::Node(node);
#pragma omp task
Expand Down Expand Up @@ -224,6 +233,11 @@ template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::relation(
const osmium::Relation& relation) {
_relationsSeen++;

if (!_config.addUntaggedRelations && relation.tags().empty()) {
return;
}

try {
// only task this away if we actually build the relation geometries,
// otherwise this just adds multithreading overhead for nothing
Expand Down Expand Up @@ -267,6 +281,11 @@ void osm2rdf::osm::OsmiumHandler<W>::relation(
template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::way(const osmium::Way& way) {
_waysSeen++;

if (!_config.addUntaggedWays && way.tags().empty()) {
return;
}

try {
auto osmWay = osm2rdf::osm::Way(way);

Expand Down