Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix remaining memory issues during the last 0.5% of processing planet.osm with the dense memory cache #108

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/osm2rdf/config/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ struct Config {
bool addUntaggedRelations = true;
bool addUntaggedAreas = true;

bool addSpatialRelsForUntaggedNodes = true;

int numThreads = std::thread::hardware_concurrency();

// Default settings for data
Expand Down
8 changes: 8 additions & 0 deletions include/osm2rdf/config/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,14 @@ const static inline std::string NO_WAY_GEOM_RELATIONS_OPTION_LONG =
const static inline std::string NO_WAY_GEOM_RELATIONS_OPTION_HELP =
"Do not dump way geometric relations";

// Strings for the switch that disables spatial relations involving untagged
// nodes. *_INFO is the line reported in the configuration summary when the
// switch is active; *_OPTION_SHORT / *_OPTION_LONG / *_OPTION_HELP are the
// names and help text handed to the command line parser.
// NOTE(review): the short name is empty - presumably this means the option is
// only reachable through its long name; confirm against the parser library.
const static inline std::string NO_UNTAGGED_NODES_SPATIAL_RELS_INFO =
"Do not compute spatial relations involving untagged nodes";
const static inline std::string NO_UNTAGGED_NODES_SPATIAL_RELS_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_NODES_SPATIAL_RELS_OPTION_LONG =
"no-untagged-nodes-geometric-relations";
const static inline std::string NO_UNTAGGED_NODES_SPATIAL_RELS_OPTION_HELP =
"Do not compute spatial relations involving untagged nodes";

const static inline std::string NO_UNTAGGED_NODES_INFO =
"Do not output untagged nodes";
const static inline std::string NO_UNTAGGED_NODES_OPTION_SHORT = "";
Expand Down
3 changes: 3 additions & 0 deletions include/osm2rdf/osm/GeometryHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class GeometryHandler {
const std::string& pred);
void progressCb(size_t progr);

std::string getSweeperId(uint64_t oid, char type);
std::string getFullID(const std::string& id);

osm2rdf::util::ProgressBar _progressBar;
};

Expand Down
47 changes: 28 additions & 19 deletions src/config/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ std::string osm2rdf::config::Config::getInfo(std::string_view prefix) const {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_AREAS_INFO;
}
if (!addSpatialRelsForUntaggedNodes) {
oss << "\n"
<< prefix
<< osm2rdf::config::constants::NO_UNTAGGED_NODES_SPATIAL_RELS_INFO;
}
if (simplifyWKT > 0) {
oss << "\n" << prefix << osm2rdf::config::constants::SIMPLIFY_WKT_INFO;
oss << "\n"
Expand Down Expand Up @@ -280,29 +285,32 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_LONG,
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_HELP);

auto noUntaggedNodesOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_HELP);
auto noUntaggedNodesSpatialRelsOp = parser.add<popl::Switch,
popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_NODES_SPATIAL_RELS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_NODES_SPATIAL_RELS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_NODES_SPATIAL_RELS_OPTION_HELP);

auto noUntaggedWaysOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_HELP);
auto noUntaggedNodesOp = parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_HELP);

auto noUntaggedWaysOp = parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_HELP);

auto noUntaggedRelationsOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_HELP);

auto noUntaggedAreasOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_HELP);
auto noUntaggedAreasOp = parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_HELP);

auto addWayMetadataOp = parser.add<popl::Switch>(
osm2rdf::config::constants::ADD_WAY_METADATA_OPTION_SHORT,
Expand Down Expand Up @@ -482,6 +490,8 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
wktDeviation = wktDeviationOp->value();
wktPrecision = wktPrecisionOp->value();

addSpatialRelsForUntaggedNodes = !noUntaggedNodesSpatialRelsOp->is_set();

addUntaggedNodes = !noUntaggedNodesOp->is_set();
addUntaggedWays = !noUntaggedWaysOp->is_set();
addUntaggedRelations = !noUntaggedRelationsOp->is_set();
Expand Down Expand Up @@ -514,10 +524,9 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
} else if (outputCompressOp->value() == "bz2") {
outputCompress = BZ2;
} else {
throw popl::invalid_option(
outputCompressOp.get(),
popl::invalid_option::Error::invalid_argument,
popl::OptionName::long_name, outputCompressOp->value(), "");
throw popl::invalid_option(
outputCompressOp.get(), popl::invalid_option::Error::invalid_argument,
popl::OptionName::long_name, outputCompressOp->value(), "");
}

outputKeepFiles = outputKeepFilesOp->is_set();
Expand Down
73 changes: 61 additions & 12 deletions src/osm/GeometryHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,7 @@ void GeometryHandler<W>::relation(const Relation& rel) {

if (!rel.hasGeometry()) return;

const std::string id = _writer->generateIRI(
osm2rdf::ttl::constants::RELATION_NAMESPACE[_config.sourceDataset],
rel.id());
std::string id = getSweeperId(rel.id(), 5);

size_t subId = 0;

Expand Down Expand Up @@ -137,7 +135,7 @@ template <typename W>
void GeometryHandler<W>::writeRelCb(size_t t, const std::string& a,
const std::string& b,
const std::string& pred) {
_writer->writeTriple(a, pred, b, t);
_writer->writeTriple(getFullID(a), pred, getFullID(b), t);
}

// ____________________________________________________________________________
Expand Down Expand Up @@ -197,9 +195,7 @@ ::util::geo::I32MultiPolygon GeometryHandler<W>::transform(
// ____________________________________________________________________________
template <typename W>
void GeometryHandler<W>::area(const Area& area) {
const std::string id = _writer->generateIRI(
areaNS(area.fromWay() ? AreaFromType::WAY : AreaFromType::RELATION),
area.objId());
std::string id = getSweeperId(area.objId(), area.fromWay() ? 3 : 4);

_sweeper.add(transform(area.geom()), id, false,
_parseBatches[omp_get_thread_num()]);
Expand All @@ -220,12 +216,66 @@ ::util::geo::I32Point GeometryHandler<W>::transform(
static_cast<int>(point.getY() * PREC)};
}

// ____________________________________________________________________________
// Expands a compact sweeper ID into the full IRI of the OSM object it names.
//
// A compact ID is one type byte followed by at most 8 payload bytes holding
// the numeric object id in big-endian order. The type byte selects the
// namespace: 1 = node, 2 = way, 3 = area derived from a way, 4 = area derived
// from a relation, 5 = relation.
//
// Any string that does not have this shape (empty, unknown type byte, or a
// payload too long to fit into 64 bits) is returned unchanged.
template <typename W>
std::string GeometryHandler<W>::getFullID(const std::string& strid) {
  // Without this guard size() - 1 below would wrap around on an empty string.
  if (strid.empty()) return strid;

  const char type = strid[0];

  // Validate before decoding: shifting a uint64_t by 64 bits or more is
  // undefined, so payloads longer than 8 bytes must never reach the loop.
  if (type < 1 || type > 5 || strid.size() > 1 + sizeof(uint64_t)) {
    return strid;
  }

  // Big-endian decode of the payload bytes that follow the type byte.
  uint64_t id = 0;
  for (size_t i = 1; i < strid.size(); i++) {
    id = (id << 8) | static_cast<unsigned char>(strid[i]);
  }

  switch (type) {
    case 1:
      return _writer->generateIRI(
          osm2rdf::ttl::constants::NODE_NAMESPACE[_config.sourceDataset], id);
    case 2:
      return _writer->generateIRI(
          osm2rdf::ttl::constants::WAY_NAMESPACE[_config.sourceDataset], id);
    case 3:
      return _writer->generateIRI(areaNS(AreaFromType::WAY), id);
    case 4:
      return _writer->generateIRI(areaNS(AreaFromType::RELATION), id);
    case 5:
      return _writer->generateIRI(
          osm2rdf::ttl::constants::RELATION_NAMESPACE[_config.sourceDataset],
          id);
    default:
      // Unreachable because of the range check above; kept so that every
      // path returns a value.
      return strid;
  }
}

// ____________________________________________________________________________
// Builds the compact ID under which an OSM object is registered with the
// sweeper: one type byte followed by the object id in big-endian order with
// leading zero bytes dropped (so an id of 0 yields just the type byte).
//
// oid  - numeric OSM object id
// type - type tag stored as the first byte of the result
//
// The result is between 1 and 9 bytes long and may contain NUL bytes.
template <typename W>
std::string GeometryHandler<W>::getSweeperId(uint64_t oid, char type) {
  // Room for the type byte plus the 8 bytes of a full 64 bit id.
  unsigned char buf[1 + sizeof(uint64_t)];
  size_t pos = sizeof(buf);

  // Fill the buffer from the back, least significant byte first, so that the
  // payload ends up big-endian. Consuming oid itself bounds the loop to at
  // most 8 iterations and never shifts by 64 bits, which would be undefined.
  for (; oid != 0; oid >>= 8) {
    buf[--pos] = static_cast<unsigned char>(oid & 0xFF);
  }

  buf[--pos] = static_cast<unsigned char>(type);

  return std::string{reinterpret_cast<const char*>(buf + pos),
                     sizeof(buf) - pos};
}

// ____________________________________________________________________________
template <typename W>
void GeometryHandler<W>::node(const Node& node) {
std::string id = _writer->generateIRI(
osm2rdf::ttl::constants::NODE_NAMESPACE[_config.sourceDataset],
node.id());
std::string id = getSweeperId(node.id(), 1);

_sweeper.add(transform(node.geom()), id, false,
_parseBatches[omp_get_thread_num()]);
Expand All @@ -241,8 +291,7 @@ template <typename W>
void GeometryHandler<W>::way(const Way& way) {
if (way.isArea()) return; // skip way relations, will be handled by area()

std::string id = _writer->generateIRI(
osm2rdf::ttl::constants::WAY_NAMESPACE[_config.sourceDataset], way.id());
std::string id = getSweeperId(way.id(), 2);

_sweeper.add(transform(way.geom()), id, false,
_parseBatches[omp_get_thread_num()]);
Expand Down
3 changes: 2 additions & 1 deletion src/osm/OsmiumHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ void osm2rdf::osm::OsmiumHandler<W>::node(const osmium::Node& node) {
_progressBar.update(_numTasksDone++);
}
}
if (!_config.noGeometricRelations && !_config.noNodeGeometricRelations) {
if (!_config.noGeometricRelations && !_config.noNodeGeometricRelations &&
(!osmNode.tags().empty() || _config.addSpatialRelsForUntaggedNodes)) {
_geometryHandler->node(osmNode);
#pragma omp critical(progress)
{
Expand Down
2 changes: 1 addition & 1 deletion vendor/spatialjoin
Loading