Skip to content

Commit

Permalink
update util, add .gz and plain file output, add e2e tests
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickbr committed Apr 19, 2024
1 parent c9433dd commit 45c8b1e
Show file tree
Hide file tree
Showing 11 changed files with 635 additions and 265 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DLOGLEVEL=2")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -g -DLOGLEVEL=3")

find_package(BZip2 REQUIRED)
find_package(ZLIB REQUIRED)

# export compile commands to tools like clang
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down Expand Up @@ -64,6 +65,7 @@ add_subdirectory(src)

# tests
add_test("utilTest" utilTest)
add_test("spatialjoinTest" spatialjoinTest)

# install target
install(
Expand Down
6 changes: 5 additions & 1 deletion src/spatialjoin/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
file(GLOB_RECURSE SPATIALJOIN_SRC *.cpp)
list(REMOVE_ITEM SPATIALJOIN_SRC TestMain.cpp)

set(spatialjoin_main SpatialJoinMain.cpp)

list(REMOVE_ITEM SPATIALJOIN_SRC ${spatialjoin_main})

include_directories(
${SPATIALJOIN_INCLUDE_DIR}
${ZLIB_INCLUDE_DIRS}
)

add_executable(spatialjoin ${spatialjoin_main})
add_library(spatialjoin_dep ${SPATIALJOIN_SRC})

target_link_libraries(spatialjoin spatialjoin_dep util ${BZIP2_LIBRARIES} -lpthread)
target_link_libraries(spatialjoin spatialjoin_dep util ${BZIP2_LIBRARIES} ${ZLIB_LIBRARIES} -lpthread)

add_subdirectory(tests)
151 changes: 3 additions & 148 deletions src/spatialjoin/SpatialJoinMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

#include "BoxIds.h"
#include "Sweeper.h"
#include "WKTParse.h"
#include "util/Misc.h"
#include "util/geo/Geo.h"
#include "util/http/Server.h"
#include "util/log/Log.h"

using sj::Sweeper;
Expand Down Expand Up @@ -38,7 +38,7 @@ void printHelp(int argc, char** argv) {
<< std::setw(41) << " -h [ --help ]"
<< "show this help message\n"
<< std::setw(41) << " -o [ --output ] (default: '')"
<< "output file, empty (default) prints to stdout\n"
<< "output file (.bz2 or .gz supported), empty prints to stdout\n"
<< std::setw(41) << " -c [ --cache ] (default: '.')"
<< "cache directory for intermediate files\n"
<< std::setw(41) << " -C"
Expand All @@ -61,148 +61,6 @@ void printHelp(int argc, char** argv) {
<< std::endl;
}

// _____________________________________________________________________________
// Parses one parenthesised WKT coordinate list ("x y, x y, ...") out of `a`.
//
// `p` is the offset of the first character after the opening '('. Parsing
// stops at the first ')' found at or after `p`. Each "x y" pair is read with
// util::atof, passed through latLngToWebMerc, scaled by PREC and appended as
// an I32Point. The collected line is returned through util::geo::simplify
// with a second argument of 0.
//
// A coordinate without a ' ' between x and y ends parsing; the points read up
// to that position are kept.
util::geo::I32Line parseLineString(const std::string& a, size_t p) {
  util::geo::I32Line line;
  line.reserve(2);

  const char* const base = a.c_str();
  const void* end = memchr(base + p, ')', a.size() - p);
  assert(end);

  while (true) {
    // skip leading whitespace; the cast is required because passing a
    // negative char value to isspace is undefined behaviour
    while (base[p] && isspace(static_cast<unsigned char>(base[p]))) p++;

    // the y value starts right after the first space following x; without
    // that space there is no complete coordinate left to read
    const void* sep = memchr(base + p, ' ', a.size() - p);
    if (!sep) break;

    double x = util::atof(base + p, 10);
    double y = util::atof(static_cast<const char*>(sep) + 1, 10);
    auto projPoint = latLngToWebMerc(DPoint(x, y));

    line.push_back(I32Point{projPoint.getX() * PREC, projPoint.getY() * PREC});

    // continue with the next coordinate only if its ',' lies before the
    // closing ')' of this list
    const void* n = memchr(base + p, ',', a.size() - p);
    if (!n || n > end) break;
    p = static_cast<const char*>(n) - base + 1;
  }

  return util::geo::simplify(line, 0);
}

// _____________________________________________________________________________
// Parses a single "x y" coordinate out of `a`, starting at offset `p`.
//
// Both values are read with util::atof, the pair is passed through
// latLngToWebMerc, and the result is scaled by PREC.
//
// NOTE(review): if there is no ' ' at or after `p`, y falls back to 0 instead
// of reading from a null pointer + 1 -- confirm that this is the preferred
// handling of a malformed point.
util::geo::I32Point parsePoint(const std::string& a, size_t p) {
  const char* const xStr = a.c_str() + p;
  const void* sep = memchr(xStr, ' ', a.size() - p);

  const double x = util::atof(xStr, 10);
  const double y =
      sep ? util::atof(static_cast<const char*>(sep) + 1, 10) : 0.0;

  auto point = latLngToWebMerc(DPoint(x, y));

  return {point.getX() * PREC, point.getY() * PREC};
}

// _____________________________________________________________________________
// Consumes `size` bytes starting at `c`. Every byte that is not '\n' is
// appended to `dangling`. On each '\n', *gid is incremented, the text
// accumulated in `dangling` is interpreted as one record, a recognised
// geometry is added to `idx`, and `dangling` is cleared.
//
// Bytes after the last '\n' of the buffer remain in `dangling` when the
// function returns (presumably so that the caller can continue the record
// with the next buffer -- confirm against the caller).
//
// Record layout as read here: if the record contains a tab, the text before
// the first tab is used as the id; otherwise the decimal value of *gid is
// used. The geometry keyword ("POINT(", "LINESTRING(", "MULTILINESTRING(",
// "POLYGON(", "MULTIPOLYGON(") must begin at or before a small offset (see
// the inner `start` below). Records matching none of the keywords are
// dropped, but still increment *gid.
void parse(const char* c, size_t size, std::string& dangling, size_t* gid,
Sweeper& idx) {
const char* start = c;
while (c < start + size) {
if (*c == '\n') {
(*gid)++;

auto idp = dangling.find("\t");

std::string id = std::to_string(*gid);

// NOTE: this `start` shadows the outer buffer pointer `start` inside this
// block only; the loop condition above still uses the outer one. It is the
// largest offset at which a geometry keyword may begin, because
// rfind(str, pos) only reports matches beginning at or before pos.
size_t start = 2;

if (idp != std::string::npos) {
id = dangling.substr(0, idp);
start = idp + 2;
}

auto p = dangling.rfind("POINT(", start);

if (p != std::string::npos) {
// skip the 6 characters of "POINT("
p += 6;
auto point = parsePoint(dangling, p);
idx.add(point, id);
} else if ((p = dangling.rfind("LINESTRING(", start)) !=

std::string::npos) {
// skip the 11 characters of "LINESTRING("
p += 11;
const auto& line = parseLineString(dangling, p);
// lines that come back empty are not added
if (line.size() != 0) {
idx.add(line, id);
}
} else if ((p = dangling.rfind("MULTILINESTRING(", start)) !=
std::string::npos) {
I32MultiLine ml;
// skip the 16 characters of "MULTILINESTRING("
p += 16;
// every further '(' opens one member linestring
while ((p = dangling.find("(", p)) != std::string::npos) {
const auto& line = parseLineString(dangling, p + 1);
if (line.size() != 0) {
ml.push_back(line);
}
// step past the '(' just handled so that the next find() advances
p += 1;
}
idx.add(ml, id);
} else if ((p = dangling.rfind("POLYGON(", start)) != std::string::npos) {
// "POLYGON(" has 8 characters, so p now sits on its '('
p += 7;
size_t i = 0;
I32Polygon poly;
// every further '(' opens one ring
while ((p = dangling.find("(", p + 1)) != std::string::npos) {
const auto& line = parseLineString(dangling, p + 1);
if (i == 0) {
// outer
poly.getOuter() = line;
} else {
// every ring after the first is stored as an inner ring
poly.getInners().push_back(line);
}
i++;
}
idx.add(poly, id);
} else if ((p = dangling.rfind("MULTIPOLYGON(", start)) !=
std::string::npos) {
// "MULTIPOLYGON(" has 13 characters, so p now sits on its '('
p += 12;
I32MultiPolygon mp;
// outer loop: the next '(' opens one member polygon; the inner loop sets p
// to npos once no further ring is found, which ends this loop as well
while (p != std::string::npos &&
(p = dangling.find("(", p + 1)) != std::string::npos) {
I32Polygon poly;
size_t i = 0;
// inner loop: every further '(' opens one ring of the current polygon
while ((p = dangling.find("(", p + 1)) != std::string::npos) {
const auto& line = parseLineString(dangling, p + 1);
if (i == 0) {
// outer
poly.getOuter() = line;
} else {
poly.getInners().push_back(line);
}

// check if multipolygon is closed
// (a second ')' that comes before the next ',' ends the current polygon;
// scanning then resumes at that ',')
auto q = dangling.find(
")", p + 1); // this is the closing of the linestring
auto q2 = dangling.find(")", q + 1);
auto q3 = dangling.find(",", q + 1);
if (q2 != std::string::npos && q3 != std::string::npos && q2 < q3) {
p = q3;
break;
}

i++;
}
mp.push_back(poly);
}
idx.add(mp, id);
}

// record handled (or ignored): start collecting the next one
dangling.clear();
c++;
continue;
}

// not a newline: the byte belongs to the record currently being collected
dangling += *c;

c++;
}
}

// _____________________________________________________________________________
int main(int argc, char** argv) {
// disable output buffering for standard output
Expand Down Expand Up @@ -297,19 +155,16 @@ int main(int argc, char** argv) {
}

char* buf = new char[1024 * 1024 * 100];

size_t len;

std::string dangling;
size_t gid = 0;

size_t NUM_THREADS = std::thread::hardware_concurrency();

Sweeper sweeper({NUM_THREADS, prefix, intersects, contains, covers, suffix,
useBoxIds, useArea},
useCache, cache, output);

size_t gid = 0;

if (!useCache) {
LOGTO(INFO, std::cerr) << "Parsing input geometries...";

Expand Down
Loading

0 comments on commit 45c8b1e

Please sign in to comment.