Skip to content

Commit

Permalink
fix concurrency issue on output to stdout, add e2e tests for uncompre…
Browse files Browse the repository at this point in the history
…ssed file out, and output to stdout (which is always uncompressed)
  • Loading branch information
patrickbr committed Oct 16, 2024
1 parent 1271fb1 commit 7e3e294
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 21 deletions.
94 changes: 91 additions & 3 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ concurrency:
cancel-in-progress: true

jobs:
build:
name: End-to-end test (docker build)
build-bz2:
name: End-to-end test, BZ2 output (docker build)
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
dockerfile: [ Dockerfile]

steps:
- name: Checkout the repository
- name: Checkout the repository
uses: actions/checkout@v3
with:
submodules: "recursive"
Expand Down Expand Up @@ -55,3 +55,91 @@ jobs:
NUM_GEOMS=$(echo "${RESULT_JSON}" | jq --exit-status --raw-output .results.bindings[0].count.value)
echo ${NUM_GEOMS} | numfmt --grouping
test ${NUM_GEOMS} -gt 100000
# End-to-end job for the uncompressed file output path: builds the docker
# image, converts the Malta OSM extract to a plain .ttl file, validates it,
# then indexes it with QLever and checks the number of geometries.
build-uncompressed:
name: End-to-end test, uncompressed output (docker build)
runs-on: ubuntu-22.04
strategy:
# Keep the remaining matrix entries running if one of them fails.
fail-fast: false
matrix:
dockerfile: [ Dockerfile]

steps:
# Submodules are fetched recursively as part of the checkout.
- name: Checkout the repository
uses: actions/checkout@v3
with:
submodules: "recursive"

# Builds the image from the matrix Dockerfile and runs `--help` in it.
- name: Build the docker image
run: |
set -v
docker build -f ${{matrix.dockerfile}} -t osm2rdf .
docker run --rm osm2rdf --help
# Downloads the Malta extract, converts it with --output-no-compress to
# osm-malta.ttl (written through the /data bind mount of the current
# directory), and validates the result with Jena riot.
- name: Build TTL for Malta and check its validity
run: |
set -v
mkdir osm-malta && cd $_
curl -L -o osm-malta.pbf https://download.geofabrik.de/europe/malta-latest.osm.pbf
ls -l osm-malta.pbf
docker run --rm -v $(pwd):/data osm2rdf /data/osm-malta.pbf --output-no-compress -o /data/osm-malta.ttl
ls -l osm-malta.pbf osm-malta.ttl
docker run --rm -v $(pwd):/data stain/jena riot --validate /data/osm-malta.ttl
# Pipes the TTL into IndexBuilderMain, starts ServerMain on port 7000, and
# runs a SPARQL COUNT over geo:hasGeometry; the step fails unless the count
# is greater than 100000.
- name: Build QLever index and count the number of geometries
run: |
set -v
cd osm-malta
docker run -u $(id -u):$(id -g) -v $(pwd):/data -w /data --entrypoint bash adfreiburg/qlever -c "cat osm-malta.ttl | IndexBuilderMain -F ttl -f - -i osm-malta"
docker run -d -p 7000:7000 -v $(pwd):/data -w /data --entrypoint bash --name qlever adfreiburg/qlever -c "ServerMain -i /data/osm-malta -p 7000"
sleep 5
docker logs qlever
RESULT_JSON=$(curl http://localhost:7000 --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> SELECT (COUNT(?geometry) AS ?count) WHERE { ?osm_id geo:hasGeometry ?geometry }")
echo "${RESULT_JSON}"
NUM_GEOMS=$(echo "${RESULT_JSON}" | jq --exit-status --raw-output .results.bindings[0].count.value)
echo ${NUM_GEOMS} | numfmt --grouping
test ${NUM_GEOMS} -gt 100000
# End-to-end job for the stdout output path: builds the docker image, converts
# the Malta OSM extract with the output captured from the container's stdout,
# validates the resulting TTL, then indexes it with QLever and checks the
# number of geometries.
build-stdout:
  name: End-to-end test, output to stdout (docker build)
  runs-on: ubuntu-22.04
  strategy:
    # Keep the remaining matrix entries running if one of them fails.
    fail-fast: false
    matrix:
      dockerfile: [ Dockerfile]

  steps:
    # Submodules are fetched recursively as part of the checkout.
    - name: Checkout the repository
      uses: actions/checkout@v3
      with:
        submodules: "recursive"

    # Builds the image from the matrix Dockerfile and runs `--help` in it.
    - name: Build the docker image
      run: |
        set -v
        docker build -f ${{matrix.dockerfile}} -t osm2rdf .
        docker run --rm osm2rdf --help
    - name: Build TTL for Malta and check its validity
      run: |
        set -v
        mkdir osm-malta && cd $_
        curl -L -o osm-malta.pbf https://download.geofabrik.de/europe/malta-latest.osm.pbf
        ls -l osm-malta.pbf
        # The redirection is performed by the runner's shell, not inside the
        # container, so the target must be a host path. /data only exists
        # inside the container; write to the current directory, which is the
        # one listed and bind-mounted by the commands below.
        docker run --rm -v $(pwd):/data osm2rdf /data/osm-malta.pbf > osm-malta.ttl
        ls -l osm-malta.pbf osm-malta.ttl
        docker run --rm -v $(pwd):/data stain/jena riot --validate /data/osm-malta.ttl
    # Pipes the TTL into IndexBuilderMain, starts ServerMain on port 7000, and
    # runs a SPARQL COUNT over geo:hasGeometry; the step fails unless the
    # count is greater than 100000.
    - name: Build QLever index and count the number of geometries
      run: |
        set -v
        cd osm-malta
        docker run -u $(id -u):$(id -g) -v $(pwd):/data -w /data --entrypoint bash adfreiburg/qlever -c "cat osm-malta.ttl | IndexBuilderMain -F ttl -f - -i osm-malta"
        docker run -d -p 7000:7000 -v $(pwd):/data -w /data --entrypoint bash --name qlever adfreiburg/qlever -c "ServerMain -i /data/osm-malta -p 7000"
        sleep 5
        docker logs qlever
        RESULT_JSON=$(curl http://localhost:7000 --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> SELECT (COUNT(?geometry) AS ?count) WHERE { ?osm_id geo:hasGeometry ?geometry }")
        echo "${RESULT_JSON}"
        NUM_GEOMS=$(echo "${RESULT_JSON}" | jq --exit-status --raw-output .results.bindings[0].count.value)
        echo ${NUM_GEOMS} | numfmt --grouping
        test ${NUM_GEOMS} -gt 100000
31 changes: 13 additions & 18 deletions src/util/Output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,7 @@ void osm2rdf::util::Output::close() {
}

if (_toStdOut) {
#pragma omp parallel for
for (size_t i = 0; i < _partCount; ++i) {
_outBuffers[i][_outBufPos[i]] = 0;
std::cout << _outBuffers[i];
}
return;
} else if (_config.outputCompress) {
#pragma omp parallel for
for (size_t i = 0; i < _partCount; ++i) {
Expand Down Expand Up @@ -211,17 +207,21 @@ void osm2rdf::util::Output::concatenate() {
}

// ____________________________________________________________________________
void osm2rdf::util::Output::writeNewLine(size_t part) { write('\n', part); }
// Appends a newline to output part `part`. When writing to stdout, the bytes
// buffered for that part are then printed as one string and the buffer is
// reset, so that each part hands complete lines to std::cout.
// NOTE(review): in stdout mode the buffer appears to be drained only here;
// confirm that a single line can never exceed the buffer capacity.
void osm2rdf::util::Output::writeNewLine(size_t part) {
  write('\n', part);

  if (!_toStdOut) {
    return;
  }

  // NUL-terminate the buffered data so it can be streamed as a C string,
  // then start the next line at the beginning of the buffer.
  auto& bufPos = _outBufPos[part];
  _outBuffers[part][bufPos] = 0;
  std::cout << _outBuffers[part];
  bufPos = 0;
}

// ____________________________________________________________________________
void osm2rdf::util::Output::write(std::string_view strv, size_t t) {
assert(t < _partCount);
if (_toStdOut) {
if (_outBufPos[t] + strv.size() + 1 >= BUFFER_S) {
_outBuffers[t][_outBufPos[t]] = 0;
std::cout << _outBuffers[t];
_outBufPos[t] = 0;
}
// on output to stdout, we only flush on newlines
} else if (_config.outputCompress) {
if (_outBufPos[t] + strv.size() + 1 >= BUFFER_S) {
int err = 0;
Expand Down Expand Up @@ -272,11 +272,7 @@ void osm2rdf::util::Output::write(std::string_view strv, size_t t) {
void osm2rdf::util::Output::write(const char c, size_t t) {
assert(t < _partCount);
if (_toStdOut) {
if (_outBufPos[t] + 2 >= BUFFER_S) {
_outBuffers[t][_outBufPos[t]] = 0;
std::cout << _outBuffers[t];
_outBufPos[t] = 0;
}
// on output to stdout, we only flush on newlines
} else if (_config.outputCompress) {
if (_outBufPos[t] + 2 >= BUFFER_S) {
int err = 0;
Expand Down Expand Up @@ -320,8 +316,7 @@ void osm2rdf::util::Output::flush() {
// ____________________________________________________________________________
void osm2rdf::util::Output::flush(size_t i) {
if (_toStdOut) {
_outBuffers[i][_outBufPos[i]] = 0;
std::cout << _outBuffers[i];
// on output to stdout, we only flush on newlines
} else if (_config.outputCompress) {
int err = 0;
BZ2_bzWrite(&err, _files[i], _outBuffers[i], _outBufPos[i]);
Expand Down

0 comments on commit 7e3e294

Please sign in to comment.