Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ST_Dump, bump duckdb, wrap yyjson in namespace #157

Merged
merged 3 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

project(${TARGET_NAME})

add_definitions(-DDUCKDB_MAJOR_VERSION=${DUCKDB_MAJOR_VERSION})
add_definitions(-DDUCKDB_MINOR_VERSION=${DUCKDB_MINOR_VERSION})
add_definitions(-DDUCKDB_PATCH_VERSION=${DUCKDB_PATCH_VERSION})

# Options

# Enable network functionality (OpenSSL and GDAL's CURL based fs/drivers)
Expand Down
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 122 files
4 changes: 4 additions & 0 deletions spatial/include/spatial/core/functions/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct CoreScalarFunctions {
RegisterStContains(db);
RegisterStDimension(db);
RegisterStDistance(db);
RegisterStDump(db);
RegisterStEndPoint(db);
RegisterStExtent(db);
RegisterStExteriorRing(db);
Expand Down Expand Up @@ -83,6 +84,9 @@ struct CoreScalarFunctions {
// ST_Distance
static void RegisterStDistance(DatabaseInstance &db);

// ST_Dump
static void RegisterStDump(DatabaseInstance &db);

// ST_EndPoint
static void RegisterStEndPoint(DatabaseInstance &db);

Expand Down
1 change: 1 addition & 0 deletions spatial/src/spatial/core/functions/scalar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(EXTENSION_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/st_contains.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_dimension.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_distance.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_dump.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_endpoint.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_extent.cpp
${CMAKE_CURRENT_SOURCE_DIR}/st_exteriorring.cpp
Expand Down
2 changes: 2 additions & 0 deletions spatial/src/spatial/core/functions/scalar/st_asgeojson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ namespace spatial {

namespace core {

using namespace duckdb_yyjson_spatial;

class JSONAllocator {
// Stolen from the JSON extension :)
public:
Expand Down
155 changes: 155 additions & 0 deletions spatial/src/spatial/core/functions/scalar/st_dump.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#include "spatial/common.hpp"
#include "spatial/core/types.hpp"
#include "spatial/core/functions/scalar.hpp"
#include "spatial/core/functions/common.hpp"
#include "spatial/core/geometry/geometry.hpp"

#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
#include "duckdb/common/vector_operations/unary_executor.hpp"
#include "duckdb/common/vector_operations/binary_executor.hpp"

namespace spatial {

namespace core {

static void DumpFunction(DataChunk &args, ExpressionState &state, Vector &result) {
auto &lstate = GeometryFunctionLocalState::ResetAndGet(state);
auto count = args.size();

auto &geom_vec = args.data[0];
UnifiedVectorFormat geom_format;
geom_vec.ToUnifiedFormat(count, geom_format);

idx_t total_geom_count = 0;
idx_t total_path_count = 0;

for(idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) {
auto in_row_idx = geom_format.sel->get_index(out_row_idx);

if (!geom_format.validity.RowIsValid(in_row_idx)) {
FlatVector::SetNull(result, out_row_idx, true);
continue;
}

auto geometry_blob = UnifiedVectorFormat::GetData<string_t>(geom_format)[in_row_idx];
auto geometry = lstate.factory.Deserialize(geometry_blob);

vector<std::tuple<Geometry, vector<int32_t>>> stack;
vector<std::tuple<Geometry, vector<int32_t>>> items;

stack.emplace_back(geometry, vector<int32_t>());

while(!stack.empty()) {
auto current = stack.back();
auto current_geom = std::get<0>(current);
auto current_path = std::get<1>(current);

stack.pop_back();

if(current_geom.Type() == GeometryType::MULTIPOINT) {
auto mpoint = current_geom.GetMultiPoint();
for (int32_t i = 0; i < mpoint.Count(); i++) {
auto path = current_path;
path.push_back(i + 1); // path is 1-indexed
stack.emplace_back(mpoint[i], path);
}
}
else if(current_geom.Type() == GeometryType::MULTILINESTRING) {
auto mline = current_geom.GetMultiLineString();
for (int32_t i = 0; i < mline.Count(); i++) {
auto path = current_path;
path.push_back(i + 1);
stack.emplace_back(mline[i], path);
}
}
else if(current_geom.Type() == GeometryType::MULTIPOLYGON) {
auto mpoly = current_geom.GetMultiPolygon();
for (int32_t i = 0; i < mpoly.Count(); i++) {
auto path = current_path;
path.push_back(i + 1);
stack.emplace_back(mpoly[i], path);
}
}
else if (current_geom.Type() == GeometryType::GEOMETRYCOLLECTION) {
auto collection = current_geom.GetGeometryCollection();
for (int32_t i = 0; i < collection.Count(); i++) {
auto path = current_path;
path.push_back(i + 1);
stack.emplace_back(collection[i], path);
}
} else {
items.push_back(current);
}
}

// Finally reverse the results
std::reverse(items.begin(), items.end());

// Push to the result vector
auto result_entries = ListVector::GetData(result);

auto geom_offset = total_geom_count;
auto geom_length = items.size();

result_entries[out_row_idx].length = geom_length;
result_entries[out_row_idx].offset = geom_offset;

total_geom_count += geom_length;

ListVector::Reserve(result, total_geom_count);
ListVector::SetListSize(result, total_geom_count);

auto &result_list = ListVector::GetEntry(result);
auto &result_list_children = StructVector::GetEntries(result_list);
auto &result_geom_vec = result_list_children[0];
auto &result_path_vec = result_list_children[1];

auto geom_data = FlatVector::GetData<string_t>(*result_geom_vec);
for(idx_t i = 0; i < geom_length; i++) {
// Write the geometry
auto &item_blob = std::get<0>(items[i]);
geom_data[geom_offset + i] = lstate.factory.Serialize(*result_geom_vec, item_blob);

// Now write the paths
auto &path = std::get<1>(items[i]);
auto path_offset = total_path_count;
auto path_length = path.size();

total_path_count += path_length;

ListVector::Reserve(*result_path_vec, total_path_count);
ListVector::SetListSize(*result_path_vec, total_path_count);

auto path_entries = ListVector::GetData(*result_path_vec);

path_entries[geom_offset + i].offset = path_offset;
path_entries[geom_offset + i].length = path_length;

auto &path_data_vec = ListVector::GetEntry(*result_path_vec);
auto path_data = FlatVector::GetData<int32_t>(path_data_vec);

for(idx_t j = 0; j < path_length; j++) {
path_data[path_offset + j] = path[j];
}
}
}

if(count == 1) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
}
}

// Registers the scalar function set "ST_Dump" on the given database instance.
// The single overload takes a GEOMETRY and returns
// LIST(STRUCT(geom GEOMETRY, path LIST(INTEGER))), implemented by DumpFunction
// with a per-expression GeometryFunctionLocalState.
void CoreScalarFunctions::RegisterStDump(DatabaseInstance &db) {
	// Each dumped element pairs a geometry with the index path leading to it.
	const auto path_type = LogicalType::LIST(LogicalType::INTEGER);
	const auto element_type = LogicalType::STRUCT({{"geom", GeoTypes::GEOMETRY()}, {"path", path_type}});
	const auto return_type = LogicalType::LIST(element_type);

	ScalarFunction dump_function({GeoTypes::GEOMETRY()}, return_type, DumpFunction, nullptr, nullptr, nullptr,
	                             GeometryFunctionLocalState::Init);

	ScalarFunctionSet dump_set("ST_Dump");
	dump_set.AddFunction(dump_function);

	ExtensionUtil::RegisterFunction(db, dump_set);
}

} // namespace core

} // namespace spatial
9 changes: 7 additions & 2 deletions spatial/src/spatial/gdal/functions/st_write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,14 @@ struct GlobalState : public GlobalFunctionData {
//===--------------------------------------------------------------------===//
// Bind
//===--------------------------------------------------------------------===//
static unique_ptr<FunctionData> Bind(ClientContext &context, CopyInfo &info, vector<string> &names,
// The parameters are const in duckdb > 0.9.1, ifdef so we can build for both versions for now.
#if DUCKDB_PATCH_VERSION == 1
static unique_ptr<FunctionData> Bind(ClientContext &context, CopyInfo &info, vector<string> &names,
vector<LogicalType> &sql_types) {

#else
static unique_ptr<FunctionData> Bind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
const vector<LogicalType> &sql_types) {
#endif
GdalFileHandler::SetLocalClientContext(context);

auto bind_data = make_uniq<BindData>(info.file_path, sql_types, names);
Expand Down
2 changes: 1 addition & 1 deletion spatial/third_party/yyjson/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

set(EXTENSION_SOURCES
${EXTENSION_SOURCES}
${CMAKE_CURRENT_SOURCE_DIR}/yyjson.c
${CMAKE_CURRENT_SOURCE_DIR}/yyjson.cpp
PARENT_SCOPE
)
4 changes: 4 additions & 0 deletions spatial/third_party/yyjson/include/yyjson.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

/** @file yyjson.h */


#ifndef YYJSON_H
#define YYJSON_H

Expand All @@ -24,6 +25,7 @@
#include <float.h>


namespace duckdb_yyjson_spatial {

/*==============================================================================
* Compile-time Options
Expand Down Expand Up @@ -6243,3 +6245,5 @@ yyjson_api_inline bool yyjson_get_str_pointer(
#endif /* extern "C" end */

#endif /* YYJSON_H */

} // namespace duckdb_yyjson_spatial
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stdio.h>
#include <math.h>

namespace duckdb_yyjson_spatial {


/*==============================================================================
Expand Down Expand Up @@ -8410,3 +8411,5 @@ bool yyjson_mut_write_file(const char *path,
#elif defined(_MSC_VER)
# pragma warning(pop)
#endif /* warning suppress end */

} // namespace duckdb_yyjson_spatial
74 changes: 74 additions & 0 deletions test/sql/geometry/st_dump.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
require spatial

# Basic test
query II
SELECT UNNEST(st_dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), POINT (2 2), GEOMETRYCOLLECTION(POINT (3 3)))')), recursive := true);
----
POINT (1 1) [1]
POINT (2 2) [2]
POINT (3 3) [3, 1]

query II
SELECT UNNEST(st_dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), GEOMETRYCOLLECTION(POINT (3 3)), POINT (2 2))')), recursive := true);
----
POINT (1 1) [1]
POINT (3 3) [2, 1]
POINT (2 2) [3]

# Test empty collection
query I
SElECT ST_Dump(ST_GeomFromText('GEOMETRYCOLLECTION EMPTY'));
----
[]

# Test collection with one point
query I
SElECT ST_Dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (0 0))'));
----
[{'geom': POINT (0 0), 'path': [1]}]

# Test with multipoint
query II
SELECT UNNEST(ST_Dump(ST_GeomFromText('MULTIPOINT ((0 0), (1 1)))')), recursive := true);
----
POINT (0 0) [1]
POINT (1 1) [2]

# Test with multilinestring
query II
SELECT UNNEST (ST_Dump(ST_GeomFromText('MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))')), recursive := true);
----
LINESTRING (0 0, 1 1) [1]
LINESTRING (2 2, 3 3) [2]

# Test with multipolygon
query II
SELECT UNNEST(ST_Dump(ST_GeomFromText('MULTIPOLYGON (((0 0, 1 1, 1 0, 0 0)), ((2 2, 3 3, 3 2, 2 2))))')), recursive := true);
----
POLYGON ((0 0, 1 1, 1 0, 0 0)) [1]
POLYGON ((2 2, 3 3, 3 2, 2 2)) [2]

# Test complex
query II rowsort
SELECT UNNEST(ST_Dump(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), LINESTRING (0 0, 1 1), POLYGON ((0 0, 1 1, 1 0, 0 0)), MULTIPOLYGON (((0 0, 1 1, 1 0, 0 0)), ((2 2, 3 3, 3 2, 2 2))), GEOMETRYCOLLECTION (POINT (3 3)))')), recursive := true);
----
LINESTRING (0 0, 1 1) [2]
POINT (1 1) [1]
POINT (3 3) [5, 1]
POLYGON ((0 0, 1 1, 1 0, 0 0)) [3]
POLYGON ((0 0, 1 1, 1 0, 0 0)) [4, 1]
POLYGON ((2 2, 3 3, 3 2, 2 2)) [4, 2]


# Test with intermittent nulls
query I
SELECT ST_Dump(geom)
FROM (VALUES
(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), POINT (2 2), GEOMETRYCOLLECTION(POINT (3 3)))')),
(NULL),
(ST_GeomFromText('GEOMETRYCOLLECTION (POINT (1 1), GEOMETRYCOLLECTION(POINT (3 3)), POINT (2 2))'))
) as t(geom)
----
[{'geom': POINT (1 1), 'path': [1]}, {'geom': POINT (2 2), 'path': [2]}, {'geom': POINT (3 3), 'path': [3, 1]}]
NULL
[{'geom': POINT (1 1), 'path': [1]}, {'geom': POINT (3 3), 'path': [2, 1]}, {'geom': POINT (2 2), 'path': [3]}]
Loading