From 52898807a0d8eb641fdd3ed5fbb7ba50e5fb4d0d Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 3 Apr 2024 05:52:52 -0500 Subject: [PATCH 1/9] [c++,r] Move `nanoarrow` to `src/external` --- apis/r/inst/include/tiledbsoma_types.h | 2 +- apis/r/src/rinterface.cpp | 8 ++++---- apis/r/src/riterator.cpp | 13 ++++--------- apis/r/src/rutilities.cpp | 9 +++------ libtiledbsoma/CMakeLists.txt | 2 +- libtiledbsoma/src/CMakeLists.txt | 15 ++++++++------- .../include/nanoarrow}/nanoarrow.h | 0 .../include/nanoarrow}/nanoarrow.hpp | 0 .../{utils => external/src/nanoarrow}/nanoarrow.c | 0 libtiledbsoma/src/utils/arrow_adapter.h | 2 +- 10 files changed, 22 insertions(+), 29 deletions(-) rename libtiledbsoma/src/{utils => external/include/nanoarrow}/nanoarrow.h (100%) rename libtiledbsoma/src/{utils => external/include/nanoarrow}/nanoarrow.hpp (100%) rename libtiledbsoma/src/{utils => external/src/nanoarrow}/nanoarrow.c (100%) diff --git a/apis/r/inst/include/tiledbsoma_types.h b/apis/r/inst/include/tiledbsoma_types.h index 7bf0c44412..27f327c085 100644 --- a/apis/r/inst/include/tiledbsoma_types.h +++ b/apis/r/inst/include/tiledbsoma_types.h @@ -15,7 +15,7 @@ #define TILEDB_NO_API_DEPRECATION_WARNINGS #endif -#include // for C interface to Arrow +#include // for C interface to Arrow #include // for QueryCondition etc #define ARROW_SCHEMA_AND_ARRAY_DEFINED 1 #include diff --git a/apis/r/src/rinterface.cpp b/apis/r/src/rinterface.cpp index 316267d167..dba41a3566 100644 --- a/apis/r/src/rinterface.cpp +++ b/apis/r/src/rinterface.cpp @@ -1,7 +1,7 @@ -#include // for R interface to C++ -#include // for C interface to Arrow (via R package) -#include // for C/C++ interface to Arrow -#include // for fromInteger64 +#include // for R interface to C++ +#include // for C interface to Arrow (via R package) +#include // for C/C++ interface to Arrow +#include // for fromInteger64 // we currently get deprecation warnings by default which are noisy #ifndef TILEDB_NO_API_DEPRECATION_WARNINGS diff --git a/apis/r/src/riterator.cpp b/apis/r/src/riterator.cpp index 8f94d4af07..f0d9aa4692 100644 --- a/apis/r/src/riterator.cpp +++ b/apis/r/src/riterator.cpp @@ -3,20 +3,15 @@ #define TILEDB_NO_API_DEPRECATION_WARNINGS #endif -//#define RCPP_DEBUG_LEVEL 5 - -#include // for R interface to C++ -#include // for C interface to Arrow (via R package nanoarrow) -#include -#include // for fromInteger64 +#include // for R interface to C++ +#include // for C interface to Arrow (via R package nanoarrow) +#include +#include // for fromInteger64 #include #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 4 #include #endif - -// We get these via nanoarrow and must not include carrow.h again -#define ARROW_SCHEMA_AND_ARRAY_DEFINED 1 #include #include "rutilities.h" // local declarations diff --git a/apis/r/src/rutilities.cpp b/apis/r/src/rutilities.cpp index 4db83da0b6..7f58300d00 100644 --- a/apis/r/src/rutilities.cpp +++ b/apis/r/src/rutilities.cpp @@ -4,12 +4,9 @@ #define TILEDB_NO_API_DEPRECATION_WARNINGS #endif -#include // for R interface to C++ -#include // for C interface to Arrow -#include // for fromInteger64 - -// We get these via nanoarrow and must cannot include carrow.h again -#define ARROW_SCHEMA_AND_ARRAY_DEFINED 1 +#include // for R interface to C++ +#include // for C interface to Arrow +#include // for fromInteger64 #include #include "rutilities.h" // local declarations diff --git a/libtiledbsoma/CMakeLists.txt b/libtiledbsoma/CMakeLists.txt index a055d71cd2..8ecb95c376 100644 --- a/libtiledbsoma/CMakeLists.txt +++ b/libtiledbsoma/CMakeLists.txt @@ -4,7 +4,7 @@ # # The MIT License # -# Copyright (c) 2022-2023 TileDB, Inc. +# Copyright (c) 2022-2024 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/libtiledbsoma/src/CMakeLists.txt b/libtiledbsoma/src/CMakeLists.txt index beece2fedc..65f6de7a9a 100644 --- a/libtiledbsoma/src/CMakeLists.txt +++ b/libtiledbsoma/src/CMakeLists.txt @@ -72,10 +72,10 @@ add_library(TILEDB_SOMA_OBJECTS OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/utils/stats.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/util.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/version.cc - ${CMAKE_CURRENT_SOURCE_DIR}/utils/nanoarrow.c ${CMAKE_CURRENT_SOURCE_DIR}/external/src/thread_pool/thread_pool.cc ${CMAKE_CURRENT_SOURCE_DIR}/external/src/thread_pool/status.cc + ${CMAKE_CURRENT_SOURCE_DIR}/external/src/nanoarrow/nanoarrow.c ) message(STATUS "Building TileDB without deprecation warnings") @@ -97,6 +97,7 @@ target_include_directories(TILEDB_SOMA_OBJECTS ${CMAKE_CURRENT_SOURCE_DIR}/soma ${CMAKE_CURRENT_SOURCE_DIR}/external/khash ${CMAKE_CURRENT_SOURCE_DIR}/external/include + ${CMAKE_CURRENT_SOURCE_DIR}/external/include/nanoarrow $ $ ${pybind11_INCLUDE_DIRS} @@ -216,12 +217,6 @@ install(FILES DESTINATION "include/tiledbsoma/reindexer/" ) -install(FILES - ${CMAKE_CURRENT_SOURCE_DIR}/utils/nanoarrow.h - ${CMAKE_CURRENT_SOURCE_DIR}/utils/nanoarrow.hpp - DESTINATION "include/tiledbsoma/utils/" -) - install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/tiledbsoma/tiledbsoma DESTINATION "include/tiledbsoma" @@ -242,6 +237,12 @@ install(FILES DESTINATION "include/tiledbsoma/soma/span" ) +install(FILES + ${CMAKE_CURRENT_SOURCE_DIR}/external/include/nanoarrow/nanoarrow.h + ${CMAKE_CURRENT_SOURCE_DIR}/external/include/nanoarrow/nanoarrow.hpp + DESTINATION "include/tiledbsoma/nanoarrow" +) + # ########################################################### # API symbol exports diff --git a/libtiledbsoma/src/utils/nanoarrow.h b/libtiledbsoma/src/external/include/nanoarrow/nanoarrow.h similarity index 100% rename from libtiledbsoma/src/utils/nanoarrow.h rename to libtiledbsoma/src/external/include/nanoarrow/nanoarrow.h diff --git a/libtiledbsoma/src/utils/nanoarrow.hpp b/libtiledbsoma/src/external/include/nanoarrow/nanoarrow.hpp similarity index 100% rename from libtiledbsoma/src/utils/nanoarrow.hpp rename to libtiledbsoma/src/external/include/nanoarrow/nanoarrow.hpp diff --git a/libtiledbsoma/src/utils/nanoarrow.c b/libtiledbsoma/src/external/src/nanoarrow/nanoarrow.c similarity index 100% rename from libtiledbsoma/src/utils/nanoarrow.c rename to libtiledbsoma/src/external/src/nanoarrow/nanoarrow.c diff --git a/libtiledbsoma/src/utils/arrow_adapter.h b/libtiledbsoma/src/utils/arrow_adapter.h index 6417be94c8..d0821dfc51 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.h +++ b/libtiledbsoma/src/utils/arrow_adapter.h @@ -8,7 +8,7 @@ // https://arrow.apache.org/docs/format/Columnar.html#buffer-listing-for-each-layout // https://arrow.apache.org/docs/format/CDataInterface.html#exporting-a-simple-int32-array -#include "nanoarrow.hpp" +#include "tiledbsoma/nanoarrow/nanoarrow.hpp" namespace tiledbsoma { From c9659ea07e7bd8588ee182fbcfc728ed16d9af6a Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 3 Apr 2024 07:19:32 -0500 Subject: [PATCH 2/9] Slight tweak to CMakeLists, adjustment for vendoring --- apis/r/src/borrowed.c | 146 +++++++++++++++++++++++++++++++ libtiledbsoma/src/CMakeLists.txt | 6 +- 2 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 apis/r/src/borrowed.c diff --git a/apis/r/src/borrowed.c b/apis/r/src/borrowed.c new file mode 100644 index 0000000000..d1293dfaa0 --- /dev/null +++ b/apis/r/src/borrowed.c @@ -0,0 +1,146 @@ + +#include "tiledbsoma/nanoarrow/nanoarrow.h" + +// We need three entry points from nanoarrow that are declared 'static' in the official +// (now vendored) C library so we bring them here +void ArrowSchemaReleaseInternal(struct ArrowSchema* schema) { + if (schema->format != NULL) ArrowFree((void*)schema->format); + if (schema->name != NULL) ArrowFree((void*)schema->name); + if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); + + // This object owns the memory for all the children, but those + // children may have been generated elsewhere and might have + // their own release() callback. + if (schema->children != NULL) { + for (int64_t i = 0; i < schema->n_children; i++) { + if (schema->children[i] != NULL) { + if (schema->children[i]->release != NULL) { + ArrowSchemaRelease(schema->children[i]); + } + + ArrowFree(schema->children[i]); + } + } + + ArrowFree(schema->children); + } + + // This object owns the memory for the dictionary but it + // may have been generated somewhere else and have its own + // release() callback. + if (schema->dictionary != NULL) { + if (schema->dictionary->release != NULL) { + ArrowSchemaRelease(schema->dictionary); + } + + ArrowFree(schema->dictionary); + } + + // private data not currently used + if (schema->private_data != NULL) { + ArrowFree(schema->private_data); + } + + schema->release = NULL; +} + +void ArrowArrayReleaseInternal(struct ArrowArray* array) { + // Release buffers held by this array + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + if (private_data != NULL) { + ArrowBitmapReset(&private_data->bitmap); + ArrowBufferReset(&private_data->buffers[0]); + ArrowBufferReset(&private_data->buffers[1]); + ArrowFree(private_data); + } + + // This object owns the memory for all the children, but those + // children may have been generated elsewhere and might have + // their own release() callback. + if (array->children != NULL) { + for (int64_t i = 0; i < array->n_children; i++) { + if (array->children[i] != NULL) { + if (array->children[i]->release != NULL) { + ArrowArrayRelease(array->children[i]); + } + + ArrowFree(array->children[i]); + } + } + + ArrowFree(array->children); + } + + // This object owns the memory for the dictionary but it + // may have been generated somewhere else and have its own + // release() callback. + if (array->dictionary != NULL) { + if (array->dictionary->release != NULL) { + ArrowArrayRelease(array->dictionary); + } + + ArrowFree(array->dictionary); + } + + // Mark released + array->release = NULL; +} + +ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, + enum ArrowType storage_type) { + switch (storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + case NANOARROW_TYPE_NA: + array->n_buffers = 0; + break; + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + case NANOARROW_TYPE_STRUCT: + case NANOARROW_TYPE_SPARSE_UNION: + array->n_buffers = 1; + break; + + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_MAP: + case NANOARROW_TYPE_BOOL: + case NANOARROW_TYPE_UINT8: + case NANOARROW_TYPE_INT8: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_HALF_FLOAT: + case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL128: + case NANOARROW_TYPE_DECIMAL256: + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + case NANOARROW_TYPE_DENSE_UNION: + array->n_buffers = 2; + break; + + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + array->n_buffers = 3; + break; + + default: + return EINVAL; + + return NANOARROW_OK; + } + + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + private_data->storage_type = storage_type; + return NANOARROW_OK; +} diff --git a/libtiledbsoma/src/CMakeLists.txt b/libtiledbsoma/src/CMakeLists.txt index 65f6de7a9a..c406a7233f 100644 --- a/libtiledbsoma/src/CMakeLists.txt +++ b/libtiledbsoma/src/CMakeLists.txt @@ -90,6 +90,11 @@ target_compile_options( set_property(TARGET TILEDB_SOMA_OBJECTS PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(TILEDB_SOMA_OBJECTS + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/external/include/nanoarrow +) + target_include_directories(TILEDB_SOMA_OBJECTS PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} @@ -97,7 +102,6 @@ target_include_directories(TILEDB_SOMA_OBJECTS ${CMAKE_CURRENT_SOURCE_DIR}/soma ${CMAKE_CURRENT_SOURCE_DIR}/external/khash ${CMAKE_CURRENT_SOURCE_DIR}/external/include - ${CMAKE_CURRENT_SOURCE_DIR}/external/include/nanoarrow $ $ ${pybind11_INCLUDE_DIRS} From e57367f9056ff2d1e1bf5c9f0eff04f424b22c5d Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 3 Apr 2024 07:57:02 -0500 Subject: [PATCH 3/9] Un-clang-format files restoring release 0.4.0 copies --- .../external/include/nanoarrow/nanoarrow.h | 4586 ++++++++--------- .../src/external/src/nanoarrow/nanoarrow.c | 9 +- 2 files changed, 2199 insertions(+), 2396 deletions(-) diff --git a/libtiledbsoma/src/external/include/nanoarrow/nanoarrow.h b/libtiledbsoma/src/external/include/nanoarrow/nanoarrow.h index db53e4bc94..e338560f1a 100644 --- a/libtiledbsoma/src/external/include/nanoarrow/nanoarrow.h +++ b/libtiledbsoma/src/external/include/nanoarrow/nanoarrow.h @@ -23,9 +23,9 @@ #define NANOARROW_VERSION_PATCH 0 #define NANOARROW_VERSION "0.4.0-SNAPSHOT" -#define NANOARROW_VERSION_INT \ - (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ - NANOARROW_VERSION_PATCH) +#define NANOARROW_VERSION_INT \ + (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ + NANOARROW_VERSION_PATCH) // #define NANOARROW_NAMESPACE YourNamespaceHere @@ -53,6 +53,8 @@ #include #include + + #if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE) #include #include @@ -68,11 +70,11 @@ extern "C" { /// \defgroup nanoarrow-arrow-cdata Arrow C Data interface /// /// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html) -/// and Arrow C Stream -/// (https://arrow.apache.org/docs/format/CStreamInterface.html) interfaces are -/// part of the Arrow Columnar Format specification -/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow -/// documentation for documentation of these structures. +/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html) +/// interfaces are part of the +/// Arrow Columnar Format specification +/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for +/// documentation of these structures. /// /// @{ @@ -84,36 +86,36 @@ extern "C" { #define ARROW_FLAG_MAP_KEYS_SORTED 4 struct ArrowSchema { - // Array type description - const char* format; - const char* name; - const char* metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema** children; - struct ArrowSchema* dictionary; - - // Release callback - void (*release)(struct ArrowSchema*); - // Opaque producer-specific data - void* private_data; + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; }; struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void** buffers; - struct ArrowArray** children; - struct ArrowArray* dictionary; - - // Release callback - void (*release)(struct ArrowArray*); - // Opaque producer-specific data - void* private_data; + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; }; #endif // ARROW_C_DATA_INTERFACE @@ -122,43 +124,39 @@ struct ArrowArray { #define ARROW_C_STREAM_INTERFACE struct ArrowArrayStream { - // Callback to get the stream type - // (will be the same for all arrays in the stream). - // - // Return value: 0 if successful, an `errno`-compatible error code - // otherwise. - // - // If successful, the ArrowSchema must be released independently from the - // stream. - int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); - - // Callback to get the next array - // (if no error and the array is released, the stream has ended) - // - // Return value: 0 if successful, an `errno`-compatible error code - // otherwise. - // - // If successful, the ArrowArray must be released independently from the - // stream. - int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); - - // Callback to get optional detailed error information. - // This must only be called if the last stream operation failed - // with a non-0 return code. - // - // Return value: pointer to a null-terminated character array describing - // the last error, or NULL if no description is available. - // - // The returned pointer is only valid until the next operation on this - // stream (including release). - const char* (*get_last_error)(struct ArrowArrayStream*); - - // Release callback: release the stream's own resources. - // Note that arrays returned by `get_next` must be individually released. - void (*release)(struct ArrowArrayStream*); - - // Opaque producer-specific data - void* private_data; + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; }; #endif // ARROW_C_STREAM_INTERFACE @@ -171,54 +169,44 @@ struct ArrowArrayStream { #define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y) #define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) \ - return NAME; \ - } while (0) + do { \ + const int NAME = (EXPR); \ + if (NAME) return NAME; \ + } while (0) #define _NANOARROW_CHECK_RANGE(x_, min_, max_) \ - NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL) + NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL) #define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \ - NANOARROW_RETURN_NOT_OK((x_ <= max_) ? NANOARROW_OK : EINVAL) + NANOARROW_RETURN_NOT_OK((x_ <= max_) ? NANOARROW_OK : EINVAL) #if defined(NANOARROW_DEBUG) -#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \ - NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) { \ - ArrowErrorSet( \ - (ERROR_PTR_EXPR), \ - "%s failed with errno %d\n* %s:%d", \ - EXPR_STR, \ - NAME, \ - __FILE__, \ - __LINE__); \ - return NAME; \ - } \ - } while (0) +#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d", EXPR_STR, \ + NAME, __FILE__, __LINE__); \ + return NAME; \ + } \ + } while (0) #else -#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \ - NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) { \ - ArrowErrorSet( \ - (ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME); \ - return NAME; \ - } \ - } while (0) +#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME); \ + return NAME; \ + } \ + } while (0) #endif #if defined(NANOARROW_DEBUG) // For checking ArrowErrorSet() calls for valid printf format strings/arguments -// If using mingw's c99-compliant printf, we need a different format-checking -// attribute +// If using mingw's c99-compliant printf, we need a different format-checking attribute #if defined(__USE_MINGW_ANSI_STDIO) && defined(__MINGW_PRINTF_FORMAT) #define NANOARROW_CHECK_PRINTF_ATTRIBUTE \ - __attribute__((format(__MINGW_PRINTF_FORMAT, 2, 3))) + __attribute__((format(__MINGW_PRINTF_FORMAT, 2, 3))) #elif defined(__GNUC__) #define NANOARROW_CHECK_PRINTF_ATTRIBUTE __attribute__((format(printf, 2, 3))) #else @@ -255,25 +243,24 @@ typedef int ArrowErrorCode; /// \brief Flags supported by ArrowSchemaViewInit() /// \ingroup nanoarrow-schema-view -#define NANOARROW_FLAG_ALL_SUPPORTED \ - (ARROW_FLAG_DICTIONARY_ORDERED | ARROW_FLAG_NULLABLE | \ - ARROW_FLAG_MAP_KEYS_SORTED) +#define NANOARROW_FLAG_ALL_SUPPORTED \ + (ARROW_FLAG_DICTIONARY_ORDERED | ARROW_FLAG_NULLABLE | ARROW_FLAG_MAP_KEYS_SORTED) /// \brief Error type containing a UTF-8 encoded message. /// \ingroup nanoarrow-errors struct ArrowError { - /// \brief A character buffer with space for an error message. - char message[1024]; + /// \brief A character buffer with space for an error message. + char message[1024]; }; -/// \brief Ensure an ArrowError is null-terminated by zeroing the first -/// character. \ingroup nanoarrow-errors +/// \brief Ensure an ArrowError is null-terminated by zeroing the first character. +/// \ingroup nanoarrow-errors /// /// If error is NULL, this function does nothing. static inline void ArrowErrorInit(struct ArrowError* error) { - if (error != NULL) { - error->message[0] = '\0'; - } + if (error != NULL) { + error->message[0] = '\0'; + } } /// \brief Get the contents of an error @@ -282,93 +269,79 @@ static inline void ArrowErrorInit(struct ArrowError* error) { /// If error is NULL, returns "", or returns the contents of the error message /// otherwise. static inline const char* ArrowErrorMessage(struct ArrowError* error) { - if (error == NULL) { - return ""; - } else { - return error->message; - } + if (error == NULL) { + return ""; + } else { + return error->message; + } } /// \brief Set the contents of an error from an existing null-terminated string /// \ingroup nanoarrow-errors /// /// If error is NULL, this function does nothing. -static inline void ArrowErrorSetString( - struct ArrowError* error, const char* src) { - if (error == NULL) { - return; - } +static inline void ArrowErrorSetString(struct ArrowError* error, const char* src) { + if (error == NULL) { + return; + } - int64_t src_len = strlen(src); - if (src_len >= ((int64_t)sizeof(error->message))) { - memcpy(error->message, src, sizeof(error->message) - 1); - error->message[sizeof(error->message) - 1] = '\0'; - } else { - memcpy(error->message, src, src_len); - error->message[src_len] = '\0'; - } + int64_t src_len = strlen(src); + if (src_len >= ((int64_t)sizeof(error->message))) { + memcpy(error->message, src, sizeof(error->message) - 1); + error->message[sizeof(error->message) - 1] = '\0'; + } else { + memcpy(error->message, src, src_len); + error->message[src_len] = '\0'; + } } /// \brief Check the result of an expression and return it if not NANOARROW_OK /// \ingroup nanoarrow-errors #define NANOARROW_RETURN_NOT_OK(EXPR) \ - _NANOARROW_RETURN_NOT_OK_IMPL( \ - _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR) + _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR) /// \brief Check the result of an expression and return it if not NANOARROW_OK, /// adding an auto-generated message to an ArrowError. /// \ingroup nanoarrow-errors /// /// This macro is used to ensure that functions that accept an ArrowError -/// as input always set its message when returning an error code (e.g., when -/// calling a nanoarrow function that does *not* accept ArrowError). +/// as input always set its message when returning an error code (e.g., when calling +/// a nanoarrow function that does *not* accept ArrowError). #define NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR) \ - _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \ - _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), \ - EXPR, \ - ERROR_EXPR, \ - #EXPR) + _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \ + _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR) #if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE) -#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \ - do { \ - fprintf( \ - stderr, \ - "%s failed with code %d\n* %s:%d\n", \ - EXPR_STR, \ - (int)(VALUE), \ - __FILE__, \ - (int)__LINE__); \ - abort(); \ - } while (0) +#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \ + do { \ + fprintf(stderr, "%s failed with code %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \ + __FILE__, (int)__LINE__); \ + abort(); \ + } while (0) #endif #if defined(NANOARROW_DEBUG) #define _NANOARROW_ASSERT_OK_IMPL(NAME, EXPR, EXPR_STR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) \ - NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR); \ - } while (0) + do { \ + const int NAME = (EXPR); \ + if (NAME) NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR); \ + } while (0) /// \brief Assert that an expression's value is NANOARROW_OK /// \ingroup nanoarrow-errors /// -/// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is -/// true), print a message to stderr and abort. If nanoarrow was built in -/// release mode, this statement has no effect. You can customize fatal error -/// behaviour be defining the NANOARROW_PRINT_AND_DIE macro before including -/// nanoarrow.h This macro is provided as a convenience for users and is not -/// used internally. +/// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true), +/// print a message to stderr and abort. If nanoarrow was built in release mode, +/// this statement has no effect. You can customize fatal error behaviour +/// be defining the NANOARROW_PRINT_AND_DIE macro before including nanoarrow.h +/// This macro is provided as a convenience for users and is not used internally. #define NANOARROW_ASSERT_OK(EXPR) \ - _NANOARROW_ASSERT_OK_IMPL( \ - _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR) + _NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR) -#define _NANOARROW_DCHECK_IMPL(EXPR, EXPR_STR) \ - do { \ - if (!(EXPR)) \ - NANOARROW_PRINT_AND_DIE(-1, EXPR_STR); \ - } while (0) +#define _NANOARROW_DCHECK_IMPL(EXPR, EXPR_STR) \ + do { \ + if (!(EXPR)) NANOARROW_PRINT_AND_DIE(-1, EXPR_STR); \ + } while (0) #define NANOARROW_DCHECK(EXPR) _NANOARROW_DCHECK_IMPL(EXPR, #EXPR) #else @@ -376,97 +349,92 @@ static inline void ArrowErrorSetString( #define NANOARROW_DCHECK(EXPR) #endif -static inline void ArrowSchemaMove( - struct ArrowSchema* src, struct ArrowSchema* dst) { - NANOARROW_DCHECK(src != NULL); - NANOARROW_DCHECK(dst != NULL); +static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); - memcpy(dst, src, sizeof(struct ArrowSchema)); - src->release = NULL; + memcpy(dst, src, sizeof(struct ArrowSchema)); + src->release = NULL; } static inline void ArrowSchemaRelease(struct ArrowSchema* schema) { - NANOARROW_DCHECK(schema != NULL); - schema->release(schema); - NANOARROW_DCHECK(schema->release == NULL); + NANOARROW_DCHECK(schema != NULL); + schema->release(schema); + NANOARROW_DCHECK(schema->release == NULL); } -static inline void ArrowArrayMove( - struct ArrowArray* src, struct ArrowArray* dst) { - NANOARROW_DCHECK(src != NULL); - NANOARROW_DCHECK(dst != NULL); +static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); - memcpy(dst, src, sizeof(struct ArrowArray)); - src->release = NULL; + memcpy(dst, src, sizeof(struct ArrowArray)); + src->release = NULL; } static inline void ArrowArrayRelease(struct ArrowArray* array) { - NANOARROW_DCHECK(array != NULL); - array->release(array); - NANOARROW_DCHECK(array->release == NULL); + NANOARROW_DCHECK(array != NULL); + array->release(array); + NANOARROW_DCHECK(array->release == NULL); } -static inline void ArrowArrayStreamMove( - struct ArrowArrayStream* src, struct ArrowArrayStream* dst) { - NANOARROW_DCHECK(src != NULL); - NANOARROW_DCHECK(dst != NULL); +static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dst) { + NANOARROW_DCHECK(src != NULL); + NANOARROW_DCHECK(dst != NULL); - memcpy(dst, src, sizeof(struct ArrowArrayStream)); - src->release = NULL; + memcpy(dst, src, sizeof(struct ArrowArrayStream)); + src->release = NULL; } static inline const char* ArrowArrayStreamGetLastError( struct ArrowArrayStream* array_stream) { - NANOARROW_DCHECK(array_stream != NULL); + NANOARROW_DCHECK(array_stream != NULL); - const char* value = array_stream->get_last_error(array_stream); - if (value == NULL) { - return ""; - } else { - return value; - } + const char* value = array_stream->get_last_error(array_stream); + if (value == NULL) { + return ""; + } else { + return value; + } } static inline ArrowErrorCode ArrowArrayStreamGetSchema( - struct ArrowArrayStream* array_stream, - struct ArrowSchema* out, + struct ArrowArrayStream* array_stream, struct ArrowSchema* out, struct ArrowError* error) { - NANOARROW_DCHECK(array_stream != NULL); + NANOARROW_DCHECK(array_stream != NULL); - int result = array_stream->get_schema(array_stream, out); - if (result != NANOARROW_OK && error != NULL) { - ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); - } + int result = array_stream->get_schema(array_stream, out); + if (result != NANOARROW_OK && error != NULL) { + ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); + } - return result; + return result; } static inline ArrowErrorCode ArrowArrayStreamGetNext( - struct ArrowArrayStream* array_stream, - struct ArrowArray* out, + struct ArrowArrayStream* array_stream, struct ArrowArray* out, struct ArrowError* error) { - NANOARROW_DCHECK(array_stream != NULL); + NANOARROW_DCHECK(array_stream != NULL); - int result = array_stream->get_next(array_stream, out); - if (result != NANOARROW_OK && error != NULL) { - ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); - } + int result = array_stream->get_next(array_stream, out); + if (result != NANOARROW_OK && error != NULL) { + ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream)); + } - return result; + return result; } -static inline void ArrowArrayStreamRelease( - struct ArrowArrayStream* array_stream) { - NANOARROW_DCHECK(array_stream != NULL); - array_stream->release(array_stream); - NANOARROW_DCHECK(array_stream->release == NULL); +static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream) { + NANOARROW_DCHECK(array_stream != NULL); + array_stream->release(array_stream); + NANOARROW_DCHECK(array_stream->release == NULL); } static char _ArrowIsLittleEndian(void) { - uint32_t check = 1; - char first_byte; - memcpy(&first_byte, &check, sizeof(char)); - return first_byte; + uint32_t check = 1; + char first_byte; + memcpy(&first_byte, &check, sizeof(char)); + return first_byte; } /// \brief Arrow type enumerator @@ -476,45 +444,45 @@ static char _ArrowIsLittleEndian(void) { /// enumerator; however, the numeric values are specifically not equal /// (i.e., do not rely on numeric comparison). enum ArrowType { - NANOARROW_TYPE_UNINITIALIZED = 0, - NANOARROW_TYPE_NA = 1, - NANOARROW_TYPE_BOOL, - NANOARROW_TYPE_UINT8, - NANOARROW_TYPE_INT8, - NANOARROW_TYPE_UINT16, - NANOARROW_TYPE_INT16, - NANOARROW_TYPE_UINT32, - NANOARROW_TYPE_INT32, - NANOARROW_TYPE_UINT64, - NANOARROW_TYPE_INT64, - NANOARROW_TYPE_HALF_FLOAT, - NANOARROW_TYPE_FLOAT, - NANOARROW_TYPE_DOUBLE, - NANOARROW_TYPE_STRING, - NANOARROW_TYPE_BINARY, - NANOARROW_TYPE_FIXED_SIZE_BINARY, - NANOARROW_TYPE_DATE32, - NANOARROW_TYPE_DATE64, - NANOARROW_TYPE_TIMESTAMP, - NANOARROW_TYPE_TIME32, - NANOARROW_TYPE_TIME64, - NANOARROW_TYPE_INTERVAL_MONTHS, - NANOARROW_TYPE_INTERVAL_DAY_TIME, - NANOARROW_TYPE_DECIMAL128, - NANOARROW_TYPE_DECIMAL256, - NANOARROW_TYPE_LIST, - NANOARROW_TYPE_STRUCT, - NANOARROW_TYPE_SPARSE_UNION, - NANOARROW_TYPE_DENSE_UNION, - NANOARROW_TYPE_DICTIONARY, - NANOARROW_TYPE_MAP, - NANOARROW_TYPE_EXTENSION, - NANOARROW_TYPE_FIXED_SIZE_LIST, - NANOARROW_TYPE_DURATION, - NANOARROW_TYPE_LARGE_STRING, - NANOARROW_TYPE_LARGE_BINARY, - NANOARROW_TYPE_LARGE_LIST, - NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO + NANOARROW_TYPE_UNINITIALIZED = 0, + NANOARROW_TYPE_NA = 1, + NANOARROW_TYPE_BOOL, + NANOARROW_TYPE_UINT8, + NANOARROW_TYPE_INT8, + NANOARROW_TYPE_UINT16, + NANOARROW_TYPE_INT16, + NANOARROW_TYPE_UINT32, + NANOARROW_TYPE_INT32, + NANOARROW_TYPE_UINT64, + NANOARROW_TYPE_INT64, + NANOARROW_TYPE_HALF_FLOAT, + NANOARROW_TYPE_FLOAT, + NANOARROW_TYPE_DOUBLE, + NANOARROW_TYPE_STRING, + NANOARROW_TYPE_BINARY, + NANOARROW_TYPE_FIXED_SIZE_BINARY, + NANOARROW_TYPE_DATE32, + NANOARROW_TYPE_DATE64, + NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TYPE_TIME32, + NANOARROW_TYPE_TIME64, + NANOARROW_TYPE_INTERVAL_MONTHS, + NANOARROW_TYPE_INTERVAL_DAY_TIME, + NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL256, + NANOARROW_TYPE_LIST, + NANOARROW_TYPE_STRUCT, + NANOARROW_TYPE_SPARSE_UNION, + NANOARROW_TYPE_DENSE_UNION, + NANOARROW_TYPE_DICTIONARY, + NANOARROW_TYPE_MAP, + NANOARROW_TYPE_EXTENSION, + NANOARROW_TYPE_FIXED_SIZE_LIST, + NANOARROW_TYPE_DURATION, + NANOARROW_TYPE_LARGE_STRING, + NANOARROW_TYPE_LARGE_BINARY, + NANOARROW_TYPE_LARGE_LIST, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO }; /// \brief Get a string value of an enum ArrowType value @@ -524,86 +492,86 @@ enum ArrowType { static inline const char* ArrowTypeString(enum ArrowType type); static inline const char* ArrowTypeString(enum ArrowType type) { - switch (type) { - case NANOARROW_TYPE_NA: - return "na"; - case NANOARROW_TYPE_BOOL: - return "bool"; - case NANOARROW_TYPE_UINT8: - return "uint8"; - case NANOARROW_TYPE_INT8: - return "int8"; - case NANOARROW_TYPE_UINT16: - return "uint16"; - case NANOARROW_TYPE_INT16: - return "int16"; - case NANOARROW_TYPE_UINT32: - return "uint32"; - case NANOARROW_TYPE_INT32: - return "int32"; - case NANOARROW_TYPE_UINT64: - return "uint64"; - case NANOARROW_TYPE_INT64: - return "int64"; - case NANOARROW_TYPE_HALF_FLOAT: - return "half_float"; - case NANOARROW_TYPE_FLOAT: - return "float"; - case NANOARROW_TYPE_DOUBLE: - return "double"; - case NANOARROW_TYPE_STRING: - return "string"; - case NANOARROW_TYPE_BINARY: - return "binary"; - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - return "fixed_size_binary"; - case NANOARROW_TYPE_DATE32: - return "date32"; - case NANOARROW_TYPE_DATE64: - return "date64"; - case NANOARROW_TYPE_TIMESTAMP: - return "timestamp"; - case NANOARROW_TYPE_TIME32: - return "time32"; - case NANOARROW_TYPE_TIME64: - return "time64"; - case NANOARROW_TYPE_INTERVAL_MONTHS: - return "interval_months"; - case NANOARROW_TYPE_INTERVAL_DAY_TIME: - return "interval_day_time"; - case NANOARROW_TYPE_DECIMAL128: - return "decimal128"; - case NANOARROW_TYPE_DECIMAL256: - return "decimal256"; - case NANOARROW_TYPE_LIST: - return "list"; - case NANOARROW_TYPE_STRUCT: - return "struct"; - case NANOARROW_TYPE_SPARSE_UNION: - return "sparse_union"; - case NANOARROW_TYPE_DENSE_UNION: - return "dense_union"; - case NANOARROW_TYPE_DICTIONARY: - return "dictionary"; - case NANOARROW_TYPE_MAP: - return "map"; - case NANOARROW_TYPE_EXTENSION: - return "extension"; - case NANOARROW_TYPE_FIXED_SIZE_LIST: - return "fixed_size_list"; - case NANOARROW_TYPE_DURATION: - return "duration"; - case NANOARROW_TYPE_LARGE_STRING: - return "large_string"; - case NANOARROW_TYPE_LARGE_BINARY: - return "large_binary"; - case NANOARROW_TYPE_LARGE_LIST: - return "large_list"; - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: - return "interval_month_day_nano"; - default: - return NULL; - } + switch (type) { + case NANOARROW_TYPE_NA: + return "na"; + case NANOARROW_TYPE_BOOL: + return "bool"; + case NANOARROW_TYPE_UINT8: + return "uint8"; + case NANOARROW_TYPE_INT8: + return "int8"; + case NANOARROW_TYPE_UINT16: + return "uint16"; + case NANOARROW_TYPE_INT16: + return "int16"; + case NANOARROW_TYPE_UINT32: + return "uint32"; + case NANOARROW_TYPE_INT32: + return "int32"; + case NANOARROW_TYPE_UINT64: + return "uint64"; + case NANOARROW_TYPE_INT64: + return "int64"; + case NANOARROW_TYPE_HALF_FLOAT: + return "half_float"; + case NANOARROW_TYPE_FLOAT: + return "float"; + case NANOARROW_TYPE_DOUBLE: + return "double"; + case NANOARROW_TYPE_STRING: + return "string"; + case NANOARROW_TYPE_BINARY: + return "binary"; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + return "fixed_size_binary"; + case NANOARROW_TYPE_DATE32: + return "date32"; + case NANOARROW_TYPE_DATE64: + return "date64"; + case NANOARROW_TYPE_TIMESTAMP: + return "timestamp"; + case NANOARROW_TYPE_TIME32: + return "time32"; + case NANOARROW_TYPE_TIME64: + return "time64"; + case NANOARROW_TYPE_INTERVAL_MONTHS: + return "interval_months"; + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + return "interval_day_time"; + case NANOARROW_TYPE_DECIMAL128: + return "decimal128"; + case NANOARROW_TYPE_DECIMAL256: + return "decimal256"; + case NANOARROW_TYPE_LIST: + return "list"; + case NANOARROW_TYPE_STRUCT: + return "struct"; + case NANOARROW_TYPE_SPARSE_UNION: + return "sparse_union"; + case NANOARROW_TYPE_DENSE_UNION: + return "dense_union"; + case NANOARROW_TYPE_DICTIONARY: + return "dictionary"; + case NANOARROW_TYPE_MAP: + return "map"; + case NANOARROW_TYPE_EXTENSION: + return "extension"; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return "fixed_size_list"; + case NANOARROW_TYPE_DURATION: + return "duration"; + case NANOARROW_TYPE_LARGE_STRING: + return "large_string"; + case NANOARROW_TYPE_LARGE_BINARY: + return "large_binary"; + case NANOARROW_TYPE_LARGE_LIST: + return "large_list"; + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + return "interval_month_day_nano"; + default: + return NULL; + } } /// \brief Arrow time unit enumerator @@ -612,34 +580,29 @@ static inline const char* ArrowTypeString(enum ArrowType type) { /// These names and values map to the corresponding arrow::TimeUnit::type /// enumerator. enum ArrowTimeUnit { - NANOARROW_TIME_UNIT_SECOND = 0, - NANOARROW_TIME_UNIT_MILLI = 1, - NANOARROW_TIME_UNIT_MICRO = 2, - NANOARROW_TIME_UNIT_NANO = 3 + NANOARROW_TIME_UNIT_SECOND = 0, + NANOARROW_TIME_UNIT_MILLI = 1, + NANOARROW_TIME_UNIT_MICRO = 2, + NANOARROW_TIME_UNIT_NANO = 3 }; /// \brief Validation level enumerator /// \ingroup nanoarrow-array enum ArrowValidationLevel { - /// \brief Do not validate buffer sizes or content. - NANOARROW_VALIDATION_LEVEL_NONE = 0, - - /// \brief Validate buffer sizes that depend on array length but do not - /// validate buffer - /// sizes that depend on buffer data access. - NANOARROW_VALIDATION_LEVEL_MINIMAL = 1, - - /// \brief Validate all buffer sizes, including those that require buffer - /// data access, - /// but do not perform any checks that are O(1) along the length of the - /// buffers. - NANOARROW_VALIDATION_LEVEL_DEFAULT = 2, - - /// \brief Validate all buffer sizes and all buffer content. This is useful - /// in the - /// context of untrusted input or input that may have been corrupted in - /// transit. - NANOARROW_VALIDATION_LEVEL_FULL = 3 + /// \brief Do not validate buffer sizes or content. + NANOARROW_VALIDATION_LEVEL_NONE = 0, + + /// \brief Validate buffer sizes that depend on array length but do not validate buffer + /// sizes that depend on buffer data access. + NANOARROW_VALIDATION_LEVEL_MINIMAL = 1, + + /// \brief Validate all buffer sizes, including those that require buffer data access, + /// but do not perform any checks that are O(1) along the length of the buffers. + NANOARROW_VALIDATION_LEVEL_DEFAULT = 2, + + /// \brief Validate all buffer sizes and all buffer content. This is useful in the + /// context of untrusted input or input that may have been corrupted in transit. + NANOARROW_VALIDATION_LEVEL_FULL = 3 }; /// \brief Get a string value of an enum ArrowTimeUnit value @@ -649,29 +612,29 @@ enum ArrowValidationLevel { static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit); static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) { - switch (time_unit) { - case NANOARROW_TIME_UNIT_SECOND: - return "s"; - case NANOARROW_TIME_UNIT_MILLI: - return "ms"; - case NANOARROW_TIME_UNIT_MICRO: - return "us"; - case NANOARROW_TIME_UNIT_NANO: - return "ns"; - default: - return NULL; - } -} - -/// \brief Functional types of buffers as described in the Arrow Columnar -/// Specification \ingroup nanoarrow-array-view + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + return "s"; + case NANOARROW_TIME_UNIT_MILLI: + return "ms"; + case NANOARROW_TIME_UNIT_MICRO: + return "us"; + case NANOARROW_TIME_UNIT_NANO: + return "ns"; + default: + return NULL; + } +} + +/// \brief Functional types of buffers as described in the Arrow Columnar Specification +/// \ingroup nanoarrow-array-view enum ArrowBufferType { - NANOARROW_BUFFER_TYPE_NONE, - NANOARROW_BUFFER_TYPE_VALIDITY, - NANOARROW_BUFFER_TYPE_TYPE_ID, - NANOARROW_BUFFER_TYPE_UNION_OFFSET, - NANOARROW_BUFFER_TYPE_DATA_OFFSET, - NANOARROW_BUFFER_TYPE_DATA + NANOARROW_BUFFER_TYPE_NONE, + NANOARROW_BUFFER_TYPE_VALIDITY, + NANOARROW_BUFFER_TYPE_TYPE_ID, + NANOARROW_BUFFER_TYPE_UNION_OFFSET, + NANOARROW_BUFFER_TYPE_DATA_OFFSET, + NANOARROW_BUFFER_TYPE_DATA }; /// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout @@ -679,22 +642,21 @@ enum ArrowBufferType { /// /// All currently supported types have 3 buffers or fewer; however, future types /// may involve a variable number of buffers (e.g., string view). These buffers -/// will be represented by separate members of the ArrowArrayView or -/// ArrowLayout. +/// will be represented by separate members of the ArrowArrayView or ArrowLayout. #define NANOARROW_MAX_FIXED_BUFFERS 3 /// \brief An non-owning view of a string /// \ingroup nanoarrow-utils struct ArrowStringView { - /// \brief A pointer to the start of the string - /// - /// If size_bytes is 0, this value may be NULL. - const char* data; - - /// \brief The size of the string in bytes, - /// - /// (Not including the null terminator.) - int64_t size_bytes; + /// \brief A pointer to the start of the string + /// + /// If size_bytes is 0, this value may be NULL. + const char* data; + + /// \brief The size of the string in bytes, + /// + /// (Not including the null terminator.) + int64_t size_bytes; }; /// \brief Return a view of a const C string @@ -702,43 +664,43 @@ struct ArrowStringView { static inline struct ArrowStringView ArrowCharView(const char* value); static inline struct ArrowStringView ArrowCharView(const char* value) { - struct ArrowStringView out; + struct ArrowStringView out; - out.data = value; - if (value) { - out.size_bytes = (int64_t)strlen(value); - } else { - out.size_bytes = 0; - } + out.data = value; + if (value) { + out.size_bytes = (int64_t)strlen(value); + } else { + out.size_bytes = 0; + } - return out; + return out; } union ArrowBufferViewData { - const void* data; - const int8_t* as_int8; - const uint8_t* as_uint8; - const int16_t* as_int16; - const uint16_t* as_uint16; - const int32_t* as_int32; - const uint32_t* as_uint32; - const int64_t* as_int64; - const uint64_t* as_uint64; - const double* as_double; - const float* as_float; - const char* as_char; + const void* data; + const int8_t* as_int8; + const uint8_t* as_uint8; + const int16_t* as_int16; + const uint16_t* as_uint16; + const int32_t* as_int32; + const uint32_t* as_uint32; + const int64_t* as_int64; + const uint64_t* as_uint64; + const double* as_double; + const float* as_float; + const char* as_char; }; /// \brief An non-owning view of a buffer /// \ingroup nanoarrow-utils struct ArrowBufferView { - /// \brief A pointer to the start of the buffer - /// - /// If size_bytes is 0, this value may be NULL. - union ArrowBufferViewData data; + /// \brief A pointer to the start of the buffer + /// + /// If size_bytes is 0, this value may be NULL. + union ArrowBufferViewData data; - /// \brief The size of the buffer in bytes - int64_t size_bytes; + /// \brief The size of the buffer in bytes + int64_t size_bytes; }; /// \brief Array buffer allocation and deallocation @@ -748,48 +710,43 @@ struct ArrowBufferView { /// to customize allocation and deallocation of buffers when constructing /// an ArrowArray. struct ArrowBufferAllocator { - /// \brief Reallocate a buffer or return NULL if it cannot be reallocated - uint8_t* (*reallocate)( - struct ArrowBufferAllocator* allocator, - uint8_t* ptr, - int64_t old_size, - int64_t new_size); - - /// \brief Deallocate a buffer allocated by this allocator - void (*free)( - struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size); - - /// \brief Opaque data specific to the allocator - void* private_data; + /// \brief Reallocate a buffer or return NULL if it cannot be reallocated + uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t old_size, int64_t new_size); + + /// \brief Deallocate a buffer allocated by this allocator + void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size); + + /// \brief Opaque data specific to the allocator + void* private_data; }; /// \brief An owning mutable view of a buffer /// \ingroup nanoarrow-buffer struct ArrowBuffer { - /// \brief A pointer to the start of the buffer - /// - /// If capacity_bytes is 0, this value may be NULL. - uint8_t* data; + /// \brief A pointer to the start of the buffer + /// + /// If capacity_bytes is 0, this value may be NULL. + uint8_t* data; - /// \brief The size of the buffer in bytes - int64_t size_bytes; + /// \brief The size of the buffer in bytes + int64_t size_bytes; - /// \brief The capacity of the buffer in bytes - int64_t capacity_bytes; + /// \brief The capacity of the buffer in bytes + int64_t capacity_bytes; - /// \brief The allocator that will be used to reallocate and/or free the - /// buffer - struct ArrowBufferAllocator allocator; + /// \brief The allocator that will be used to reallocate and/or free the buffer + struct ArrowBufferAllocator allocator; }; /// \brief An owning mutable view of a bitmap /// \ingroup nanoarrow-bitmap struct ArrowBitmap { - /// \brief An ArrowBuffer to hold the allocated memory - struct ArrowBuffer buffer; + /// \brief An ArrowBuffer to hold the allocated memory + struct ArrowBuffer buffer; - /// \brief The number of bits that have been appended to the bitmap - int64_t size_bits; + /// \brief The number of bits that have been appended to the bitmap + int64_t size_bits; }; /// \brief A description of an arrangement of buffers @@ -799,19 +756,18 @@ struct ArrowBitmap { /// calculate the size of each buffer in an ArrowArray knowing only /// the length and offset of the array. struct ArrowLayout { - /// \brief The function of each buffer - enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS]; + /// \brief The function of each buffer + enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS]; - /// \brief The data type of each buffer - enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS]; + /// \brief The data type of each buffer + enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS]; - /// \brief The size of an element each buffer or 0 if this size is variable - /// or unknown - int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS]; + /// \brief The size of an element each buffer or 0 if this size is variable or unknown + int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS]; - /// \brief The number of elements in the child array per element in this - /// array for a fixed-size list - int64_t child_size_elements; + /// \brief The number of elements in the child array per element in this array for a + /// fixed-size list + int64_t child_size_elements; }; /// \brief A non-owning view of an ArrowArray @@ -824,100 +780,99 @@ struct ArrowLayout { /// ArrowArray that does not exist yet, or use it to validate the buffers /// of a future ArrowArray. struct ArrowArrayView { - /// \brief The underlying ArrowArray or NULL if it has not been set or - /// if the buffers in this ArrowArrayView are not backed by an ArrowArray. - const struct ArrowArray* array; + /// \brief The underlying ArrowArray or NULL if it has not been set or + /// if the buffers in this ArrowArrayView are not backed by an ArrowArray. + const struct ArrowArray* array; - /// \brief The number of elements from the physical start of the buffers. - int64_t offset; + /// \brief The number of elements from the physical start of the buffers. + int64_t offset; - /// \brief The number of elements in this view. - int64_t length; + /// \brief The number of elements in this view. + int64_t length; - /// \brief A cached null count or -1 to indicate that this value is unknown. - int64_t null_count; + /// \brief A cached null count or -1 to indicate that this value is unknown. + int64_t null_count; - /// \brief The type used to store values in this array - /// - /// This type represents only the minimum required information to - /// extract values from the array buffers (e.g., for a Date32 array, - /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded - /// arrays, this will be the index type. - enum ArrowType storage_type; + /// \brief The type used to store values in this array + /// + /// This type represents only the minimum required information to + /// extract values from the array buffers (e.g., for a Date32 array, + /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded + /// arrays, this will be the index type. + enum ArrowType storage_type; - /// \brief The buffer types, strides, and sizes of this Array's buffers - struct ArrowLayout layout; + /// \brief The buffer types, strides, and sizes of this Array's buffers + struct ArrowLayout layout; - /// \brief This Array's buffers as ArrowBufferView objects - struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS]; + /// \brief This Array's buffers as ArrowBufferView objects + struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS]; - /// \brief The number of children of this view - int64_t n_children; + /// \brief The number of children of this view + int64_t n_children; - /// \brief Pointers to views of this array's children - struct ArrowArrayView** children; + /// \brief Pointers to views of this array's children + struct ArrowArrayView** children; - /// \brief Pointer to a view of this array's dictionary - struct ArrowArrayView* dictionary; + /// \brief Pointer to a view of this array's dictionary + struct ArrowArrayView* dictionary; - /// \brief Union type id to child index mapping - /// - /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer - /// such that child_index == union_type_id_map[type_id] and - /// type_id == union_type_id_map[128 + child_index]. This value may be - /// NULL in the case where child_id == type_id. - int8_t* union_type_id_map; + /// \brief Union type id to child index mapping + /// + /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer + /// such that child_index == union_type_id_map[type_id] and + /// type_id == union_type_id_map[128 + child_index]. This value may be + /// NULL in the case where child_id == type_id. + int8_t* union_type_id_map; }; // Used as the private data member for ArrowArrays allocated here and accessed // internally within inline ArrowArray* helpers. struct ArrowArrayPrivateData { - // Holder for the validity buffer (or first buffer for union types, which - // are the only type whose first buffer is not a valdiity buffer) - struct ArrowBitmap bitmap; + // Holder for the validity buffer (or first buffer for union types, which are + // the only type whose first buffer is not a valdiity buffer) + struct ArrowBitmap bitmap; - // Holder for additional buffers as required - struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1]; + // Holder for additional buffers as required + struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1]; - // The array of pointers to buffers. This must be updated after a sequence - // of appends to synchronize its values with the actual buffer addresses - // (which may have ben reallocated uring that time) - const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS]; + // The array of pointers to buffers. This must be updated after a sequence + // of appends to synchronize its values with the actual buffer addresses + // (which may have ben reallocated uring that time) + const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS]; - // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown - enum ArrowType storage_type; + // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown + enum ArrowType storage_type; - // The buffer arrangement for the storage type - struct ArrowLayout layout; + // The buffer arrangement for the storage type + struct ArrowLayout layout; - // Flag to indicate if there are non-sequence union type ids. - // In the future this could be replaced with a type id<->child mapping - // to support constructing unions in append mode where type_id != - // child_index - int8_t union_type_id_is_child_index; + // Flag to indicate if there are non-sequence union type ids. + // In the future this could be replaced with a type id<->child mapping + // to support constructing unions in append mode where type_id != child_index + int8_t union_type_id_is_child_index; }; /// \brief A representation of an interval. /// \ingroup nanoarrow-utils struct ArrowInterval { - /// \brief The type of interval being used - enum ArrowType type; - /// \brief The number of months represented by the interval - int32_t months; - /// \brief The number of days represented by the interval - int32_t days; - /// \brief The number of ms represented by the interval - int32_t ms; - /// \brief The number of ns represented by the interval - int64_t ns; + /// \brief The type of interval being used + enum ArrowType type; + /// \brief The number of months represented by the interval + int32_t months; + /// \brief The number of days represented by the interval + int32_t days; + /// \brief The number of ms represented by the interval + int32_t ms; + /// \brief The number of ns represented by the interval + int64_t ns; }; /// \brief Zero initialize an Interval with a given unit /// \ingroup nanoarrow-utils -static inline void ArrowIntervalInit( - struct ArrowInterval* interval, enum ArrowType type) { - memset(interval, 0, sizeof(struct ArrowInterval)); - interval->type = type; +static inline void ArrowIntervalInit(struct ArrowInterval* interval, + enum ArrowType type) { + memset(interval, 0, sizeof(struct ArrowInterval)); + interval->type = type; } /// \brief A representation of a fixed-precision decimal number @@ -927,47 +882,41 @@ static inline void ArrowIntervalInit( /// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(), /// or ArrowDecimalSetBytes256(). struct ArrowDecimal { - /// \brief An array of 64-bit integers of n_words length defined in - /// native-endian order - uint64_t words[4]; + /// \brief An array of 64-bit integers of n_words length defined in native-endian order + uint64_t words[4]; - /// \brief The number of significant digits this decimal number can - /// represent - int32_t precision; + /// \brief The number of significant digits this decimal number can represent + int32_t precision; - /// \brief The number of digits after the decimal point. This can be - /// negative. - int32_t scale; + /// \brief The number of digits after the decimal point. This can be negative. + int32_t scale; - /// \brief The number of words in the words array - int n_words; + /// \brief The number of words in the words array + int n_words; - /// \brief Cached value used by the implementation - int high_word_index; + /// \brief Cached value used by the implementation + int high_word_index; - /// \brief Cached value used by the implementation - int low_word_index; + /// \brief Cached value used by the implementation + int low_word_index; }; /// \brief Initialize a decimal with a given set of type parameters /// \ingroup nanoarrow-utils -static inline void ArrowDecimalInit( - struct ArrowDecimal* decimal, - int32_t bitwidth, - int32_t precision, - int32_t scale) { - memset(decimal->words, 0, sizeof(decimal->words)); - decimal->precision = precision; - decimal->scale = scale; - decimal->n_words = bitwidth / 8 / sizeof(uint64_t); - - if (_ArrowIsLittleEndian()) { - decimal->low_word_index = 0; - decimal->high_word_index = decimal->n_words - 1; - } else { - decimal->low_word_index = decimal->n_words - 1; - decimal->high_word_index = 0; - } +static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwidth, + int32_t precision, int32_t scale) { + memset(decimal->words, 0, sizeof(decimal->words)); + decimal->precision = precision; + decimal->scale = scale; + decimal->n_words = bitwidth / 8 / sizeof(uint64_t); + + if (_ArrowIsLittleEndian()) { + decimal->low_word_index = 0; + decimal->high_word_index = decimal->n_words - 1; + } else { + decimal->low_word_index = decimal->n_words - 1; + decimal->high_word_index = 0; + } } /// \brief Get a signed integer value of a sufficiently small ArrowDecimal @@ -975,64 +924,62 @@ static inline void ArrowDecimalInit( /// This does not check if the decimal's precision sufficiently small to fit /// within the signed 64-bit integer range (A precision less than or equal /// to 18 is sufficiently small). -static inline int64_t ArrowDecimalGetIntUnsafe( - const struct ArrowDecimal* decimal) { - return (int64_t)decimal->words[decimal->low_word_index]; +static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) { + return (int64_t)decimal->words[decimal->low_word_index]; } /// \brief Copy the bytes of this decimal into a sufficiently large buffer /// \ingroup nanoarrow-utils -static inline void ArrowDecimalGetBytes( - const struct ArrowDecimal* decimal, uint8_t* out) { - memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); +static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal, + uint8_t* out) { + memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); } /// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise /// \ingroup nanoarrow-utils static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) { - return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); + return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); } /// \brief Sets the integer value of this decimal /// \ingroup nanoarrow-utils -static inline void ArrowDecimalSetInt( - struct ArrowDecimal* decimal, int64_t value) { - if (value < 0) { - memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); - } else { - memset(decimal->words, 0, decimal->n_words * sizeof(uint64_t)); - } +static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) { + if (value < 0) { + memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); + } else { + memset(decimal->words, 0, decimal->n_words * sizeof(uint64_t)); + } - decimal->words[decimal->low_word_index] = value; + decimal->words[decimal->low_word_index] = value; } /// \brief Negate the value of this decimal in place /// \ingroup nanoarrow-utils static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) { - uint64_t carry = 1; - - if (decimal->low_word_index == 0) { - for (int i = 0; i < decimal->n_words; i++) { - uint64_t elem = decimal->words[i]; - elem = ~elem + carry; - carry &= (elem == 0); - decimal->words[i] = elem; - } - } else { - for (int i = decimal->low_word_index; i >= 0; i--) { - uint64_t elem = decimal->words[i]; - elem = ~elem + carry; - carry &= (elem == 0); - decimal->words[i] = elem; - } + uint64_t carry = 1; + + if (decimal->low_word_index == 0) { + for (int i = 0; i < decimal->n_words; i++) { + uint64_t elem = decimal->words[i]; + elem = ~elem + carry; + carry &= (elem == 0); + decimal->words[i] = elem; + } + } else { + for (int i = decimal->low_word_index; i >= 0; i--) { + uint64_t elem = decimal->words[i]; + elem = ~elem + carry; + carry &= (elem == 0); + decimal->words[i] = elem; } + } } /// \brief Copy bytes from a buffer into this decimal /// \ingroup nanoarrow-utils -static inline void ArrowDecimalSetBytes( - struct ArrowDecimal* decimal, const uint8_t* value) { - memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); +static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, + const uint8_t* value) { + memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); } #ifdef __cplusplus @@ -1064,9 +1011,11 @@ static inline void ArrowDecimalSetBytes( #include #include -// If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will -// set this define in nanoarrow_config.h. If not, you can optionally #define -// NANOARROW_NAMESPACE MyNamespace here. + + +// If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will set this +// define in nanoarrow_config.h. If not, you can optionally #define NANOARROW_NAMESPACE +// MyNamespace here. // This section remaps the non-prefixed symbols to the prefixed symbols so that // code written against this build can be used independent of the value of @@ -1075,118 +1024,104 @@ static inline void ArrowDecimalSetBytes( #define NANOARROW_CAT(A, B) A##B #define NANOARROW_SYMBOL(A, B) NANOARROW_CAT(A, B) -#define ArrowNanoarrowVersion \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion) +#define ArrowNanoarrowVersion NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion) #define ArrowNanoarrowVersionInt \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt) #define ArrowMalloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMalloc) #define ArrowRealloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowRealloc) #define ArrowFree NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowFree) #define ArrowBufferAllocatorDefault \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault) #define ArrowBufferDeallocator \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) #define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet) #define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit) -#define ArrowDecimalSetDigits \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalSetDigits) +#define ArrowDecimalSetDigits NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalSetDigits) #define ArrowDecimalAppendDigitsToBuffer \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendDigitsToBuffer) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendDigitsToBuffer) #define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit) #define ArrowSchemaInitFromType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) -#define ArrowSchemaSetType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) +#define ArrowSchemaSetType NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType) #define ArrowSchemaSetTypeStruct \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct) #define ArrowSchemaSetTypeFixedSize \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize) #define ArrowSchemaSetTypeDecimal \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal) #define ArrowSchemaSetTypeDateTime \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime) #define ArrowSchemaSetTypeUnion \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion) -#define ArrowSchemaDeepCopy \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy) -#define ArrowSchemaSetFormat \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat) -#define ArrowSchemaSetName \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion) +#define ArrowSchemaDeepCopy NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy) +#define ArrowSchemaSetFormat NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat) +#define ArrowSchemaSetName NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName) #define ArrowSchemaSetMetadata \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata) #define ArrowSchemaAllocateChildren \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren) #define ArrowSchemaAllocateDictionary \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary) #define ArrowMetadataReaderInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit) #define ArrowMetadataReaderRead \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead) -#define ArrowMetadataSizeOf \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf) -#define ArrowMetadataHasKey \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey) -#define ArrowMetadataGetValue \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead) +#define ArrowMetadataSizeOf NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf) +#define ArrowMetadataHasKey NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey) +#define ArrowMetadataGetValue NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue) #define ArrowMetadataBuilderInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit) #define ArrowMetadataBuilderAppend \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend) #define ArrowMetadataBuilderSet \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet) #define ArrowMetadataBuilderRemove \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove) -#define ArrowSchemaViewInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit) -#define ArrowSchemaToString \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove) +#define ArrowSchemaViewInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit) +#define ArrowSchemaToString NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString) #define ArrowArrayInitFromType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType) #define ArrowArrayInitFromSchema \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema) #define ArrowArrayInitFromArrayView \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) #define ArrowArrayInitFromArrayView \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView) #define ArrowArrayAllocateChildren \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren) #define ArrowArrayAllocateDictionary \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary) #define ArrowArraySetValidityBitmap \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap) -#define ArrowArraySetBuffer \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer) -#define ArrowArrayReserve \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap) +#define ArrowArraySetBuffer NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer) +#define ArrowArrayReserve NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve) #define ArrowArrayFinishBuilding \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding) #define ArrowArrayFinishBuildingDefault \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault) #define ArrowArrayViewInitFromType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType) #define ArrowArrayViewInitFromSchema \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema) #define ArrowArrayViewAllocateChildren \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren) #define ArrowArrayViewAllocateDictionary \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary) #define ArrowArrayViewSetLength \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength) #define ArrowArrayViewSetArray \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray) #define ArrowArrayViewSetArrayMinimal \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal) #define ArrowArrayViewValidate \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate) -#define ArrowArrayViewReset \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate) +#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset) #define ArrowBasicArrayStreamInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit) #define ArrowBasicArrayStreamSetArray \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray) #define ArrowBasicArrayStreamValidate \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate) #endif @@ -1234,33 +1169,31 @@ struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void); /// avoid copying an existing buffer that was not allocated using the /// infrastructure provided here (e.g., by an R or Python object). struct ArrowBufferAllocator ArrowBufferDeallocator( - void (*custom_free)( - struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size), + void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t size), void* private_data); /// @} -/// \brief Move the contents of an src ArrowSchema into dst and set src->release -/// to NULL \ingroup nanoarrow-arrow-cdata -static inline void ArrowSchemaMove( - struct ArrowSchema* src, struct ArrowSchema* dst); +/// \brief Move the contents of an src ArrowSchema into dst and set src->release to NULL +/// \ingroup nanoarrow-arrow-cdata +static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst); /// \brief Call the release callback of an ArrowSchema /// \ingroup nanoarrow-arrow-cdata static inline void ArrowSchemaRelease(struct ArrowSchema* schema); -/// \brief Move the contents of an src ArrowArray into dst and set src->release -/// to NULL \ingroup nanoarrow-arrow-cdata -static inline void ArrowArrayMove( - struct ArrowArray* src, struct ArrowArray* dst); +/// \brief Move the contents of an src ArrowArray into dst and set src->release to NULL +/// \ingroup nanoarrow-arrow-cdata +static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst); /// \brief Call the release callback of an ArrowArray static inline void ArrowArrayRelease(struct ArrowArray* array); -/// \brief Move the contents of an src ArrowArrayStream into dst and set -/// src->release to NULL \ingroup nanoarrow-arrow-cdata -static inline void ArrowArrayStreamMove( - struct ArrowArrayStream* src, struct ArrowArrayStream* dst); +/// \brief Move the contents of an src ArrowArrayStream into dst and set src->release to +/// NULL \ingroup nanoarrow-arrow-cdata +static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dst); /// \brief Call the get_schema callback of an ArrowArrayStream /// \ingroup nanoarrow-arrow-cdata @@ -1270,8 +1203,7 @@ static inline void ArrowArrayStreamMove( /// makes it significantly less verbose to iterate over array streams /// using NANOARROW_RETURN_NOT_OK()-style error handling. static inline ArrowErrorCode ArrowArrayStreamGetSchema( - struct ArrowArrayStream* array_stream, - struct ArrowSchema* out, + struct ArrowArrayStream* array_stream, struct ArrowSchema* out, struct ArrowError* error); /// \brief Call the get_schema callback of an ArrowArrayStream @@ -1282,22 +1214,20 @@ static inline ArrowErrorCode ArrowArrayStreamGetSchema( /// makes it significantly less verbose to iterate over array streams /// using NANOARROW_RETURN_NOT_OK()-style error handling. static inline ArrowErrorCode ArrowArrayStreamGetNext( - struct ArrowArrayStream* array_stream, - struct ArrowArray* out, + struct ArrowArrayStream* array_stream, struct ArrowArray* out, struct ArrowError* error); /// \brief Call the get_next callback of an ArrowArrayStream /// \ingroup nanoarrow-arrow-cdata /// /// Unlike the get_next callback, this function never returns NULL (i.e., its -/// result is safe to use in printf-style error formatters). Null values from -/// the original callback are reported as "". +/// result is safe to use in printf-style error formatters). Null values from the +/// original callback are reported as "". static inline const char* ArrowArrayStreamGetLastError( struct ArrowArrayStream* array_stream); /// \brief Call the release callback of an ArrowArrayStream -static inline void ArrowArrayStreamRelease( - struct ArrowArrayStream* array_stream); +static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream); /// \defgroup nanoarrow-errors Error handling /// @@ -1305,24 +1235,24 @@ static inline void ArrowArrayStreamRelease( /// need to communicate more verbose error information accept a pointer /// to an ArrowError. This can be stack or statically allocated. The /// content of the message is undefined unless an error code has been -/// returned. If a nanoarrow function is passed a non-null ArrowError pointer, -/// the ArrowError pointed to by the argument will be propagated with a +/// returned. If a nanoarrow function is passed a non-null ArrowError pointer, the +/// ArrowError pointed to by the argument will be propagated with a /// null-terminated error message. It is safe to pass a NULL ArrowError anywhere /// in the nanoarrow API. /// /// Except where documented, it is generally not safe to continue after a -/// function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK -/// and NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ -/// clients can use the helpers provided in the nanoarrow.hpp header to -/// facilitate using C++ idioms for memory management and error propgagtion. +/// function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK and +/// NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ clients can use +/// the helpers provided in the nanoarrow.hpp header to facilitate using C++ idioms +/// for memory management and error propgagtion. /// /// @{ /// \brief Set the contents of an error using printf syntax. /// /// If error is NULL, this function does nothing and returns NANOARROW_OK. -NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet( - struct ArrowError* error, const char* fmt, ...); +NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, + const char* fmt, ...); /// @} @@ -1343,12 +1273,12 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type); static inline struct ArrowStringView ArrowCharView(const char* value); /// \brief Sets the integer value of an ArrowDecimal from a string -ArrowErrorCode ArrowDecimalSetDigits( - struct ArrowDecimal* decimal, struct ArrowStringView value); +ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, + struct ArrowStringView value); /// \brief Get the integer value of an ArrowDecimal as string -ArrowErrorCode ArrowDecimalAppendDigitsToBuffer( - const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer); +ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, + struct ArrowBuffer* buffer); /// @} @@ -1369,10 +1299,9 @@ void ArrowSchemaInit(struct ArrowSchema* schema); /// /// A convenience constructor for that calls ArrowSchemaInit() and /// ArrowSchemaSetType() for the common case of constructing an -/// unparameterized type. The caller is responsible for calling the -/// schema->release callback if NANOARROW_OK is returned. -ArrowErrorCode ArrowSchemaInitFromType( - struct ArrowSchema* schema, enum ArrowType type); +/// unparameterized type. The caller is responsible for calling the schema->release +/// callback if NANOARROW_OK is returned. +ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type); /// \brief Get a human-readable summary of a Schema /// @@ -1380,8 +1309,8 @@ ArrowErrorCode ArrowSchemaInitFromType( /// and returns the number of characters required for the output if /// n were sufficiently large. If recursive is non-zero, the result will /// also include children. -int64_t ArrowSchemaToString( - const struct ArrowSchema* schema, char* out, int64_t n, char recursive); +int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, + char recursive); /// \brief Set the format field of a schema from an ArrowType /// @@ -1389,19 +1318,16 @@ int64_t ArrowSchemaToString( /// NANOARROW_TYPE_LIST, NANOARROW_TYPE_LARGE_LIST, and /// NANOARROW_TYPE_MAP, the appropriate number of children are /// allocated, initialized, and named; however, the caller must -/// ArrowSchemaSetType() on the preinitialized children. Schema must have been -/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetType( - struct ArrowSchema* schema, enum ArrowType type); +/// ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized +/// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type); /// \brief Set the format field and initialize children of a struct schema /// -/// The specified number of children are initialized; however, the caller is -/// responsible for calling ArrowSchemaSetType() and ArrowSchemaSetName() on -/// each child. Schema must have been initialized using ArrowSchemaInit() or -/// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeStruct( - struct ArrowSchema* schema, int64_t n_children); +/// The specified number of children are initialized; however, the caller is responsible +/// for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child. +/// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children); /// \brief Set the format field of a fixed-size schema /// @@ -1409,55 +1335,50 @@ ArrowErrorCode ArrowSchemaSetTypeStruct( /// NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST. /// For NANOARROW_TYPE_FIXED_SIZE_LIST, the appropriate number of children are /// allocated, initialized, and named; however, the caller must -/// ArrowSchemaSetType() the first child. Schema must have been initialized -/// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeFixedSize( - struct ArrowSchema* schema, enum ArrowType type, int32_t fixed_size); +/// ArrowSchemaSetType() the first child. Schema must have been initialized using +/// ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, + enum ArrowType type, int32_t fixed_size); /// \brief Set the format field of a decimal schema /// /// Returns EINVAL for scale <= 0 or for type that is not -/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have -/// been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeDecimal( - struct ArrowSchema* schema, - enum ArrowType type, - int32_t decimal_precision, - int32_t decimal_scale); +/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, + int32_t decimal_precision, + int32_t decimal_scale); /// \brief Set the format field of a time, timestamp, or duration schema /// /// Returns EINVAL for type that is not /// NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64, /// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The -/// timezone parameter must be NULL for a non-timestamp type. Schema must have -/// been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeDateTime( - struct ArrowSchema* schema, - enum ArrowType type, - enum ArrowTimeUnit time_unit, - const char* timezone); +/// timezone parameter must be NULL for a non-timestamp type. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, + enum ArrowTimeUnit time_unit, + const char* timezone); /// \brief Seet the format field of a union schema /// /// Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION /// or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are /// allocated, and initialized. -ArrowErrorCode ArrowSchemaSetTypeUnion( - struct ArrowSchema* schema, enum ArrowType type, int64_t n_children); +ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, + int64_t n_children); /// \brief Make a (recursive) copy of a schema /// /// Allocates and copies fields of schema into schema_out. -ArrowErrorCode ArrowSchemaDeepCopy( - const struct ArrowSchema* schema, struct ArrowSchema* schema_out); +ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, + struct ArrowSchema* schema_out); /// \brief Copy format into schema->format /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetFormat( - struct ArrowSchema* schema, const char* format); +ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format); /// \brief Copy name into schema->name /// @@ -1469,16 +1390,15 @@ ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name); /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy. -ArrowErrorCode ArrowSchemaSetMetadata( - struct ArrowSchema* schema, const char* metadata); +ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata); /// \brief Allocate the schema->children array /// /// Includes the memory for each child struct ArrowSchema. /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaAllocateChildren( - struct ArrowSchema* schema, int64_t n_children); +ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, + int64_t n_children); /// \brief Allocate the schema->dictionary member /// @@ -1497,25 +1417,24 @@ ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema); /// The ArrowMetadataReader does not own any data and is only valid /// for the lifetime of the underlying metadata pointer. struct ArrowMetadataReader { - /// \brief A metadata string from a schema->metadata field. - const char* metadata; + /// \brief A metadata string from a schema->metadata field. + const char* metadata; - /// \brief The current offset into the metadata string - int64_t offset; + /// \brief The current offset into the metadata string + int64_t offset; - /// \brief The number of remaining keys - int32_t remaining_keys; + /// \brief The number of remaining keys + int32_t remaining_keys; }; /// \brief Initialize an ArrowMetadataReader -ArrowErrorCode ArrowMetadataReaderInit( - struct ArrowMetadataReader* reader, const char* metadata); +ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, + const char* metadata); /// \brief Read the next key/value pair from an ArrowMetadataReader -ArrowErrorCode ArrowMetadataReaderRead( - struct ArrowMetadataReader* reader, - struct ArrowStringView* key_out, - struct ArrowStringView* value_out); +ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, + struct ArrowStringView* key_out, + struct ArrowStringView* value_out); /// \brief The number of bytes in in a key/value metadata string int64_t ArrowMetadataSizeOf(const char* metadata); @@ -1526,37 +1445,32 @@ char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key); /// \brief Extract a value from schema metadata /// /// If key does not exist in metadata, value_out is unmodified -ArrowErrorCode ArrowMetadataGetValue( - const char* metadata, - struct ArrowStringView key, - struct ArrowStringView* value_out); +ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, + struct ArrowStringView* value_out); /// \brief Initialize a builder for schema metadata from key/value pairs /// /// metadata can be an existing metadata string or NULL to initialize /// an empty metadata string. -ArrowErrorCode ArrowMetadataBuilderInit( - struct ArrowBuffer* buffer, const char* metadata); +ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const char* metadata); /// \brief Append a key/value pair to a buffer containing serialized metadata -ArrowErrorCode ArrowMetadataBuilderAppend( - struct ArrowBuffer* buffer, - struct ArrowStringView key, - struct ArrowStringView value); +ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); /// \brief Set a key/value pair to a buffer containing serialized metadata /// /// Ensures that the only entry for key in the metadata is set to value. /// This function maintains the existing position of (the first instance of) /// key if present in the data. -ArrowErrorCode ArrowMetadataBuilderSet( - struct ArrowBuffer* buffer, - struct ArrowStringView key, - struct ArrowStringView value); +ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); /// \brief Remove a key from a buffer containing serialized metadata -ArrowErrorCode ArrowMetadataBuilderRemove( - struct ArrowBuffer* buffer, struct ArrowStringView key); +ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, + struct ArrowStringView key); /// @} @@ -1571,93 +1485,92 @@ ArrowErrorCode ArrowMetadataBuilderRemove( /// encouraged to use the provided getters to ensure forward /// compatibility. struct ArrowSchemaView { - /// \brief A pointer to the schema represented by this view - const struct ArrowSchema* schema; - - /// \brief The data type represented by the schema - /// - /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a - /// non-null dictionary member; datetime types are valid values. - /// This value will never be NANOARROW_TYPE_EXTENSION (see - /// extension_name and/or extension_metadata to check for - /// an extension type). - enum ArrowType type; - - /// \brief The storage data type represented by the schema - /// - /// This value will never be NANOARROW_TYPE_DICTIONARY, - /// NANOARROW_TYPE_EXTENSION or any datetime type. This value represents - /// only the type required to interpret the buffers in the array. - enum ArrowType storage_type; - - /// \brief The storage layout represented by the schema - struct ArrowLayout layout; - - /// \brief The extension type name if it exists - /// - /// If the ARROW:extension:name key is present in schema.metadata, - /// extension_name.data will be non-NULL. - struct ArrowStringView extension_name; - - /// \brief The extension type metadata if it exists - /// - /// If the ARROW:extension:metadata key is present in schema.metadata, - /// extension_metadata.data will be non-NULL. - struct ArrowStringView extension_metadata; - - /// \brief Format fixed size parameter - /// - /// This value is set when parsing a fixed-size binary or fixed-size - /// list schema; this value is undefined for other types. For a - /// fixed-size binary schema this value is in bytes; for a fixed-size - /// list schema this value refers to the number of child elements for - /// each element of the parent. - int32_t fixed_size; - - /// \brief Decimal bitwidth - /// - /// This value is set when parsing a decimal type schema; - /// this value is undefined for other types. - int32_t decimal_bitwidth; - - /// \brief Decimal precision - /// - /// This value is set when parsing a decimal type schema; - /// this value is undefined for other types. - int32_t decimal_precision; - - /// \brief Decimal scale - /// - /// This value is set when parsing a decimal type schema; - /// this value is undefined for other types. - int32_t decimal_scale; - - /// \brief Format time unit parameter - /// - /// This value is set when parsing a date/time type. The value is - /// undefined for other types. - enum ArrowTimeUnit time_unit; - - /// \brief Format timezone parameter - /// - /// This value is set when parsing a timestamp type and represents - /// the timezone format parameter. This value points to - /// data within the schema and is undefined for other types. - const char* timezone; - - /// \brief Union type ids parameter - /// - /// This value is set when parsing a union type and represents - /// type ids parameter. This value points to - /// data within the schema and is undefined for other types. - const char* union_type_ids; + /// \brief A pointer to the schema represented by this view + const struct ArrowSchema* schema; + + /// \brief The data type represented by the schema + /// + /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a + /// non-null dictionary member; datetime types are valid values. + /// This value will never be NANOARROW_TYPE_EXTENSION (see + /// extension_name and/or extension_metadata to check for + /// an extension type). + enum ArrowType type; + + /// \brief The storage data type represented by the schema + /// + /// This value will never be NANOARROW_TYPE_DICTIONARY, NANOARROW_TYPE_EXTENSION + /// or any datetime type. This value represents only the type required to + /// interpret the buffers in the array. + enum ArrowType storage_type; + + /// \brief The storage layout represented by the schema + struct ArrowLayout layout; + + /// \brief The extension type name if it exists + /// + /// If the ARROW:extension:name key is present in schema.metadata, + /// extension_name.data will be non-NULL. + struct ArrowStringView extension_name; + + /// \brief The extension type metadata if it exists + /// + /// If the ARROW:extension:metadata key is present in schema.metadata, + /// extension_metadata.data will be non-NULL. + struct ArrowStringView extension_metadata; + + /// \brief Format fixed size parameter + /// + /// This value is set when parsing a fixed-size binary or fixed-size + /// list schema; this value is undefined for other types. For a + /// fixed-size binary schema this value is in bytes; for a fixed-size + /// list schema this value refers to the number of child elements for + /// each element of the parent. + int32_t fixed_size; + + /// \brief Decimal bitwidth + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_bitwidth; + + /// \brief Decimal precision + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_precision; + + /// \brief Decimal scale + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_scale; + + /// \brief Format time unit parameter + /// + /// This value is set when parsing a date/time type. The value is + /// undefined for other types. + enum ArrowTimeUnit time_unit; + + /// \brief Format timezone parameter + /// + /// This value is set when parsing a timestamp type and represents + /// the timezone format parameter. This value points to + /// data within the schema and is undefined for other types. + const char* timezone; + + /// \brief Union type ids parameter + /// + /// This value is set when parsing a union type and represents + /// type ids parameter. This value points to + /// data within the schema and is undefined for other types. + const char* union_type_ids; }; /// \brief Initialize an ArrowSchemaView -ArrowErrorCode ArrowSchemaViewInit( - struct ArrowSchemaView* schema_view, - const struct ArrowSchema* schema, - struct ArrowError* error); +ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, + const struct ArrowSchema* schema, + struct ArrowError* error); /// @} @@ -1688,8 +1601,7 @@ static inline void ArrowBufferReset(struct ArrowBuffer* buffer); /// /// Transfers the buffer data and lifecycle management to another /// address and resets buffer. -static inline void ArrowBufferMove( - struct ArrowBuffer* src, struct ArrowBuffer* dst); +static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst); /// \brief Grow or shrink a buffer to a given capacity /// @@ -1697,84 +1609,85 @@ static inline void ArrowBufferMove( /// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not /// adjust the buffer's size member except to ensure that the invariant /// capacity >= size remains true. -static inline ArrowErrorCode ArrowBufferResize( - struct ArrowBuffer* buffer, int64_t new_capacity_bytes, char shrink_to_fit); +static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, + int64_t new_capacity_bytes, + char shrink_to_fit); /// \brief Ensure a buffer has at least a given additional capacity /// /// Ensures that the buffer has space to append at least /// additional_size_bytes, overallocating when required. -static inline ArrowErrorCode ArrowBufferReserve( - struct ArrowBuffer* buffer, int64_t additional_size_bytes); +static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes); /// \brief Write data to buffer and increment the buffer size /// /// This function does not check that buffer has the required capacity -static inline void ArrowBufferAppendUnsafe( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes); +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes); /// \brief Write data to buffer and increment the buffer size /// /// This function writes and ensures that the buffer has the required capacity, /// possibly by reallocating the buffer. Like ArrowBufferReserve, this will /// overallocate when reallocation is required. -static inline ArrowErrorCode ArrowBufferAppend( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes); +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes); /// \brief Write fill to buffer and increment the buffer size /// /// This function writes the specified number of fill bytes and /// ensures that the buffer has the required capacity, -static inline ArrowErrorCode ArrowBufferAppendFill( - struct ArrowBuffer* buffer, uint8_t value, int64_t size_bytes); +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes); /// \brief Write an 8-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt8( - struct ArrowBuffer* buffer, int8_t value); +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value); /// \brief Write an unsigned 8-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt8( - struct ArrowBuffer* buffer, uint8_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value); /// \brief Write a 16-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt16( - struct ArrowBuffer* buffer, int16_t value); +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value); /// \brief Write an unsigned 16-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt16( - struct ArrowBuffer* buffer, uint16_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value); /// \brief Write a 32-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt32( - struct ArrowBuffer* buffer, int32_t value); +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value); /// \brief Write an unsigned 32-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt32( - struct ArrowBuffer* buffer, uint32_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value); /// \brief Write a 64-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt64( - struct ArrowBuffer* buffer, int64_t value); +static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, + int64_t value); /// \brief Write an unsigned 64-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt64( - struct ArrowBuffer* buffer, uint64_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value); /// \brief Write a double to a buffer -static inline ArrowErrorCode ArrowBufferAppendDouble( - struct ArrowBuffer* buffer, double value); +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value); /// \brief Write a float to a buffer -static inline ArrowErrorCode ArrowBufferAppendFloat( - struct ArrowBuffer* buffer, float value); +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value); /// \brief Write an ArrowStringView to a buffer -static inline ArrowErrorCode ArrowBufferAppendStringView( - struct ArrowBuffer* buffer, struct ArrowStringView value); +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value); /// \brief Write an ArrowBufferView to a buffer -static inline ArrowErrorCode ArrowBufferAppendBufferView( - struct ArrowBuffer* buffer, struct ArrowBufferView value); +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value); /// @} @@ -1795,20 +1708,19 @@ static inline void ArrowBitClear(uint8_t* bits, int64_t i); static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t value); /// \brief Set a boolean value to a range in a bitmap -static inline void ArrowBitsSetTo( - uint8_t* bits, int64_t start_offset, int64_t length, uint8_t bits_are_set); +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set); /// \brief Count true values in a bitmap -static inline int64_t ArrowBitCountSet( - const uint8_t* bits, int64_t i_from, int64_t i_to); +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to); /// \brief Extract int8 boolean values from a range in a bitmap -static inline void ArrowBitsUnpackInt8( - const uint8_t* bits, int64_t start_offset, int64_t length, int8_t* out); +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out); /// \brief Extract int32 boolean values from a range in a bitmap -static inline void ArrowBitsUnpackInt32( - const uint8_t* bits, int64_t start_offset, int64_t length, int32_t* out); +static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, + int64_t length, int32_t* out); /// \brief Initialize an ArrowBitmap /// @@ -1819,15 +1731,14 @@ static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap); /// /// Transfers the underlying buffer data and lifecycle management to another /// address and resets the bitmap. -static inline void ArrowBitmapMove( - struct ArrowBitmap* src, struct ArrowBitmap* dst); +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst); /// \brief Ensure a bitmap builder has at least a given additional capacity /// /// Ensures that the buffer has space to append at least /// additional_size_bits, overallocating when required. -static inline ArrowErrorCode ArrowBitmapReserve( - struct ArrowBitmap* bitmap, int64_t additional_size_bits); +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits); /// \brief Grow or shrink a bitmap to a given capacity /// @@ -1835,34 +1746,33 @@ static inline ArrowErrorCode ArrowBitmapReserve( /// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not /// adjust the buffer's size member except when shrinking new_capacity_bits /// to a value less than the current number of bits in the bitmap. -static inline ArrowErrorCode ArrowBitmapResize( - struct ArrowBitmap* bitmap, int64_t new_capacity_bits, char shrink_to_fit); +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_capacity_bits, + char shrink_to_fit); -/// \brief Reserve space for and append zero or more of the same boolean value -/// to a bitmap -static inline ArrowErrorCode ArrowBitmapAppend( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length); +/// \brief Reserve space for and append zero or more of the same boolean value to a bitmap +static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); /// \brief Append zero or more of the same boolean value to a bitmap -static inline void ArrowBitmapAppendUnsafe( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length); +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); /// \brief Append boolean values encoded as int8_t to a bitmap /// /// The values must all be 0 or 1. -static inline void ArrowBitmapAppendInt8Unsafe( - struct ArrowBitmap* bitmap, const int8_t* values, int64_t n_values); +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values); /// \brief Append boolean values encoded as int32_t to a bitmap /// /// The values must all be 0 or 1. -static inline void ArrowBitmapAppendInt32Unsafe( - struct ArrowBitmap* bitmap, const int32_t* values, int64_t n_values); +static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values); /// \brief Reset a bitmap builder /// -/// Releases any memory held by buffer, empties the cache, and resets the size -/// to zero +/// Releases any memory held by buffer, empties the cache, and resets the size to zero static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); /// @} @@ -1881,26 +1791,24 @@ static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); /// Initializes the fields and release callback of array. Caller /// is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromType( - struct ArrowArray* array, enum ArrowType storage_type); +ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, + enum ArrowType storage_type); /// \brief Initialize the contents of an ArrowArray from an ArrowSchema /// /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromSchema( - struct ArrowArray* array, - const struct ArrowSchema* schema, - struct ArrowError* error); +ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, + const struct ArrowSchema* schema, + struct ArrowError* error); /// \brief Initialize the contents of an ArrowArray from an ArrowArrayView /// /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromArrayView( - struct ArrowArray* array, - const struct ArrowArrayView* array_view, - struct ArrowError* error); +ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, + const struct ArrowArrayView* array_view, + struct ArrowError* error); /// \brief Allocate the array->children array /// @@ -1908,8 +1816,7 @@ ArrowErrorCode ArrowArrayInitFromArrayView( /// whose members are marked as released and may be subsequently initialized /// with ArrowArrayInitFromType() or moved from an existing ArrowArray. /// schema must have been allocated using ArrowArrayInitFromType(). -ArrowErrorCode ArrowArrayAllocateChildren( - struct ArrowArray* array, int64_t n_children); +ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children); /// \brief Allocate the array->dictionary member /// @@ -1922,33 +1829,30 @@ ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array); /// \brief Set the validity bitmap of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -void ArrowArraySetValidityBitmap( - struct ArrowArray* array, struct ArrowBitmap* bitmap); +void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap); /// \brief Set a buffer of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -ArrowErrorCode ArrowArraySetBuffer( - struct ArrowArray* array, int64_t i, struct ArrowBuffer* buffer); +ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, + struct ArrowBuffer* buffer); /// \brief Get the validity bitmap of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -static inline struct ArrowBitmap* ArrowArrayValidityBitmap( - struct ArrowArray* array); +static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array); /// \brief Get a buffer of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -static inline struct ArrowBuffer* ArrowArrayBuffer( - struct ArrowArray* array, int64_t i); +static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i); /// \brief Start element-wise appending to an ArrowArray /// /// Initializes any values needed to use ArrowArrayAppend*() functions. -/// All element-wise appenders append by value and return EINVAL if the exact -/// value cannot be represented by the underlying storage type. array must have -/// been allocated using ArrowArrayInitFromType() +/// All element-wise appenders append by value and return EINVAL if the exact value +/// cannot be represented by the underlying storage type. +/// array must have been allocated using ArrowArrayInitFromType() static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); /// \brief Reserve space for future appends @@ -1957,32 +1861,29 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); /// child array sizes for non-fixed-size arrays), recursively reserve space for /// additional elements. This is useful for reducing the number of reallocations /// that occur using the item-wise appenders. -ArrowErrorCode ArrowArrayReserve( - struct ArrowArray* array, int64_t additional_size_elements); +ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, + int64_t additional_size_elements); /// \brief Append a null value to an array -static inline ArrowErrorCode ArrowArrayAppendNull( - struct ArrowArray* array, int64_t n); +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n); /// \brief Append an empty, non-null value to an array -static inline ArrowErrorCode ArrowArrayAppendEmpty( - struct ArrowArray* array, int64_t n); +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n); /// \brief Append a signed integer value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range). -static inline ArrowErrorCode ArrowArrayAppendInt( - struct ArrowArray* array, int64_t value); +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value); /// \brief Append an unsigned integer value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range). -static inline ArrowErrorCode ArrowArrayAppendUInt( - struct ArrowArray* array, uint64_t value); +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value); /// \brief Append a double value to an array /// @@ -1990,68 +1891,67 @@ static inline ArrowErrorCode ArrowArrayAppendUInt( /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range or there is an attempt to append /// a non-integer to an array with an integer storage type). -static inline ArrowErrorCode ArrowArrayAppendDouble( - struct ArrowArray* array, double value); +static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value); /// \brief Append a string of bytes to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type, EOVERFLOW if appending value would overflow /// the offset type (e.g., if the data buffer would be larger than 2 GB for a -/// non-large string type), or EINVAL otherwise (e.g., the underlying array is -/// not a binary, string, large binary, large string, or fixed-size binary -/// array, or value is the wrong size for a fixed-size binary array). -static inline ArrowErrorCode ArrowArrayAppendBytes( - struct ArrowArray* array, struct ArrowBufferView value); +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// binary, string, large binary, large string, or fixed-size binary array, or value is +/// the wrong size for a fixed-size binary array). +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value); /// \brief Append a string value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type, EOVERFLOW if appending value would overflow /// the offset type (e.g., if the data buffer would be larger than 2 GB for a -/// non-large string type), or EINVAL otherwise (e.g., the underlying array is -/// not a string or large string array). -static inline ArrowErrorCode ArrowArrayAppendString( - struct ArrowArray* array, struct ArrowStringView value); +/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a +/// string or large string array). +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value); /// \brief Append a Interval to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise. -static inline ArrowErrorCode ArrowArrayAppendInterval( - struct ArrowArray* array, const struct ArrowInterval* value); +static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, + const struct ArrowInterval* value); /// \brief Append a decimal value to an array /// /// Returns NANOARROW_OK if array is a decimal array with the appropriate /// bitwidth or EINVAL otherwise. -static inline ArrowErrorCode ArrowArrayAppendDecimal( - struct ArrowArray* array, const struct ArrowDecimal* value); +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + const struct ArrowDecimal* value); /// \brief Finish a nested array element /// /// Appends a non-null element to the array based on the first child's current /// length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW /// if the child of a list or map array would exceed INT_MAX elements, or EINVAL -/// if the underlying storage type is not a struct, list, large list, or -/// fixed-size list, or if there was an attempt to add a struct or fixed-size -/// list element where the length of the child array(s) did not match the -/// expected length. +/// if the underlying storage type is not a struct, list, large list, or fixed-size +/// list, or if there was an attempt to add a struct or fixed-size list element where the +/// length of the child array(s) did not match the expected length. static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array); /// \brief Finish a union array element /// -/// Appends an element to the union type ids buffer and increments -/// array->length. For sparse unions, up to one element is added to non type-id -/// children. Returns EINVAL if the underlying storage type is not a union, if -/// type_id is not valid, or if child sizes after appending are inconsistent. -static inline ArrowErrorCode ArrowArrayFinishUnionElement( - struct ArrowArray* array, int8_t type_id); +/// Appends an element to the union type ids buffer and increments array->length. +/// For sparse unions, up to one element is added to non type-id children. Returns +/// EINVAL if the underlying storage type is not a union, if type_id is not valid, +/// or if child sizes after appending are inconsistent. +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id); /// \brief Shrink buffer capacity to the size required /// -/// Also applies shrinking to any child arrays. array must have been allocated -/// using ArrowArrayInitFromType +/// Also applies shrinking to any child arrays. array must have been allocated using +/// ArrowArrayInitFromType static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); /// \brief Finish building an ArrowArray @@ -2060,20 +1960,19 @@ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); /// into array->buffers and checks the actual size of the buffers /// against the expected size based on the final length. /// array must have been allocated using ArrowArrayInitFromType() -ArrowErrorCode ArrowArrayFinishBuildingDefault( - struct ArrowArray* array, struct ArrowError* error); +ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, + struct ArrowError* error); /// \brief Finish building an ArrowArray with explicit validation /// -/// Finish building with an explicit validation level. This could perform less -/// validation (i.e. NANOARROW_VALIDATION_LEVEL_NONE or -/// NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU buffer data access is not -/// possible or more validation (i.e., NANOARROW_VALIDATION_LEVEL_FULL) if -/// buffer content was obtained from an untrusted or corruptible source. -ArrowErrorCode ArrowArrayFinishBuilding( - struct ArrowArray* array, - enum ArrowValidationLevel validation_level, - struct ArrowError* error); +/// Finish building with an explicit validation level. This could perform less validation +/// (i.e. NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU +/// buffer data access is not possible or more validation (i.e., +/// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or +/// corruptible source. +ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, + enum ArrowValidationLevel validation_level, + struct ArrowError* error); /// @} @@ -2084,71 +1983,66 @@ ArrowErrorCode ArrowArrayFinishBuilding( /// @{ /// \brief Initialize the contents of an ArrowArrayView -void ArrowArrayViewInitFromType( - struct ArrowArrayView* array_view, enum ArrowType storage_type); +void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, + enum ArrowType storage_type); /// \brief Move an ArrowArrayView /// /// Transfers the ArrowArrayView data and lifecycle management to another /// address and resets the contents of src. -static inline void ArrowArrayViewMove( - struct ArrowArrayView* src, struct ArrowArrayView* dst); +static inline void ArrowArrayViewMove(struct ArrowArrayView* src, + struct ArrowArrayView* dst); /// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema -ArrowErrorCode ArrowArrayViewInitFromSchema( - struct ArrowArrayView* array_view, - const struct ArrowSchema* schema, - struct ArrowError* error); +ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, + const struct ArrowSchema* schema, + struct ArrowError* error); /// \brief Allocate the array_view->children array /// /// Includes the memory for each child struct ArrowArrayView -ArrowErrorCode ArrowArrayViewAllocateChildren( - struct ArrowArrayView* array_view, int64_t n_children); +ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, + int64_t n_children); /// \brief Allocate array_view->dictionary -ArrowErrorCode ArrowArrayViewAllocateDictionary( - struct ArrowArrayView* array_view); +ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view); /// \brief Set data-independent buffer sizes from length void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length); /// \brief Set buffer sizes and data pointers from an ArrowArray -ArrowErrorCode ArrowArrayViewSetArray( - struct ArrowArrayView* array_view, - const struct ArrowArray* array, - struct ArrowError* error); +ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error); -/// \brief Set buffer sizes and data pointers from an ArrowArray except for -/// those that require dereferencing buffer content. -ArrowErrorCode ArrowArrayViewSetArrayMinimal( - struct ArrowArrayView* array_view, - const struct ArrowArray* array, - struct ArrowError* error); +/// \brief Set buffer sizes and data pointers from an ArrowArray except for those +/// that require dereferencing buffer content. +ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error); /// \brief Performs checks on the content of an ArrowArrayView /// /// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray, /// the buffer sizes and some content (fist and last offset) have already /// been validated at the "default" level. If setting the buffer pointers -/// and sizes otherwise, you may wish to perform checks at a different level. -/// See documentation for ArrowValidationLevel for the details of checks -/// performed at each level. -ArrowErrorCode ArrowArrayViewValidate( - struct ArrowArrayView* array_view, - enum ArrowValidationLevel validation_level, - struct ArrowError* error); +/// and sizes otherwise, you may wish to perform checks at a different level. See +/// documentation for ArrowValidationLevel for the details of checks performed +/// at each level. +ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, + enum ArrowValidationLevel validation_level, + struct ArrowError* error); /// \brief Reset the contents of an ArrowArrayView and frees resources void ArrowArrayViewReset(struct ArrowArrayView* array_view); /// \brief Check for a null element in an ArrowArrayView -static inline int8_t ArrowArrayViewIsNull( - const struct ArrowArrayView* array_view, int64_t i); +static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, + int64_t i); /// \brief Get the type id of a union array element -static inline int8_t ArrowArrayViewUnionTypeId( - const struct ArrowArrayView* array_view, int64_t i); +static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, + int64_t i); /// \brief Get the child index of a union array element static inline int8_t ArrowArrayViewUnionChildIndex( @@ -2160,15 +2054,15 @@ static inline int64_t ArrowArrayViewUnionChildOffset( /// \brief Get an element in an ArrowArrayView as an integer /// -/// This function does not check for null values, that values are actually -/// integers, or that values are within a valid range for an int64. -static inline int64_t ArrowArrayViewGetIntUnsafe( - const struct ArrowArrayView* array_view, int64_t i); +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for an int64. +static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view, + int64_t i); /// \brief Get an element in an ArrowArrayView as an unsigned integer /// -/// This function does not check for null values, that values are actually -/// integers, or that values are within a valid range for a uint64. +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for a uint64. static inline uint64_t ArrowArrayViewGetUIntUnsafe( const struct ArrowArrayView* array_view, int64_t i); @@ -2196,10 +2090,8 @@ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( /// This function does not check for null values. The out parameter must /// be initialized with ArrowDecimalInit() with the proper parameters for this /// type before calling this for the first time. -static inline void ArrowArrayViewGetDecimalUnsafe( - const struct ArrowArrayView* array_view, - int64_t i, - struct ArrowDecimal* out); +static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out); /// @} @@ -2217,10 +2109,8 @@ static inline void ArrowArrayViewGetDecimalUnsafe( /// This function moves the ownership of schema to the array_stream. If /// this function returns NANOARROW_OK, the caller is responsible for /// releasing the ArrowArrayStream. -ArrowErrorCode ArrowBasicArrayStreamInit( - struct ArrowArrayStream* array_stream, - struct ArrowSchema* schema, - int64_t n_arrays); +ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, + struct ArrowSchema* schema, int64_t n_arrays); /// \brief Set the ith ArrowArray in this ArrowArrayStream. /// @@ -2229,27 +2119,29 @@ ArrowErrorCode ArrowBasicArrayStreamInit( /// be greater than zero and less than the value of n_arrays passed in /// ArrowBasicArrayStreamInit(). Callers are not required to fill all /// n_arrays members (i.e., n_arrays is a maximum bound). -void ArrowBasicArrayStreamSetArray( - struct ArrowArrayStream* array_stream, int64_t i, struct ArrowArray* array); +void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, + struct ArrowArray* array); /// \brief Validate the contents of this ArrowArrayStream /// /// array_stream must have been initialized with ArrowBasicArrayStreamInit(). -/// This function uses ArrowArrayStreamInitFromSchema() and -/// ArrowArrayStreamSetArray() to validate the contents of the arrays. -ArrowErrorCode ArrowBasicArrayStreamValidate( - const struct ArrowArrayStream* array_stream, struct ArrowError* error); +/// This function uses ArrowArrayStreamInitFromSchema() and ArrowArrayStreamSetArray() +/// to validate the contents of the arrays. +ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream* array_stream, + struct ArrowError* error); /// @} -// Undefine ArrowErrorCode, which may have been defined to annotate functions -// that return it to warn for an unused result. +// Undefine ArrowErrorCode, which may have been defined to annotate functions that return +// it to warn for an unused result. #if defined(ArrowErrorCode) #undef ArrowErrorCode #endif // Inline function definitions + + #ifdef __cplusplus } #endif @@ -2279,595 +2171,574 @@ ArrowErrorCode ArrowBasicArrayStreamValidate( #include #include + + #ifdef __cplusplus extern "C" { #endif -static inline int64_t _ArrowGrowByFactor( - int64_t current_capacity, int64_t new_capacity) { - int64_t doubled_capacity = current_capacity * 2; - if (doubled_capacity > new_capacity) { - return doubled_capacity; - } else { - return new_capacity; - } +static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) { + int64_t doubled_capacity = current_capacity * 2; + if (doubled_capacity > new_capacity) { + return doubled_capacity; + } else { + return new_capacity; + } } static inline void ArrowBufferInit(struct ArrowBuffer* buffer) { - buffer->data = NULL; - buffer->size_bytes = 0; - buffer->capacity_bytes = 0; - buffer->allocator = ArrowBufferAllocatorDefault(); + buffer->data = NULL; + buffer->size_bytes = 0; + buffer->capacity_bytes = 0; + buffer->allocator = ArrowBufferAllocatorDefault(); } static inline ArrowErrorCode ArrowBufferSetAllocator( struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) { - if (buffer->data == NULL) { - buffer->allocator = allocator; - return NANOARROW_OK; - } else { - return EINVAL; - } + if (buffer->data == NULL) { + buffer->allocator = allocator; + return NANOARROW_OK; + } else { + return EINVAL; + } } static inline void ArrowBufferReset(struct ArrowBuffer* buffer) { - if (buffer->data != NULL) { - buffer->allocator.free( - &buffer->allocator, (uint8_t*)buffer->data, buffer->capacity_bytes); - buffer->data = NULL; - } + if (buffer->data != NULL) { + buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data, + buffer->capacity_bytes); + buffer->data = NULL; + } - buffer->capacity_bytes = 0; - buffer->size_bytes = 0; + buffer->capacity_bytes = 0; + buffer->size_bytes = 0; } -static inline void ArrowBufferMove( - struct ArrowBuffer* src, struct ArrowBuffer* dst) { - memcpy(dst, src, sizeof(struct ArrowBuffer)); - src->data = NULL; - ArrowBufferReset(src); +static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst) { + memcpy(dst, src, sizeof(struct ArrowBuffer)); + src->data = NULL; + ArrowBufferReset(src); } -static inline ArrowErrorCode ArrowBufferResize( - struct ArrowBuffer* buffer, - int64_t new_capacity_bytes, - char shrink_to_fit) { - if (new_capacity_bytes < 0) { - return EINVAL; - } - - if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) { - buffer->data = buffer->allocator.reallocate( - &buffer->allocator, - buffer->data, - buffer->capacity_bytes, - new_capacity_bytes); - if (buffer->data == NULL && new_capacity_bytes > 0) { - buffer->capacity_bytes = 0; - buffer->size_bytes = 0; - return ENOMEM; - } +static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, + int64_t new_capacity_bytes, + char shrink_to_fit) { + if (new_capacity_bytes < 0) { + return EINVAL; + } - buffer->capacity_bytes = new_capacity_bytes; + if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) { + buffer->data = buffer->allocator.reallocate( + &buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes); + if (buffer->data == NULL && new_capacity_bytes > 0) { + buffer->capacity_bytes = 0; + buffer->size_bytes = 0; + return ENOMEM; } - // Ensures that when shrinking that size <= capacity - if (new_capacity_bytes < buffer->size_bytes) { - buffer->size_bytes = new_capacity_bytes; - } + buffer->capacity_bytes = new_capacity_bytes; + } - return NANOARROW_OK; + // Ensures that when shrinking that size <= capacity + if (new_capacity_bytes < buffer->size_bytes) { + buffer->size_bytes = new_capacity_bytes; + } + + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowBufferReserve( - struct ArrowBuffer* buffer, int64_t additional_size_bytes) { - int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes; - if (min_capacity_bytes <= buffer->capacity_bytes) { - return NANOARROW_OK; - } +static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes) { + int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes; + if (min_capacity_bytes <= buffer->capacity_bytes) { + return NANOARROW_OK; + } - return ArrowBufferResize( - buffer, - _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), - 0); + return ArrowBufferResize( + buffer, _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), 0); } -static inline void ArrowBufferAppendUnsafe( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes) { - if (size_bytes > 0) { - memcpy(buffer->data + buffer->size_bytes, data, size_bytes); - buffer->size_bytes += size_bytes; - } +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes) { + if (size_bytes > 0) { + memcpy(buffer->data + buffer->size_bytes, data, size_bytes); + buffer->size_bytes += size_bytes; + } } -static inline ArrowErrorCode ArrowBufferAppend( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes) { - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); - ArrowBufferAppendUnsafe(buffer, data, size_bytes); - return NANOARROW_OK; + ArrowBufferAppendUnsafe(buffer, data, size_bytes); + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowBufferAppendInt8( - struct ArrowBuffer* buffer, int8_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int8_t)); +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int8_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt8( - struct ArrowBuffer* buffer, uint8_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint8_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint8_t)); } -static inline ArrowErrorCode ArrowBufferAppendInt16( - struct ArrowBuffer* buffer, int16_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int16_t)); +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int16_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt16( - struct ArrowBuffer* buffer, uint16_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint16_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint16_t)); } -static inline ArrowErrorCode ArrowBufferAppendInt32( - struct ArrowBuffer* buffer, int32_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int32_t)); +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int32_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt32( - struct ArrowBuffer* buffer, uint32_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint32_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint32_t)); } -static inline ArrowErrorCode ArrowBufferAppendInt64( - struct ArrowBuffer* buffer, int64_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int64_t)); +static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, + int64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int64_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt64( - struct ArrowBuffer* buffer, uint64_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint64_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint64_t)); } -static inline ArrowErrorCode ArrowBufferAppendDouble( - struct ArrowBuffer* buffer, double value) { - return ArrowBufferAppend(buffer, &value, sizeof(double)); +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value) { + return ArrowBufferAppend(buffer, &value, sizeof(double)); } -static inline ArrowErrorCode ArrowBufferAppendFloat( - struct ArrowBuffer* buffer, float value) { - return ArrowBufferAppend(buffer, &value, sizeof(float)); +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value) { + return ArrowBufferAppend(buffer, &value, sizeof(float)); } -static inline ArrowErrorCode ArrowBufferAppendStringView( - struct ArrowBuffer* buffer, struct ArrowStringView value) { - return ArrowBufferAppend(buffer, value.data, value.size_bytes); +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value) { + return ArrowBufferAppend(buffer, value.data, value.size_bytes); } -static inline ArrowErrorCode ArrowBufferAppendBufferView( - struct ArrowBuffer* buffer, struct ArrowBufferView value) { - return ArrowBufferAppend(buffer, value.data.data, value.size_bytes); +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value) { + return ArrowBufferAppend(buffer, value.data.data, value.size_bytes); } -static inline ArrowErrorCode ArrowBufferAppendFill( - struct ArrowBuffer* buffer, uint8_t value, int64_t size_bytes) { - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); - memset(buffer->data + buffer->size_bytes, value, size_bytes); - buffer->size_bytes += size_bytes; - return NANOARROW_OK; + memset(buffer->data + buffer->size_bytes, value, size_bytes); + buffer->size_bytes += size_bytes; + return NANOARROW_OK; } static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; -static const uint8_t _ArrowkFlippedBitmask[] = { - 254, 253, 251, 247, 239, 223, 191, 127}; +static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127}; -static const uint8_t _ArrowkTrailingBitmask[] = { - 255, 254, 252, 248, 240, 224, 192, 128}; +static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; static const uint8_t _ArrowkBytePopcount[] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, + 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, + 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, + 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, + 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, + 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, + 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) { - return (value + 7) & ~((int64_t)7); + return (value + 7) & ~((int64_t)7); } static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) { - return (value / 8) * 8; + return (value / 8) * 8; } static inline int64_t _ArrowBytesForBits(int64_t bits) { - return (bits >> 3) + ((bits & 7) != 0); + return (bits >> 3) + ((bits & 7) != 0); } static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) { - out[0] = (word & 0x1) != 0; - out[1] = (word & 0x2) != 0; - out[2] = (word & 0x4) != 0; - out[3] = (word & 0x8) != 0; - out[4] = (word & 0x10) != 0; - out[5] = (word & 0x20) != 0; - out[6] = (word & 0x40) != 0; - out[7] = (word & 0x80) != 0; + out[0] = (word & 0x1) != 0; + out[1] = (word & 0x2) != 0; + out[2] = (word & 0x4) != 0; + out[3] = (word & 0x8) != 0; + out[4] = (word & 0x10) != 0; + out[5] = (word & 0x20) != 0; + out[6] = (word & 0x40) != 0; + out[7] = (word & 0x80) != 0; } static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) { - out[0] = (word & 0x1) != 0; - out[1] = (word & 0x2) != 0; - out[2] = (word & 0x4) != 0; - out[3] = (word & 0x8) != 0; - out[4] = (word & 0x10) != 0; - out[5] = (word & 0x20) != 0; - out[6] = (word & 0x40) != 0; - out[7] = (word & 0x80) != 0; + out[0] = (word & 0x1) != 0; + out[1] = (word & 0x2) != 0; + out[2] = (word & 0x4) != 0; + out[3] = (word & 0x8) != 0; + out[4] = (word & 0x10) != 0; + out[5] = (word & 0x20) != 0; + out[6] = (word & 0x40) != 0; + out[7] = (word & 0x80) != 0; } static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { - *out = - (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | ((values[7] + 0x7f) & 0x80)); + *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); } static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) { - *out = - (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | ((values[7] + 0x7f) & 0x80)); + *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); } static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) { - return (bits[i >> 3] >> (i & 0x07)) & 1; + return (bits[i >> 3] >> (i & 0x07)) & 1; } -static inline void ArrowBitsUnpackInt8( - const uint8_t* bits, int64_t start_offset, int64_t length, int8_t* out) { - if (length == 0) { - return; - } - - const int64_t i_begin = start_offset; - const int64_t i_end = start_offset + length; - const int64_t i_last_valid = i_end - 1; +static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset, + int64_t length, int8_t* out) { + if (length == 0) { + return; + } - const int64_t bytes_begin = i_begin / 8; - const int64_t bytes_last_valid = i_last_valid / 8; + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; - if (bytes_begin == bytes_last_valid) { - for (int i = 0; i < length; i++) { - out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); - } + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; - return; + if (bytes_begin == bytes_last_valid) { + for (int i = 0; i < length; i++) { + out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); } - // first byte - for (int i = 0; i < 8 - (i_begin % 8); i++) { - *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); - } + return; + } - // middle bytes - for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { - _ArrowBitsUnpackInt8(bits[i], out); - out += 8; - } + // first byte + for (int i = 0; i < 8 - (i_begin % 8); i++) { + *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } - // last byte - const int bits_remaining = (int)(i_end % 8 == 0 ? 8 : i_end % 8); - for (int i = 0; i < bits_remaining; i++) { - *out++ = ArrowBitGet(&bits[bytes_last_valid], i); - } -} + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + _ArrowBitsUnpackInt8(bits[i], out); + out += 8; + } -static inline void ArrowBitsUnpackInt32( - const uint8_t* bits, int64_t start_offset, int64_t length, int32_t* out) { - if (length == 0) { - return; - } + // last byte + const int bits_remaining = (int)(i_end % 8 == 0 ? 8 : i_end % 8); + for (int i = 0; i < bits_remaining; i++) { + *out++ = ArrowBitGet(&bits[bytes_last_valid], i); + } +} - const int64_t i_begin = start_offset; - const int64_t i_end = start_offset + length; - const int64_t i_last_valid = i_end - 1; +static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset, + int64_t length, int32_t* out) { + if (length == 0) { + return; + } - const int64_t bytes_begin = i_begin / 8; - const int64_t bytes_last_valid = i_last_valid / 8; + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; - if (bytes_begin == bytes_last_valid) { - for (int i = 0; i < length; i++) { - out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); - } + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; - return; + if (bytes_begin == bytes_last_valid) { + for (int i = 0; i < length; i++) { + out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); } - // first byte - for (int i = 0; i < 8 - (i_begin % 8); i++) { - *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); - } + return; + } - // middle bytes - for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { - _ArrowBitsUnpackInt32(bits[i], out); - out += 8; - } + // first byte + for (int i = 0; i < 8 - (i_begin % 8); i++) { + *out++ = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8); + } - // last byte - const int bits_remaining = (int)(i_end % 8 == 0 ? 8 : i_end % 8); - for (int i = 0; i < bits_remaining; i++) { - *out++ = ArrowBitGet(&bits[bytes_last_valid], i); - } + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + _ArrowBitsUnpackInt32(bits[i], out); + out += 8; + } + + // last byte + const int bits_remaining = (int)(i_end % 8 == 0 ? 8 : i_end % 8); + for (int i = 0; i < bits_remaining; i++) { + *out++ = ArrowBitGet(&bits[bytes_last_valid], i); + } } static inline void ArrowBitSet(uint8_t* bits, int64_t i) { - bits[i / 8] |= _ArrowkBitmask[i % 8]; + bits[i / 8] |= _ArrowkBitmask[i % 8]; } static inline void ArrowBitClear(uint8_t* bits, int64_t i) { - bits[i / 8] &= _ArrowkFlippedBitmask[i % 8]; + bits[i / 8] &= _ArrowkFlippedBitmask[i % 8]; } static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) { - bits[i / 8] ^= ((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & - _ArrowkBitmask[i % 8]; -} - -static inline void ArrowBitsSetTo( - uint8_t* bits, int64_t start_offset, int64_t length, uint8_t bits_are_set) { - const int64_t i_begin = start_offset; - const int64_t i_end = start_offset + length; - const uint8_t fill_byte = (uint8_t)(-bits_are_set); - - const int64_t bytes_begin = i_begin / 8; - const int64_t bytes_end = i_end / 8 + 1; - - const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; - const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; - - if (bytes_end == bytes_begin + 1) { - // set bits within a single byte - const uint8_t - only_byte_mask = i_end % 8 == 0 ? - first_byte_mask : - (uint8_t)(first_byte_mask | last_byte_mask); - bits[bytes_begin] &= only_byte_mask; - bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); - return; - } + bits[i / 8] ^= + ((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & _ArrowkBitmask[i % 8]; +} - // set/clear trailing bits of first byte - bits[bytes_begin] &= first_byte_mask; - bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set) { + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const uint8_t fill_byte = (uint8_t)(-bits_are_set); - if (bytes_end - bytes_begin > 2) { - // set/clear whole bytes - memset( - bits + bytes_begin + 1, - fill_byte, - (size_t)(bytes_end - bytes_begin - 2)); - } + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_end = i_end / 8 + 1; - if (i_end % 8 == 0) { - return; - } + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; + + if (bytes_end == bytes_begin + 1) { + // set bits within a single byte + const uint8_t only_byte_mask = + i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); + bits[bytes_begin] &= only_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); + return; + } + + // set/clear trailing bits of first byte + bits[bytes_begin] &= first_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); - // set/clear leading bits of last byte - bits[bytes_end - 1] &= last_byte_mask; - bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask); + if (bytes_end - bytes_begin > 2) { + // set/clear whole bytes + memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2)); + } + + if (i_end % 8 == 0) { + return; + } + + // set/clear leading bits of last byte + bits[bytes_end - 1] &= last_byte_mask; + bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask); } -static inline int64_t ArrowBitCountSet( - const uint8_t* bits, int64_t start_offset, int64_t length) { - if (length == 0) { - return 0; - } +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset, + int64_t length) { + if (length == 0) { + return 0; + } - const int64_t i_begin = start_offset; - const int64_t i_end = start_offset + length; - const int64_t i_last_valid = i_end - 1; + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const int64_t i_last_valid = i_end - 1; - const int64_t bytes_begin = i_begin / 8; - const int64_t bytes_last_valid = i_last_valid / 8; + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_last_valid = i_last_valid / 8; - if (bytes_begin == bytes_last_valid) { - // count bits within a single byte - const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8]; - const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8]; + if (bytes_begin == bytes_last_valid) { + // count bits within a single byte + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8]; - const uint8_t - only_byte_mask = i_end % 8 == 0 ? - last_byte_mask : - (uint8_t)(first_byte_mask & last_byte_mask); + const uint8_t only_byte_mask = + i_end % 8 == 0 ? last_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask); - const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask; - return _ArrowkBytePopcount[byte_masked]; - } + const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask; + return _ArrowkBytePopcount[byte_masked]; + } - const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; - const uint8_t last_byte_mask = i_end % 8 == 0 ? - 0 : - _ArrowkTrailingBitmask[i_end % 8]; - int64_t count = 0; + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = i_end % 8 == 0 ? 0 : _ArrowkTrailingBitmask[i_end % 8]; + int64_t count = 0; - // first byte - count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask]; + // first byte + count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask]; - // middle bytes - for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { - count += _ArrowkBytePopcount[bits[i]]; - } + // middle bytes + for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) { + count += _ArrowkBytePopcount[bits[i]]; + } - // last byte - count += _ArrowkBytePopcount[bits[bytes_last_valid] & ~last_byte_mask]; + // last byte + count += _ArrowkBytePopcount[bits[bytes_last_valid] & ~last_byte_mask]; - return count; + return count; } static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) { - ArrowBufferInit(&bitmap->buffer); - bitmap->size_bits = 0; + ArrowBufferInit(&bitmap->buffer); + bitmap->size_bits = 0; } -static inline void ArrowBitmapMove( - struct ArrowBitmap* src, struct ArrowBitmap* dst) { - ArrowBufferMove(&src->buffer, &dst->buffer); - dst->size_bits = src->size_bits; - src->size_bits = 0; +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst) { + ArrowBufferMove(&src->buffer, &dst->buffer); + dst->size_bits = src->size_bits; + src->size_bits = 0; } -static inline ArrowErrorCode ArrowBitmapReserve( - struct ArrowBitmap* bitmap, int64_t additional_size_bits) { - int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits; - if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) { - return NANOARROW_OK; - } +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits) { + int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits; + if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) { + return NANOARROW_OK; + } - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve( - &bitmap->buffer, _ArrowBytesForBits(additional_size_bits))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferReserve(&bitmap->buffer, _ArrowBytesForBits(additional_size_bits))); - bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0; - return NANOARROW_OK; + bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0; + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowBitmapResize( - struct ArrowBitmap* bitmap, int64_t new_capacity_bits, char shrink_to_fit) { - if (new_capacity_bits < 0) { - return EINVAL; - } +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_capacity_bits, + char shrink_to_fit) { + if (new_capacity_bits < 0) { + return EINVAL; + } - int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits); - NANOARROW_RETURN_NOT_OK( - ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit)); + int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits); + NANOARROW_RETURN_NOT_OK( + ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit)); - if (new_capacity_bits < bitmap->size_bits) { - bitmap->size_bits = new_capacity_bits; - } + if (new_capacity_bits < bitmap->size_bits) { + bitmap->size_bits = new_capacity_bits; + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length)); + + ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length); + return NANOARROW_OK; +} - return NANOARROW_OK; +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + ArrowBitsSetTo(bitmap->buffer.data, bitmap->size_bits, length, bits_are_set); + bitmap->size_bits += length; + bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits); } -static inline ArrowErrorCode ArrowBitmapAppend( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) { - NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length)); +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values) { + if (n_values == 0) { + return; + } - ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length); - return NANOARROW_OK; -} - -static inline void ArrowBitmapAppendUnsafe( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) { - ArrowBitsSetTo( - bitmap->buffer.data, bitmap->size_bits, length, bits_are_set); - bitmap->size_bits += length; - bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits); -} + const int8_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; -static inline void ArrowBitmapAppendInt8Unsafe( - struct ArrowBitmap* bitmap, const int8_t* values, int64_t n_values) { - if (n_values == 0) { - return; + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); } - const int8_t* values_cursor = values; - int64_t n_remaining = n_values; - int64_t out_i_cursor = bitmap->size_bits; - uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; - - // First byte - if ((out_i_cursor % 8) != 0) { - int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - - out_i_cursor; - for (int i = 0; i < n_partial_bits; i++) { - ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); - } + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } - out_cursor++; - values_cursor += n_partial_bits; - n_remaining -= n_partial_bits; - } + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + _ArrowBitmapPackInt8(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } - // Middle bytes - int64_t n_full_bytes = n_remaining / 8; - for (int64_t i = 0; i < n_full_bytes; i++) { - _ArrowBitmapPackInt8(values_cursor, out_cursor); - values_cursor += 8; - out_cursor++; + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); } + out_cursor++; + } - // Last byte - out_i_cursor += n_full_bytes * 8; - n_remaining -= n_full_bytes * 8; - if (n_remaining > 0) { - // Zero out the last byte - *out_cursor = 0x00; - for (int i = 0; i < n_remaining; i++) { - ArrowBitSetTo( - bitmap->buffer.data, out_i_cursor++, values_cursor[i]); - } - out_cursor++; - } - - bitmap->size_bits += n_values; - bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; } -static inline void ArrowBitmapAppendInt32Unsafe( - struct ArrowBitmap* bitmap, const int32_t* values, int64_t n_values) { - if (n_values == 0) { - return; - } +static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values) { + if (n_values == 0) { + return; + } - const int32_t* values_cursor = values; - int64_t n_remaining = n_values; - int64_t out_i_cursor = bitmap->size_bits; - uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; - - // First byte - if ((out_i_cursor % 8) != 0) { - int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - - out_i_cursor; - for (int i = 0; i < n_partial_bits; i++) { - ArrowBitSetTo( - bitmap->buffer.data, out_i_cursor++, (uint8_t)values[i]); - } + const int32_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; - out_cursor++; - values_cursor += n_partial_bits; - n_remaining -= n_partial_bits; + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values[i]); } - // Middle bytes - int64_t n_full_bytes = n_remaining / 8; - for (int64_t i = 0; i < n_full_bytes; i++) { - _ArrowBitmapPackInt32(values_cursor, out_cursor); - values_cursor += 8; - out_cursor++; - } + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } + + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + _ArrowBitmapPackInt32(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } - // Last byte - out_i_cursor += n_full_bytes * 8; - n_remaining -= n_full_bytes * 8; - if (n_remaining > 0) { - // Zero out the last byte - *out_cursor = 0x00; - for (int i = 0; i < n_remaining; i++) { - ArrowBitSetTo( - bitmap->buffer.data, out_i_cursor++, (uint8_t)values_cursor[i]); - } - out_cursor++; - } - - bitmap->size_bits += n_values; - bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, (uint8_t)values_cursor[i]); + } + out_cursor++; + } + + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; } static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) { - ArrowBufferReset(&bitmap->buffer); - bitmap->size_bits = 0; + ArrowBufferReset(&bitmap->buffer); + bitmap->size_bits = 0; } #ifdef __cplusplus @@ -2901,1026 +2772,961 @@ static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) { #include #include + + + #ifdef __cplusplus extern "C" { #endif -static inline struct ArrowBitmap* ArrowArrayValidityBitmap( - struct ArrowArray* array) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - return &private_data->bitmap; -} - -static inline struct ArrowBuffer* ArrowArrayBuffer( - struct ArrowArray* array, int64_t i) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - switch (i) { - case 0: - return &private_data->bitmap.buffer; - default: - return private_data->buffers + i - 1; - } +static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + return &private_data->bitmap; +} + +static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + switch (i) { + case 0: + return &private_data->bitmap.buffer; + default: + return private_data->buffers + i - 1; + } } // We don't currently support the case of unions where type_id != child_index; // however, these functions are used to keep track of where that assumption // is made. -static inline int8_t _ArrowArrayUnionChildIndex( - struct ArrowArray* array, int8_t type_id) { - NANOARROW_UNUSED(array); - return type_id; +static inline int8_t _ArrowArrayUnionChildIndex(struct ArrowArray* array, + int8_t type_id) { + NANOARROW_UNUSED(array); + return type_id; } -static inline int8_t _ArrowArrayUnionTypeId( - struct ArrowArray* array, int8_t child_index) { - NANOARROW_UNUSED(array); - return child_index; +static inline int8_t _ArrowArrayUnionTypeId(struct ArrowArray* array, + int8_t child_index) { + NANOARROW_UNUSED(array); + return child_index; } -static inline int32_t _ArrowParseUnionTypeIds( - const char* type_ids, int8_t* out) { - if (*type_ids == '\0') { - return 0; - } - - int32_t i = 0; - long type_id; - char* end_ptr; - do { - type_id = strtol(type_ids, &end_ptr, 10); - if (end_ptr == type_ids || type_id < 0 || type_id > 127) { - return -1; - } - - if (out != NULL) { - out[i] = (int8_t)type_id; - } - - i++; - - type_ids = end_ptr; - if (*type_ids == '\0') { - return i; - } else if (*type_ids != ',') { - return -1; - } else { - type_ids++; - } - } while (1); +static inline int32_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) { + if (*type_ids == '\0') { + return 0; + } - return -1; -} - -static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices( - const int8_t* type_ids, int64_t n_type_ids, int64_t n_children) { - if (n_type_ids != n_children) { - return 0; + int32_t i = 0; + long type_id; + char* end_ptr; + do { + type_id = strtol(type_ids, &end_ptr, 10); + if (end_ptr == type_ids || type_id < 0 || type_id > 127) { + return -1; } - for (int8_t i = 0; i < n_type_ids; i++) { - if (type_ids[i] != i) { - return 0; - } + if (out != NULL) { + out[i] = (int8_t)type_id; } - return 1; -} - -static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices( - const char* type_id_str, int64_t n_children) { - int8_t type_ids[128]; - int32_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); - return _ArrowParsedUnionTypeIdsWillEqualChildIndices( - type_ids, n_type_ids, n_children); -} - -static inline ArrowErrorCode ArrowArrayStartAppending( - struct ArrowArray* array) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - switch (private_data->storage_type) { - case NANOARROW_TYPE_UNINITIALIZED: - return EINVAL; - case NANOARROW_TYPE_SPARSE_UNION: - case NANOARROW_TYPE_DENSE_UNION: - // Note that this value could be -1 if the type_ids string was - // invalid - if (private_data->union_type_id_is_child_index != 1) { - return EINVAL; - } else { - break; - } - default: - break; - } - if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) { - return EINVAL; - } + i++; - // Initialize any data offset buffer with a single zero - for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { - if (private_data->layout.buffer_type[i] == - NANOARROW_BUFFER_TYPE_DATA_OFFSET && - private_data->layout.element_size_bits[i] == 64) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); - } else if ( - private_data->layout.buffer_type[i] == - NANOARROW_BUFFER_TYPE_DATA_OFFSET && - private_data->layout.element_size_bits[i] == 32) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); - } - } - - // Start building any child arrays or dictionaries - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i])); - } - - if (array->dictionary != NULL) { - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary)); + type_ids = end_ptr; + if (*type_ids == '\0') { + return i; + } else if (*type_ids != ',') { + return -1; + } else { + type_ids++; } + } while (1); - return NANOARROW_OK; + return -1; } -static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { - for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { - struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); - NANOARROW_RETURN_NOT_OK( - ArrowBufferResize(buffer, buffer->size_bytes, 1)); - } - - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i])); - } +static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const int8_t* type_ids, + int64_t n_type_ids, + int64_t n_children) { + if (n_type_ids != n_children) { + return 0; + } - if (array->dictionary != NULL) { - NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary)); + for (int8_t i = 0; i < n_type_ids; i++) { + if (type_ids[i] != i) { + return 0; } + } - return NANOARROW_OK; + return 1; } -static inline ArrowErrorCode _ArrowArrayAppendBits( - struct ArrowArray* array, int64_t buffer_i, uint8_t value, int64_t n) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i); - int64_t bytes_required = _ArrowRoundUpToMultipleOf8( - private_data->layout - .element_size_bits[buffer_i] * - (array->length + 1)) / - 8; - if (bytes_required > buffer->size_bytes) { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill( - buffer, 0, bytes_required - buffer->size_bytes)); - } - - ArrowBitsSetTo(buffer->data, array->length, n, value); - return NANOARROW_OK; +static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str, + int64_t n_children) { + int8_t type_ids[128]; + int32_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); + return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids, n_children); } -static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal( - struct ArrowArray* array, int64_t n, uint8_t is_valid) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; +static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; - if (n == 0) { - return NANOARROW_OK; - } - - // Some type-specific handling - switch (private_data->storage_type) { - case NANOARROW_TYPE_NA: - // (An empty value for a null array *is* a null) - array->null_count += n; - array->length += n; - return NANOARROW_OK; - - case NANOARROW_TYPE_DENSE_UNION: { - // Add one null to the first child and append n references to that - // child - int8_t type_id = _ArrowArrayUnionTypeId(array, 0); - NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendEmptyInternal( - array->children[0], 1, is_valid)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); - for (int64_t i = 0; i < n; i++) { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - ArrowArrayBuffer(array, 1), - (int32_t)array->children[0]->length - 1)); - } - // For the purposes of array->null_count, union elements are never - // considered "null" even if some children contain nulls. - array->length += n; - return NANOARROW_OK; - } - - case NANOARROW_TYPE_SPARSE_UNION: { - // Add n nulls to the first child and append n references to that - // child - int8_t type_id = _ArrowArrayUnionTypeId(array, 0); - NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendEmptyInternal( - array->children[0], n, is_valid)); - for (int64_t i = 1; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK( - ArrowArrayAppendEmpty(array->children[i], n)); - } - - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); - // For the purposes of array->null_count, union elements are never - // considered "null" even if some children contain nulls. - array->length += n; - return NANOARROW_OK; - } - - case NANOARROW_TYPE_FIXED_SIZE_LIST: - NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty( - array->children[0], - n * private_data->layout.child_size_elements)); - break; - case NANOARROW_TYPE_STRUCT: - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK( - ArrowArrayAppendEmpty(array->children[i], n)); - } - break; - - default: - break; - } - - // Append n is_valid bits to the validity bitmap. If we haven't allocated a - // bitmap yet and we need to append nulls, do it now. - if (!is_valid && private_data->bitmap.buffer.data == NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapReserve(&private_data->bitmap, array->length + n)); - ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length); - ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); - } else if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n)); - ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); - } - - // Add appropriate buffer fill - struct ArrowBuffer* buffer; - int64_t size_bytes; - - for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { - buffer = ArrowArrayBuffer(array, i); - size_bytes = private_data->layout.element_size_bits[i] / 8; - - switch (private_data->layout.buffer_type[i]) { - case NANOARROW_BUFFER_TYPE_NONE: - case NANOARROW_BUFFER_TYPE_VALIDITY: - continue; - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - // Append the current value at the end of the offset buffer for - // each element - NANOARROW_RETURN_NOT_OK( - ArrowBufferReserve(buffer, size_bytes * n)); - - for (int64_t j = 0; j < n; j++) { - ArrowBufferAppendUnsafe( - buffer, - buffer->data + size_bytes * (array->length + j), - size_bytes); - } - - // Skip the data buffer - i++; - continue; - case NANOARROW_BUFFER_TYPE_DATA: - // Zero out the next bit of memory - if (private_data->layout.element_size_bits[i] % 8 == 0) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(buffer, 0, size_bytes * n)); - } else { - NANOARROW_RETURN_NOT_OK( - _ArrowArrayAppendBits(array, i, 0, n)); - } - continue; - - case NANOARROW_BUFFER_TYPE_TYPE_ID: - case NANOARROW_BUFFER_TYPE_UNION_OFFSET: - // These cases return above - return EINVAL; - } - } - - array->length += n; - array->null_count += n * !is_valid; - return NANOARROW_OK; + switch (private_data->storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + return EINVAL; + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + // Note that this value could be -1 if the type_ids string was invalid + if (private_data->union_type_id_is_child_index != 1) { + return EINVAL; + } else { + break; + } + default: + break; + } + if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) { + return EINVAL; + } + + // Initialize any data offset buffer with a single zero + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 64) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); + } else if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 32) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); + } + } + + // Start building any child arrays or dictionaries + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i])); + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary)); + } + + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowArrayAppendNull( - struct ArrowArray* array, int64_t n) { - return _ArrowArrayAppendEmptyInternal(array, n, 0); -} - -static inline ArrowErrorCode ArrowArrayAppendEmpty( - struct ArrowArray* array, int64_t n) { - return _ArrowArrayAppendEmptyInternal(array, n, 1); -} - -static inline ArrowErrorCode ArrowArrayAppendInt( - struct ArrowArray* array, int64_t value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_INT64: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(data_buffer, &value, sizeof(int64_t))); - break; - case NANOARROW_TYPE_INT32: - _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(data_buffer, (int32_t)value)); - break; - case NANOARROW_TYPE_INT16: - _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt16(data_buffer, (int16_t)value)); - break; - case NANOARROW_TYPE_INT8: - _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt8(data_buffer, (int8_t)value)); - break; - case NANOARROW_TYPE_UINT64: - case NANOARROW_TYPE_UINT32: - case NANOARROW_TYPE_UINT16: - case NANOARROW_TYPE_UINT8: - _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); - return ArrowArrayAppendUInt(array, value); - case NANOARROW_TYPE_DOUBLE: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendDouble(data_buffer, (double)value)); - break; - case NANOARROW_TYPE_FLOAT: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFloat(data_buffer, (float)value)); - break; - case NANOARROW_TYPE_BOOL: - NANOARROW_RETURN_NOT_OK( - _ArrowArrayAppendBits(array, 1, value != 0, 1)); - break; - default: - return EINVAL; - } - - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } +static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { + for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); + NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1)); + } + + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i])); + } + + if (array->dictionary != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary)); + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array, + int64_t buffer_i, uint8_t value, + int64_t n) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i); + int64_t bytes_required = + _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] * + (array->length + 1)) / + 8; + if (bytes_required > buffer->size_bytes) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes)); + } - array->length++; - return NANOARROW_OK; + ArrowBitsSetTo(buffer->data, array->length, n, value); + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowArrayAppendUInt( - struct ArrowArray* array, uint64_t value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_UINT64: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); - break; - case NANOARROW_TYPE_UINT32: - _NANOARROW_CHECK_UPPER_LIMIT(value, UINT32_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); - break; - case NANOARROW_TYPE_UINT16: - _NANOARROW_CHECK_UPPER_LIMIT(value, UINT16_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); - break; - case NANOARROW_TYPE_UINT8: - _NANOARROW_CHECK_UPPER_LIMIT(value, UINT8_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); - break; - case NANOARROW_TYPE_INT64: - case NANOARROW_TYPE_INT32: - case NANOARROW_TYPE_INT16: - case NANOARROW_TYPE_INT8: - _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX); - return ArrowArrayAppendInt(array, value); - case NANOARROW_TYPE_DOUBLE: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendDouble(data_buffer, (double)value)); - break; - case NANOARROW_TYPE_FLOAT: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFloat(data_buffer, (float)value)); - break; - case NANOARROW_TYPE_BOOL: - NANOARROW_RETURN_NOT_OK( - _ArrowArrayAppendBits(array, 1, value != 0, 1)); - break; - default: - return EINVAL; - } +static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* array, + int64_t n, uint8_t is_valid) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } - - array->length++; + if (n == 0) { return NANOARROW_OK; -} + } + + // Some type-specific handling + switch (private_data->storage_type) { + case NANOARROW_TYPE_NA: + // (An empty value for a null array *is* a null) + array->null_count += n; + array->length += n; + return NANOARROW_OK; + + case NANOARROW_TYPE_DENSE_UNION: { + // Add one null to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], 1, is_valid)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + for (int64_t i = 0; i < n; i++) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[0]->length - 1)); + } + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. + array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_SPARSE_UNION: { + // Add n nulls to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], n, is_valid)); + for (int64_t i = 1; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. + array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty( + array->children[0], n * private_data->layout.child_size_elements)); + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + break; + + default: + break; + } + + // Append n is_valid bits to the validity bitmap. If we haven't allocated a bitmap yet + // and we need to append nulls, do it now. + if (!is_valid && private_data->bitmap.buffer.data == NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } else if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } + + // Add appropriate buffer fill + struct ArrowBuffer* buffer; + int64_t size_bytes; + + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + buffer = ArrowArrayBuffer(array, i); + size_bytes = private_data->layout.element_size_bits[i] / 8; + + switch (private_data->layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_NONE: + case NANOARROW_BUFFER_TYPE_VALIDITY: + continue; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + // Append the current value at the end of the offset buffer for each element + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); + + for (int64_t j = 0; j < n; j++) { + ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j), + size_bytes); + } -static inline ArrowErrorCode ArrowArrayAppendDouble( - struct ArrowArray* array, double value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_DOUBLE: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(data_buffer, &value, sizeof(double))); - break; - case NANOARROW_TYPE_FLOAT: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFloat(data_buffer, (float)value)); - break; - default: - return EINVAL; - } + // Skip the data buffer + i++; + continue; + case NANOARROW_BUFFER_TYPE_DATA: + // Zero out the next bit of memory + if (private_data->layout.element_size_bits[i] % 8 == 0) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + } else { + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n)); + } + continue; - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + case NANOARROW_BUFFER_TYPE_TYPE_ID: + case NANOARROW_BUFFER_TYPE_UNION_OFFSET: + // These cases return above + return EINVAL; } + } + + array->length += n; + array->null_count += n * !is_valid; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 0); +} + +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 1); +} + +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, + int64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_INT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(int64_t))); + break; + case NANOARROW_TYPE_INT32: + _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, (int32_t)value)); + break; + case NANOARROW_TYPE_INT16: + _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, (int16_t)value)); + break; + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, (int8_t)value)); + break; + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); + return ArrowArrayAppendUInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + case NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_UINT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); + break; + case NANOARROW_TYPE_UINT32: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); + break; + case NANOARROW_TYPE_UINT16: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); + break; + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_UPPER_LIMIT(value, UINT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); + break; + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX); + return ArrowArrayAppendInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + case NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double))); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1); + struct ArrowBuffer* data_buffer = ArrowArrayBuffer( + array, 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY)); + int32_t offset; + int64_t large_offset; + int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + offset = ((int32_t*)offset_buffer->data)[array->length]; + if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) { + return EOVERFLOW; + } + + offset += (int32_t)value.size_bytes; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + large_offset = ((int64_t*)offset_buffer->data)[array->length]; + large_offset += value.size_bytes; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + if (value.size_bytes != fixed_size_bytes) { + return EINVAL; + } - array->length++; - return NANOARROW_OK; -} + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + default: + return EINVAL; + } -static inline ArrowErrorCode ArrowArrayAppendBytes( - struct ArrowArray* array, struct ArrowBufferView value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1); - struct ArrowBuffer* data_buffer = ArrowArrayBuffer( - array, - 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY)); - int32_t offset; - int64_t large_offset; - int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8; - - switch (private_data->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_BINARY: - offset = ((int32_t*)offset_buffer->data)[array->length]; - if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) { - return EOVERFLOW; - } - - offset += (int32_t)value.size_bytes; - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t))); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value.data.data, value.size_bytes)); - break; - - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - large_offset = ((int64_t*)offset_buffer->data)[array->length]; - large_offset += value.size_bytes; - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - offset_buffer, &large_offset, sizeof(int64_t))); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value.data.data, value.size_bytes)); - break; - - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - if (value.size_bytes != fixed_size_bytes) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value.data.data, value.size_bytes)); - break; - default: - return EINVAL; - } + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } - - array->length++; - return NANOARROW_OK; + array->length++; + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowArrayAppendString( - struct ArrowArray* array, struct ArrowStringView value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; - struct ArrowBufferView buffer_view; - buffer_view.data.data = value.data; - buffer_view.size_bytes = value.size_bytes; + struct ArrowBufferView buffer_view; + buffer_view.data.data = value.data; + buffer_view.size_bytes = value.size_bytes; - switch (private_data->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_LARGE_BINARY: - return ArrowArrayAppendBytes(array, buffer_view); - default: - return EINVAL; - } + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_BINARY: + case NANOARROW_TYPE_LARGE_BINARY: + return ArrowArrayAppendBytes(array, buffer_view); + default: + return EINVAL; + } } -static inline ArrowErrorCode ArrowArrayAppendInterval( - struct ArrowArray* array, const struct ArrowInterval* value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; +static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array, + const struct ArrowInterval* value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - switch (private_data->storage_type) { - case NANOARROW_TYPE_INTERVAL_MONTHS: { - if (value->type != NANOARROW_TYPE_INTERVAL_MONTHS) { - return EINVAL; - } + switch (private_data->storage_type) { + case NANOARROW_TYPE_INTERVAL_MONTHS: { + if (value->type != NANOARROW_TYPE_INTERVAL_MONTHS) { + return EINVAL; + } - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(data_buffer, value->months)); - break; - } - case NANOARROW_TYPE_INTERVAL_DAY_TIME: { - if (value->type != NANOARROW_TYPE_INTERVAL_DAY_TIME) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(data_buffer, value->days)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(data_buffer, value->ms)); - break; - } - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { - if (value->type != NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(data_buffer, value->months)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(data_buffer, value->days)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt64(data_buffer, value->ns)); - break; - } - default: - return EINVAL; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); + break; } + case NANOARROW_TYPE_INTERVAL_DAY_TIME: { + if (value->type != NANOARROW_TYPE_INTERVAL_DAY_TIME) { + return EINVAL; + } - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->ms)); + break; } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { + if (value->type != NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO) { + return EINVAL; + } - array->length++; - return NANOARROW_OK; -} - -static inline ArrowErrorCode ArrowArrayAppendDecimal( - struct ArrowArray* array, const struct ArrowDecimal* value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_DECIMAL128: - if (value->n_words != 2) { - return EINVAL; - } else { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value->words, 2 * sizeof(uint64_t))); - break; - } - case NANOARROW_TYPE_DECIMAL256: - if (value->n_words != 4) { - return EINVAL; - } else { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value->words, 4 * sizeof(uint64_t))); - break; - } - default: - return EINVAL; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->months)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value->days)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(data_buffer, value->ns)); + break; } + default: + return EINVAL; + } - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } - array->length++; - return NANOARROW_OK; + array->length++; + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - int64_t child_length; - - switch (private_data->storage_type) { - case NANOARROW_TYPE_LIST: - case NANOARROW_TYPE_MAP: - child_length = array->children[0]->length; - if (child_length > INT32_MAX) { - return EOVERFLOW; - } - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - ArrowArrayBuffer(array, 1), (int32_t)child_length)); - break; - case NANOARROW_TYPE_LARGE_LIST: - child_length = array->children[0]->length; - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64( - ArrowArrayBuffer(array, 1), child_length)); - break; - case NANOARROW_TYPE_FIXED_SIZE_LIST: - child_length = array->children[0]->length; - if (child_length != ((array->length + 1) * - private_data->layout.child_size_elements)) { - return EINVAL; - } - break; - case NANOARROW_TYPE_STRUCT: - for (int64_t i = 0; i < array->n_children; i++) { - child_length = array->children[i]->length; - if (child_length != (array->length + 1)) { - return EINVAL; - } - } - break; - default: - return EINVAL; - } +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + const struct ArrowDecimal* value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - if (private_data->bitmap.buffer.data != NULL) { + switch (private_data->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + if (value->n_words != 2) { + return EINVAL; + } else { NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } - - array->length++; - return NANOARROW_OK; -} - -static inline ArrowErrorCode ArrowArrayFinishUnionElement( - struct ArrowArray* array, int8_t type_id) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - int64_t child_index = _ArrowArrayUnionChildIndex(array, type_id); - if (child_index < 0 || child_index >= array->n_children) { + ArrowBufferAppend(data_buffer, value->words, 2 * sizeof(uint64_t))); + break; + } + case NANOARROW_TYPE_DECIMAL256: + if (value->n_words != 4) { return EINVAL; - } + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, 4 * sizeof(uint64_t))); + break; + } + default: + return EINVAL; + } - switch (private_data->storage_type) { - case NANOARROW_TYPE_DENSE_UNION: - // Append the target child length to the union offsets buffer - _NANOARROW_CHECK_RANGE( - array->children[child_index]->length, 0, INT32_MAX); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - ArrowArrayBuffer(array, 1), - (int32_t)array->children[child_index]->length - 1)); - break; - case NANOARROW_TYPE_SPARSE_UNION: - // Append one empty to any non-target column that isn't already the - // right length or abort if appending a null will result in a column - // with invalid length - for (int64_t i = 0; i < array->n_children; i++) { - if (i == child_index || - array->children[i]->length == (array->length + 1)) { - continue; - } - - if (array->children[i]->length != array->length) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK( - ArrowArrayAppendEmpty(array->children[i], 1)); - } - - break; - default: - return EINVAL; - } + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } - // Write to the type_ids buffer - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt8(ArrowArrayBuffer(array, 0), (int8_t)type_id)); - array->length++; - return NANOARROW_OK; + array->length++; + return NANOARROW_OK; } -static inline void ArrowArrayViewMove( - struct ArrowArrayView* src, struct ArrowArrayView* dst) { - memcpy(dst, src, sizeof(struct ArrowArrayView)); - ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); -} +static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + int64_t child_length; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_MAP: + child_length = array->children[0]->length; + if (child_length > INT32_MAX) { + return EOVERFLOW; + } + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); + break; + case NANOARROW_TYPE_LARGE_LIST: + child_length = array->children[0]->length; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length)); + break; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + child_length = array->children[0]->length; + if (child_length != + ((array->length + 1) * private_data->layout.child_size_elements)) { + return EINVAL; + } + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + child_length = array->children[i]->length; + if (child_length != (array->length + 1)) { + return EINVAL; + } + } + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + int64_t child_index = _ArrowArrayUnionChildIndex(array, type_id); + if (child_index < 0 || child_index >= array->n_children) { + return EINVAL; + } + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + // Append the target child length to the union offsets buffer + _NANOARROW_CHECK_RANGE(array->children[child_index]->length, 0, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[child_index]->length - 1)); + break; + case NANOARROW_TYPE_SPARSE_UNION: + // Append one empty to any non-target column that isn't already the right length + // or abort if appending a null will result in a column with invalid length + for (int64_t i = 0; i < array->n_children; i++) { + if (i == child_index || array->children[i]->length == (array->length + 1)) { + continue; + } -static inline int8_t ArrowArrayViewIsNull( - const struct ArrowArrayView* array_view, int64_t i) { - const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; - i += array_view->offset; - switch (array_view->storage_type) { - case NANOARROW_TYPE_NA: - return 0x01; - case NANOARROW_TYPE_DENSE_UNION: - case NANOARROW_TYPE_SPARSE_UNION: - // Unions are "never null" in Arrow land - return 0x00; - default: - return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i); - } -} + if (array->children[i]->length != array->length) { + return EINVAL; + } -static inline int8_t ArrowArrayViewUnionTypeId( - const struct ArrowArrayView* array_view, int64_t i) { - switch (array_view->storage_type) { - case NANOARROW_TYPE_DENSE_UNION: - case NANOARROW_TYPE_SPARSE_UNION: - return array_view->buffer_views[0].data.as_int8[i]; - default: - return -1; - } + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], 1)); + } + + break; + default: + return EINVAL; + } + + // Write to the type_ids buffer + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt8(ArrowArrayBuffer(array, 0), (int8_t)type_id)); + array->length++; + return NANOARROW_OK; +} + +static inline void ArrowArrayViewMove(struct ArrowArrayView* src, + struct ArrowArrayView* dst) { + memcpy(dst, src, sizeof(struct ArrowArrayView)); + ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); +} + +static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, + int64_t i) { + const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; + i += array_view->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_NA: + return 0x01; + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + // Unions are "never null" in Arrow land + return 0x00; + default: + return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i); + } +} + +static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + return array_view->buffer_views[0].data.as_int8[i]; + default: + return -1; + } } static inline int8_t ArrowArrayViewUnionChildIndex( const struct ArrowArrayView* array_view, int64_t i) { - int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); - if (array_view->union_type_id_map == NULL) { - return type_id; - } else { - return array_view->union_type_id_map[type_id]; - } + int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); + if (array_view->union_type_id_map == NULL) { + return type_id; + } else { + return array_view->union_type_id_map[type_id]; + } } static inline int64_t ArrowArrayViewUnionChildOffset( const struct ArrowArrayView* array_view, int64_t i) { - switch (array_view->storage_type) { - case NANOARROW_TYPE_DENSE_UNION: - return array_view->buffer_views[1].data.as_int32[i]; - case NANOARROW_TYPE_SPARSE_UNION: - return i; - default: - return -1; - } + switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + return array_view->buffer_views[1].data.as_int32[i]; + case NANOARROW_TYPE_SPARSE_UNION: + return i; + default: + return -1; + } } static inline int64_t ArrowArrayViewListChildOffset( const struct ArrowArrayView* array_view, int64_t i) { - switch (array_view->storage_type) { - case NANOARROW_TYPE_LIST: - return array_view->buffer_views[1].data.as_int32[i]; - case NANOARROW_TYPE_LARGE_LIST: - return array_view->buffer_views[1].data.as_int64[i]; - default: - return -1; - } -} - -static inline int64_t ArrowArrayViewGetIntUnsafe( - const struct ArrowArrayView* array_view, int64_t i) { - const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; - i += array_view->offset; - switch (array_view->storage_type) { - case NANOARROW_TYPE_INT64: - return data_view->data.as_int64[i]; - case NANOARROW_TYPE_UINT64: - return data_view->data.as_uint64[i]; - case NANOARROW_TYPE_INTERVAL_MONTHS: - case NANOARROW_TYPE_INT32: - return data_view->data.as_int32[i]; - case NANOARROW_TYPE_UINT32: - return data_view->data.as_uint32[i]; - case NANOARROW_TYPE_INT16: - return data_view->data.as_int16[i]; - case NANOARROW_TYPE_UINT16: - return data_view->data.as_uint16[i]; - case NANOARROW_TYPE_INT8: - return data_view->data.as_int8[i]; - case NANOARROW_TYPE_UINT8: - return data_view->data.as_uint8[i]; - case NANOARROW_TYPE_DOUBLE: - return (int64_t)data_view->data.as_double[i]; - case NANOARROW_TYPE_FLOAT: - return (int64_t)data_view->data.as_float[i]; - case NANOARROW_TYPE_BOOL: - return ArrowBitGet(data_view->data.as_uint8, i); - default: - return INT64_MAX; - } + switch (array_view->storage_type) { + case NANOARROW_TYPE_LIST: + return array_view->buffer_views[1].data.as_int32[i]; + case NANOARROW_TYPE_LARGE_LIST: + return array_view->buffer_views[1].data.as_int64[i]; + default: + return -1; + } +} + +static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView* array_view, + int64_t i) { + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + i += array_view->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return (int64_t)data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return (int64_t)data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return INT64_MAX; + } } static inline uint64_t ArrowArrayViewGetUIntUnsafe( const struct ArrowArrayView* array_view, int64_t i) { - i += array_view->offset; - const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; - switch (array_view->storage_type) { - case NANOARROW_TYPE_INT64: - return data_view->data.as_int64[i]; - case NANOARROW_TYPE_UINT64: - return data_view->data.as_uint64[i]; - case NANOARROW_TYPE_INTERVAL_MONTHS: - case NANOARROW_TYPE_INT32: - return data_view->data.as_int32[i]; - case NANOARROW_TYPE_UINT32: - return data_view->data.as_uint32[i]; - case NANOARROW_TYPE_INT16: - return data_view->data.as_int16[i]; - case NANOARROW_TYPE_UINT16: - return data_view->data.as_uint16[i]; - case NANOARROW_TYPE_INT8: - return data_view->data.as_int8[i]; - case NANOARROW_TYPE_UINT8: - return data_view->data.as_uint8[i]; - case NANOARROW_TYPE_DOUBLE: - return (uint64_t)data_view->data.as_double[i]; - case NANOARROW_TYPE_FLOAT: - return (uint64_t)data_view->data.as_float[i]; - case NANOARROW_TYPE_BOOL: - return ArrowBitGet(data_view->data.as_uint8, i); - default: - return UINT64_MAX; - } + i += array_view->offset; + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INTERVAL_MONTHS: + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return (uint64_t)data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return (uint64_t)data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return UINT64_MAX; + } } static inline double ArrowArrayViewGetDoubleUnsafe( const struct ArrowArrayView* array_view, int64_t i) { - i += array_view->offset; - const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; - switch (array_view->storage_type) { - case NANOARROW_TYPE_INT64: - return (double)data_view->data.as_int64[i]; - case NANOARROW_TYPE_UINT64: - return (double)data_view->data.as_uint64[i]; - case NANOARROW_TYPE_INT32: - return data_view->data.as_int32[i]; - case NANOARROW_TYPE_UINT32: - return data_view->data.as_uint32[i]; - case NANOARROW_TYPE_INT16: - return data_view->data.as_int16[i]; - case NANOARROW_TYPE_UINT16: - return data_view->data.as_uint16[i]; - case NANOARROW_TYPE_INT8: - return data_view->data.as_int8[i]; - case NANOARROW_TYPE_UINT8: - return data_view->data.as_uint8[i]; - case NANOARROW_TYPE_DOUBLE: - return data_view->data.as_double[i]; - case NANOARROW_TYPE_FLOAT: - return data_view->data.as_float[i]; - case NANOARROW_TYPE_BOOL: - return ArrowBitGet(data_view->data.as_uint8, i); - default: - return DBL_MAX; - } + i += array_view->offset; + const struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return (double)data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return (double)data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return DBL_MAX; + } } static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( const struct ArrowArrayView* array_view, int64_t i) { - i += array_view->offset; - const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; - const char* data_view = array_view->buffer_views[2].data.as_char; - - struct ArrowStringView view; - switch (array_view->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_BINARY: - view.data = data_view + offsets_view->data.as_int32[i]; - view.size_bytes = offsets_view->data.as_int32[i + 1] - - offsets_view->data.as_int32[i]; - break; - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - view.data = data_view + offsets_view->data.as_int64[i]; - view.size_bytes = offsets_view->data.as_int64[i + 1] - - offsets_view->data.as_int64[i]; - break; - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - view.size_bytes = array_view->layout.element_size_bits[1] / 8; - view.data = array_view->buffer_views[1].data.as_char + - (i * view.size_bytes); - break; - default: - view.data = NULL; - view.size_bytes = 0; - break; - } - - return view; + i += array_view->offset; + const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const char* data_view = array_view->buffer_views[2].data.as_char; + + struct ArrowStringView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.data = data_view + offsets_view->data.as_int32[i]; + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + break; + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.data = data_view + offsets_view->data.as_int64[i]; + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data = array_view->buffer_views[1].data.as_char + (i * view.size_bytes); + break; + default: + view.data = NULL; + view.size_bytes = 0; + break; + } + + return view; } static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( const struct ArrowArrayView* array_view, int64_t i) { - i += array_view->offset; - const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; - const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8; - - struct ArrowBufferView view; - switch (array_view->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_BINARY: - view.size_bytes = offsets_view->data.as_int32[i + 1] - - offsets_view->data.as_int32[i]; - view.data.as_uint8 = data_view + offsets_view->data.as_int32[i]; - break; - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - view.size_bytes = offsets_view->data.as_int64[i + 1] - - offsets_view->data.as_int64[i]; - view.data.as_uint8 = data_view + offsets_view->data.as_int64[i]; - break; - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - view.size_bytes = array_view->layout.element_size_bits[1] / 8; - view.data.as_uint8 = array_view->buffer_views[1].data.as_uint8 + - (i * view.size_bytes); - break; - default: - view.data.data = NULL; - view.size_bytes = 0; - break; - } - - return view; + i += array_view->offset; + const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8; + + struct ArrowBufferView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int32[i]; + break; + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data.as_uint8 = + array_view->buffer_views[1].data.as_uint8 + (i * view.size_bytes); + break; + default: + view.data.data = NULL; + view.size_bytes = 0; + break; + } + + return view; } static inline void ArrowArrayViewGetIntervalUnsafe( - const struct ArrowArrayView* array_view, - int64_t i, - struct ArrowInterval* out) { - const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; - switch (array_view->storage_type) { - case NANOARROW_TYPE_INTERVAL_MONTHS: { - const size_t size = sizeof(int32_t); - memcpy(&out->months, data_view + i * size, sizeof(int32_t)); - break; - } - case NANOARROW_TYPE_INTERVAL_DAY_TIME: { - const size_t size = sizeof(int32_t) + sizeof(int32_t); - memcpy(&out->days, data_view + i * size, sizeof(int32_t)); - memcpy(&out->ms, data_view + i * size + 4, sizeof(int32_t)); - break; - } - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { - const size_t size = sizeof(int32_t) + sizeof(int32_t) + - sizeof(int64_t); - memcpy(&out->months, data_view + i * size, sizeof(int32_t)); - memcpy(&out->days, data_view + i * size + 4, sizeof(int32_t)); - memcpy(&out->ns, data_view + i * size + 8, sizeof(int64_t)); - break; - } - default: - break; - } -} - -static inline void ArrowArrayViewGetDecimalUnsafe( - const struct ArrowArrayView* array_view, - int64_t i, - struct ArrowDecimal* out) { - i += array_view->offset; - const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; - switch (array_view->storage_type) { - case NANOARROW_TYPE_DECIMAL128: - ArrowDecimalSetBytes(out, data_view + (i * 16)); - break; - case NANOARROW_TYPE_DECIMAL256: - ArrowDecimalSetBytes(out, data_view + (i * 32)); - break; - default: - memset(out->words, 0, sizeof(out->words)); - break; - } + const struct ArrowArrayView* array_view, int64_t i, struct ArrowInterval* out) { + const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INTERVAL_MONTHS: { + const size_t size = sizeof(int32_t); + memcpy(&out->months, data_view + i * size, sizeof(int32_t)); + break; + } + case NANOARROW_TYPE_INTERVAL_DAY_TIME: { + const size_t size = sizeof(int32_t) + sizeof(int32_t); + memcpy(&out->days, data_view + i * size, sizeof(int32_t)); + memcpy(&out->ms, data_view + i * size + 4, sizeof(int32_t)); + break; + } + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { + const size_t size = sizeof(int32_t) + sizeof(int32_t) + sizeof(int64_t); + memcpy(&out->months, data_view + i * size, sizeof(int32_t)); + memcpy(&out->days, data_view + i * size + 4, sizeof(int32_t)); + memcpy(&out->ns, data_view + i * size + 8, sizeof(int64_t)); + break; + } + default: + break; + } +} + +static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out) { + i += array_view->offset; + const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + switch (array_view->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + ArrowDecimalSetBytes(out, data_view + (i * 16)); + break; + case NANOARROW_TYPE_DECIMAL256: + ArrowDecimalSetBytes(out, data_view + (i * 32)); + break; + default: + memset(out->words, 0, sizeof(out->words)); + break; + } } #ifdef __cplusplus diff --git a/libtiledbsoma/src/external/src/nanoarrow/nanoarrow.c b/libtiledbsoma/src/external/src/nanoarrow/nanoarrow.c index c946c01362..0af57027a5 100644 --- a/libtiledbsoma/src/external/src/nanoarrow/nanoarrow.c +++ b/libtiledbsoma/src/external/src/nanoarrow/nanoarrow.c @@ -454,8 +454,7 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decim #include "nanoarrow.h" -// -- changed for tiledb-r static -void ArrowSchemaReleaseInternal(struct ArrowSchema* schema) { +static void ArrowSchemaReleaseInternal(struct ArrowSchema* schema) { if (schema->format != NULL) ArrowFree((void*)schema->format); if (schema->name != NULL) ArrowFree((void*)schema->name); if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); @@ -2025,8 +2024,7 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, #include "nanoarrow.h" -// -- changed for tiledb-r static -void ArrowArrayReleaseInternal(struct ArrowArray* array) { +static void ArrowArrayReleaseInternal(struct ArrowArray* array) { // Release buffers held by this array struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; @@ -2069,8 +2067,7 @@ void ArrowArrayReleaseInternal(struct ArrowArray* array) { array->release = NULL; } -// -- changed for tiledb-r static -ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, +static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, enum ArrowType storage_type) { switch (storage_type) { case NANOARROW_TYPE_UNINITIALIZED: From 46aebebecdc756f81d3c3d868bc054640baf557b Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 4 Apr 2024 12:52:24 -0500 Subject: [PATCH 4/9] Re-adjust after rebase --- apis/r/src/borrowed.c | 146 ------------------------------- libtiledbsoma/src/CMakeLists.txt | 2 +- 2 files changed, 1 insertion(+), 147 deletions(-) delete mode 100644 apis/r/src/borrowed.c diff --git a/apis/r/src/borrowed.c b/apis/r/src/borrowed.c deleted file mode 100644 index d1293dfaa0..0000000000 --- a/apis/r/src/borrowed.c +++ /dev/null @@ -1,146 +0,0 @@ - -#include "tiledbsoma/nanoarrow/nanoarrow.h" - -// We need three entry points from nanoarrow that are declared 'static' in the official -// (now vendored) C library so we bring them here -void ArrowSchemaReleaseInternal(struct ArrowSchema* schema) { - if (schema->format != NULL) ArrowFree((void*)schema->format); - if (schema->name != NULL) ArrowFree((void*)schema->name); - if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); - - // This object owns the memory for all the children, but those - // children may have been generated elsewhere and might have - // their own release() callback. - if (schema->children != NULL) { - for (int64_t i = 0; i < schema->n_children; i++) { - if (schema->children[i] != NULL) { - if (schema->children[i]->release != NULL) { - ArrowSchemaRelease(schema->children[i]); - } - - ArrowFree(schema->children[i]); - } - } - - ArrowFree(schema->children); - } - - // This object owns the memory for the dictionary but it - // may have been generated somewhere else and have its own - // release() callback. - if (schema->dictionary != NULL) { - if (schema->dictionary->release != NULL) { - ArrowSchemaRelease(schema->dictionary); - } - - ArrowFree(schema->dictionary); - } - - // private data not currently used - if (schema->private_data != NULL) { - ArrowFree(schema->private_data); - } - - schema->release = NULL; -} - -void ArrowArrayReleaseInternal(struct ArrowArray* array) { - // Release buffers held by this array - struct ArrowArrayPrivateData* private_data = - (struct ArrowArrayPrivateData*)array->private_data; - if (private_data != NULL) { - ArrowBitmapReset(&private_data->bitmap); - ArrowBufferReset(&private_data->buffers[0]); - ArrowBufferReset(&private_data->buffers[1]); - ArrowFree(private_data); - } - - // This object owns the memory for all the children, but those - // children may have been generated elsewhere and might have - // their own release() callback. - if (array->children != NULL) { - for (int64_t i = 0; i < array->n_children; i++) { - if (array->children[i] != NULL) { - if (array->children[i]->release != NULL) { - ArrowArrayRelease(array->children[i]); - } - - ArrowFree(array->children[i]); - } - } - - ArrowFree(array->children); - } - - // This object owns the memory for the dictionary but it - // may have been generated somewhere else and have its own - // release() callback. - if (array->dictionary != NULL) { - if (array->dictionary->release != NULL) { - ArrowArrayRelease(array->dictionary); - } - - ArrowFree(array->dictionary); - } - - // Mark released - array->release = NULL; -} - -ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, - enum ArrowType storage_type) { - switch (storage_type) { - case NANOARROW_TYPE_UNINITIALIZED: - case NANOARROW_TYPE_NA: - array->n_buffers = 0; - break; - - case NANOARROW_TYPE_FIXED_SIZE_LIST: - case NANOARROW_TYPE_STRUCT: - case NANOARROW_TYPE_SPARSE_UNION: - array->n_buffers = 1; - break; - - case NANOARROW_TYPE_LIST: - case NANOARROW_TYPE_LARGE_LIST: - case NANOARROW_TYPE_MAP: - case NANOARROW_TYPE_BOOL: - case NANOARROW_TYPE_UINT8: - case NANOARROW_TYPE_INT8: - case NANOARROW_TYPE_UINT16: - case NANOARROW_TYPE_INT16: - case NANOARROW_TYPE_UINT32: - case NANOARROW_TYPE_INT32: - case NANOARROW_TYPE_UINT64: - case NANOARROW_TYPE_INT64: - case NANOARROW_TYPE_HALF_FLOAT: - case NANOARROW_TYPE_FLOAT: - case NANOARROW_TYPE_DOUBLE: - case NANOARROW_TYPE_DECIMAL128: - case NANOARROW_TYPE_DECIMAL256: - case NANOARROW_TYPE_INTERVAL_MONTHS: - case NANOARROW_TYPE_INTERVAL_DAY_TIME: - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - case NANOARROW_TYPE_DENSE_UNION: - array->n_buffers = 2; - break; - - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_LARGE_BINARY: - array->n_buffers = 3; - break; - - default: - return EINVAL; - - return NANOARROW_OK; - } - - struct ArrowArrayPrivateData* private_data = - (struct ArrowArrayPrivateData*)array->private_data; - private_data->storage_type = storage_type; - return NANOARROW_OK; -} diff --git a/libtiledbsoma/src/CMakeLists.txt b/libtiledbsoma/src/CMakeLists.txt index c406a7233f..74008acca7 100644 --- a/libtiledbsoma/src/CMakeLists.txt +++ b/libtiledbsoma/src/CMakeLists.txt @@ -50,7 +50,7 @@ message(STATUS "Building with commit hash ${BUILD_COMMIT_HASH}") # Common object library # ########################################################### set_source_files_properties( - ${CMAKE_CURRENT_SOURCE_DIR}/utils/nanoarrow.c PROPERTIES LANGUAGE CXX) + ${CMAKE_CURRENT_SOURCE_DIR}/external/src/nanoarrow/nanoarrow.c PROPERTIES LANGUAGE CXX) add_library(TILEDB_SOMA_OBJECTS OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/reindexer/reindexer.cc From f2ea6f54b5433bea9501dbbaa268d86ef38dd5f7 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 4 Apr 2024 13:51:16 -0500 Subject: [PATCH 5/9] Path adjustment for nanoarrow include, pkg-config update --- apis/r/inst/include/tiledbsoma_types.h | 2 +- apis/r/src/rinterface.cpp | 2 +- apis/r/src/riterator.cpp | 2 +- apis/r/src/rutilities.cpp | 2 +- apis/r/src/xptr-utils.h | 8 -------- libtiledbsoma/cmake/inputs/tiledbsoma.pc.in | 4 ++-- libtiledbsoma/src/utils/arrow_adapter.h | 2 +- 7 files changed, 7 insertions(+), 15 deletions(-) diff --git a/apis/r/inst/include/tiledbsoma_types.h b/apis/r/inst/include/tiledbsoma_types.h index 27f327c085..ecbfb375d7 100644 --- a/apis/r/inst/include/tiledbsoma_types.h +++ b/apis/r/inst/include/tiledbsoma_types.h @@ -15,7 +15,7 @@ #define TILEDB_NO_API_DEPRECATION_WARNINGS #endif -#include // for C interface to Arrow +#include // for C interface to Arrow #include // for QueryCondition etc #define ARROW_SCHEMA_AND_ARRAY_DEFINED 1 #include diff --git a/apis/r/src/rinterface.cpp b/apis/r/src/rinterface.cpp index dba41a3566..c895b2f45e 100644 --- a/apis/r/src/rinterface.cpp +++ b/apis/r/src/rinterface.cpp @@ -1,6 +1,6 @@ #include // for R interface to C++ #include // for C interface to Arrow (via R package) -#include // for C/C++ interface to Arrow +#include // for C/C++ interface to Arrow #include // for fromInteger64 // we currently get deprecation warnings by default which are noisy diff --git a/apis/r/src/riterator.cpp b/apis/r/src/riterator.cpp index f0d9aa4692..9571fa7141 100644 --- a/apis/r/src/riterator.cpp +++ b/apis/r/src/riterator.cpp @@ -5,7 +5,7 @@ #include // for R interface to C++ #include // for C interface to Arrow (via R package nanoarrow) -#include +#include #include // for fromInteger64 #include diff --git a/apis/r/src/rutilities.cpp b/apis/r/src/rutilities.cpp index 7f58300d00..d7a86354c2 100644 --- a/apis/r/src/rutilities.cpp +++ b/apis/r/src/rutilities.cpp @@ -5,7 +5,7 @@ #endif #include // for R interface to C++ -#include // for C interface to Arrow +#include // for C interface to Arrow #include // for fromInteger64 #include diff --git a/apis/r/src/xptr-utils.h b/apis/r/src/xptr-utils.h index d5610bc9ac..847bab51a0 100644 --- a/apis/r/src/xptr-utils.h +++ b/apis/r/src/xptr-utils.h @@ -24,9 +24,6 @@ const tiledb_xptr_object tiledb_xptr_vlv_buf_t { 180 }; const tiledb_xptr_object tiledb_xptr_query_buf_t { 190 }; // the definitions above are internal to tiledb-r but we need a new value here if we want tag the external pointer -const tiledb_xptr_object tiledb_arrow_array_t { 300 }; -const tiledb_xptr_object tiledb_arrow_schema_t { 310 }; - const tiledb_xptr_object tiledb_soma_reader_t { 500 }; // templated checkers for external pointer tags @@ -52,10 +49,6 @@ template <> inline const int32_t XPtrTagType = til // template <> inline const int32_t XPtrTagType = tiledb_xptr_vlv_buf_t; // template <> inline const int32_t XPtrTagType = tiledb_xptr_query_buf_t; - -template <> inline const int32_t XPtrTagType = tiledb_arrow_array_t; -template <> inline const int32_t XPtrTagType = tiledb_arrow_schema_t; - template <> inline const int32_t XPtrTagType = tiledb_soma_reader_t; template Rcpp::XPtr make_xptr(T* p, bool finalize=true) { @@ -81,4 +74,3 @@ template void check_xptr_tag(Rcpp::XPtr ptr) { // in rinterface.cpp Rcpp::XPtr schema_owning_xptr(void); Rcpp::XPtr array_owning_xptr(void); - diff --git a/libtiledbsoma/cmake/inputs/tiledbsoma.pc.in b/libtiledbsoma/cmake/inputs/tiledbsoma.pc.in index f88500f416..342e30212e 100644 --- a/libtiledbsoma/cmake/inputs/tiledbsoma.pc.in +++ b/libtiledbsoma/cmake/inputs/tiledbsoma.pc.in @@ -9,6 +9,6 @@ URL: https://github.com/single-cell-data/TileDB-SOMA Version: @VERSION@ Requires: @PKGCONF_REQ_PUB@ Requires.private: @PKGCONF_REQ_PRIV@ -Cflags: -I"${includedir}" +Cflags: -I"${includedir}" -I"${includedir}/tiledbsoma" Libs: -L"${libdir}" -ltiledbsoma -Libs.private: -L"${libdir}" -ltiledbsoma @PKGCONF_LIBS_PRIV@ \ No newline at end of file +Libs.private: -L"${libdir}" -ltiledbsoma @PKGCONF_LIBS_PRIV@ diff --git a/libtiledbsoma/src/utils/arrow_adapter.h b/libtiledbsoma/src/utils/arrow_adapter.h index d0821dfc51..818f5cc370 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.h +++ b/libtiledbsoma/src/utils/arrow_adapter.h @@ -8,7 +8,7 @@ // https://arrow.apache.org/docs/format/Columnar.html#buffer-listing-for-each-layout // https://arrow.apache.org/docs/format/CDataInterface.html#exporting-a-simple-int32-array -#include "tiledbsoma/nanoarrow/nanoarrow.hpp" +#include "nanoarrow/nanoarrow.hpp" namespace tiledbsoma { From dfd48db0b7f4657bf7cf498ddf03e07ea3f00139 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 4 Apr 2024 13:59:06 -0500 Subject: [PATCH 6/9] Adjusting Python too --- apis/python/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apis/python/setup.py b/apis/python/setup.py index bc3cf4818e..086b5c1136 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -217,6 +217,7 @@ def run(self): INC_DIRS = [ "dist_links/libtiledbsoma/include", "dist_links/libtiledbsoma/external/include", + "dist_links/libtiledbsoma/external/include/tiledbsoma", "../../build/externals/install/include", str(tiledbsoma_dir / "include"), str(tiledbsoma_dir.parent / "build/externals/install/include"), From 6351e96c41196a43e51481d3a3fd374ac960bcb8 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 4 Apr 2024 14:48:54 -0500 Subject: [PATCH 7/9] Adjusting Python too, once more --- apis/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apis/python/setup.py b/apis/python/setup.py index 086b5c1136..12b1f896df 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -217,9 +217,9 @@ def run(self): INC_DIRS = [ "dist_links/libtiledbsoma/include", "dist_links/libtiledbsoma/external/include", - "dist_links/libtiledbsoma/external/include/tiledbsoma", "../../build/externals/install/include", str(tiledbsoma_dir / "include"), + str(tiledbsoma_dir / "include/tiledbsoma"), str(tiledbsoma_dir.parent / "build/externals/install/include"), str(tiledbsoma_dir / "include"), str(tiledb_dir / "include"), From 3a3564e82426c8000db83f39bb097c36170d9cbb Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 4 Apr 2024 15:37:21 -0500 Subject: [PATCH 8/9] Adjust include path for R for the non pkg-config case too --- apis/r/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apis/r/configure b/apis/r/configure index 1fa5d6e01d..8eb1f7ab6c 100755 --- a/apis/r/configure +++ b/apis/r/configure @@ -46,7 +46,7 @@ export CXX="`${R_HOME}/bin/R CMD config CXX`" export CMAKE_OSX_ARCHITECTURES="`uname -m`" tools/build_libtiledbsoma.sh -pkgincl="-I../inst/tiledb/include -I../inst/tiledbsoma/include" +pkgincl="-I../inst/tiledb/include -I../inst/tiledbsoma/include -I../inst/tiledbsoma/include/tiledbsoma" pkglibs="-ltiledb -L../inst/tiledb/lib -ltiledbsoma -L../inst/tiledbsoma/lib" rpath="-Wl,-rpath,'\$\$ORIGIN/../tiledb/lib' -Wl,-rpath,'\$\$ORIGIN/../tiledbsoma/lib'" macosver=`${R_HOME}/bin/Rscript -e 'if (Sys.info()["sysname"] == "Darwin") cat("-mmacosx-version-min=11.0") else cat("")'` From a700a7f69ecb4ddd79b3cba64ba1ba222a493a8c Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 4 Apr 2024 20:39:31 -0500 Subject: [PATCH 9/9] The Python installation for 'interop' needs another includes case --- apis/python/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apis/python/setup.py b/apis/python/setup.py index 12b1f896df..5cec080012 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -217,6 +217,7 @@ def run(self): INC_DIRS = [ "dist_links/libtiledbsoma/include", "dist_links/libtiledbsoma/external/include", + "dist_links/libtiledbsoma/src/external/include", "../../build/externals/install/include", str(tiledbsoma_dir / "include"), str(tiledbsoma_dir / "include/tiledbsoma"),