From 056d4b98232ec3c40e76fb5fb9b4f90f91534aff Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Wed, 28 Feb 2024 19:53:52 -0600 Subject: [PATCH] [c++] Refactor metadata * Store read-mode `Array` or `Group` that holds metadata values valid as a class member * `create` methods take in timestamps which indicate when the metadata values for `soma_object_type` and `encoding_version` should be written and when the write-mode `SOMAObject` should be opened * Make `soma_object_type` and `encoding_version` consts * Use keystroke saver `TimestampRange` * Refactor unit tests to reflect these changes --- apis/python/src/tiledbsoma/soma_array.cc | 47 +++--- libtiledbsoma/src/soma/soma_array.cc | 148 +++++++++++------- libtiledbsoma/src/soma/soma_array.h | 48 ++++-- libtiledbsoma/src/soma/soma_collection.cc | 39 +++-- libtiledbsoma/src/soma/soma_collection.h | 8 +- libtiledbsoma/src/soma/soma_dataframe.cc | 10 +- libtiledbsoma/src/soma/soma_dataframe.h | 16 +- libtiledbsoma/src/soma/soma_dense_ndarray.cc | 11 +- libtiledbsoma/src/soma/soma_dense_ndarray.h | 16 +- libtiledbsoma/src/soma/soma_experiment.cc | 25 +-- libtiledbsoma/src/soma/soma_experiment.h | 23 ++- libtiledbsoma/src/soma/soma_group.cc | 130 +++++++++------ libtiledbsoma/src/soma/soma_group.h | 42 ++++- libtiledbsoma/src/soma/soma_measurement.cc | 41 +++-- libtiledbsoma/src/soma/soma_measurement.h | 23 ++- libtiledbsoma/src/soma/soma_object.cc | 7 +- libtiledbsoma/src/soma/soma_object.h | 3 +- libtiledbsoma/src/soma/soma_sparse_ndarray.cc | 10 +- libtiledbsoma/src/soma/soma_sparse_ndarray.h | 16 +- libtiledbsoma/src/utils/common.h | 6 + libtiledbsoma/test/unit_soma_array.cc | 48 ++++-- libtiledbsoma/test/unit_soma_collection.cc | 127 ++++++++++----- libtiledbsoma/test/unit_soma_dataframe.cc | 62 +++++--- libtiledbsoma/test/unit_soma_dense_ndarray.cc | 70 +++++---- libtiledbsoma/test/unit_soma_group.cc | 55 ++++--- .../test/unit_soma_sparse_ndarray.cc | 58 ++++--- 26 files changed, 684 insertions(+), 405 
deletions(-) diff --git a/apis/python/src/tiledbsoma/soma_array.cc b/apis/python/src/tiledbsoma/soma_array.cc index adb57cdcb3..6b56b4b7ea 100644 --- a/apis/python/src/tiledbsoma/soma_array.cc +++ b/apis/python/src/tiledbsoma/soma_array.cc @@ -39,6 +39,25 @@ namespace py = pybind11; using namespace py::literals; using namespace tiledbsoma; +py::dict meta(SOMAArray& array) { + py::dict results; + + for (auto [key, val] : array.get_metadata()) { + auto [tdb_type, value_num, value] = val; + + if (tdb_type == TILEDB_STRING_UTF8 || tdb_type == TILEDB_STRING_ASCII) { + auto py_buf = py::array(py::dtype("|S1"), value_num, value); + auto res = py_buf.attr("tobytes")().attr("decode")("UTF-8"); + results[py::str(key)] = res; + } else { + py::dtype value_type = tdb_to_np_dtype(tdb_type, 1); + auto res = py::array(value_type, value_num, value).attr("item")(0); + results[py::str(key)] = res; + } + } + return results; +} + py::tuple get_enum(SOMAArray& sr, std::string attr_name) { auto attr_to_enmrs = sr.get_attr_to_enum_mapping(); if (attr_to_enmrs.count(attr_name) == 0) @@ -644,34 +663,10 @@ void load_soma_array(py::module& m) { "get_metadata", py::overload_cast(&SOMAArray::get_metadata)) - .def_property_readonly( - "meta", - [](SOMAArray& soma_dataframe) -> py::dict { - py::dict results; - - for (auto const& [key, val] : soma_dataframe.get_metadata()) { - tiledb_datatype_t tdb_type = std::get( - val); - uint32_t value_num = std::get(val); - const void* value = std::get(val); - - if (tdb_type == TILEDB_STRING_UTF8) { - results[py::str(key)] = py::str( - std::string((const char*)value, value_num)); - } else if (tdb_type == TILEDB_STRING_ASCII) { - results[py::str(key)] = py::bytes( - std::string((const char*)value, value_num)); - } else { - py::dtype value_type = tdb_to_np_dtype(tdb_type, 1); - results[py::str(key)] = py::array( - value_type, value_num, value); - } - } - return results; - }) + .def_property_readonly("meta", meta) .def("has_metadata", &SOMAArray::has_metadata) 
.def("metadata_num", &SOMAArray::metadata_num); } -} // namespace libtiledbsomacpp \ No newline at end of file +} // namespace libtiledbsomacpp diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 081b8fd4ad..231c326547 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -41,19 +41,40 @@ using namespace tiledb; //= public static //=================================================================== -void SOMAArray::create( +std::unique_ptr SOMAArray::create( std::shared_ptr ctx, std::string_view uri, ArraySchema schema, - std::string soma_type) { + std::string soma_type, + std::optional timestamp) { Array::create(std::string(uri), schema); - auto array = Array(*ctx->tiledb_ctx(), std::string(uri), TILEDB_WRITE); - array.put_metadata( - "soma_object_type", + + std::shared_ptr array; + if (timestamp) { + array = std::make_shared( + *ctx->tiledb_ctx(), + std::string(uri), + TILEDB_WRITE, + TemporalPolicy( + TimestampStartEnd, timestamp->first, timestamp->second)); + } else { + array = std::make_shared( + *ctx->tiledb_ctx(), std::string(uri), TILEDB_WRITE); + } + + array->put_metadata( + SOMA_OBJECT_TYPE_KEY, TILEDB_STRING_UTF8, static_cast(soma_type.length()), soma_type.c_str()); - array.close(); + + array->put_metadata( + ENCODING_VERSION_KEY, + TILEDB_STRING_UTF8, + static_cast(ENCODING_VERSION_VAL.length()), + ENCODING_VERSION_VAL.c_str()); + + return std::make_unique(ctx, array, timestamp); } std::unique_ptr SOMAArray::open( @@ -64,7 +85,7 @@ std::unique_ptr SOMAArray::open( std::vector column_names, std::string_view batch_size, ResultOrder result_order, - std::optional> timestamp) { + std::optional timestamp) { LOG_DEBUG( fmt::format("[SOMAArray] static method 'cfg' opening array '{}'", uri)); return std::make_unique( @@ -86,7 +107,7 @@ std::unique_ptr SOMAArray::open( std::vector column_names, std::string_view batch_size, ResultOrder result_order, - std::optional> timestamp) { + 
std::optional timestamp) { LOG_DEBUG( fmt::format("[SOMAArray] static method 'ctx' opening array '{}'", uri)); return std::make_unique( @@ -112,7 +133,7 @@ SOMAArray::SOMAArray( std::vector column_names, std::string_view batch_size, ResultOrder result_order, - std::optional> timestamp) + std::optional timestamp) : uri_(util::rstrip_uri(uri)) , result_order_(result_order) , timestamp_(timestamp) { @@ -130,7 +151,7 @@ SOMAArray::SOMAArray( std::vector column_names, std::string_view batch_size, ResultOrder result_order, - std::optional> timestamp) + std::optional timestamp) : uri_(util::rstrip_uri(uri)) , ctx_(ctx) , result_order_(result_order) @@ -140,20 +161,40 @@ SOMAArray::SOMAArray( fill_metadata_cache(); } +SOMAArray::SOMAArray( + std::shared_ptr ctx, + std::shared_ptr arr, + std::optional timestamp) + : uri_(util::rstrip_uri(arr->uri())) + , name_(std::string(std::filesystem::path(arr->uri()).filename())) + , ctx_(ctx) + , batch_size_("auto") + , result_order_(ResultOrder::automatic) + , timestamp_(timestamp) + , mq_(std::make_unique(arr, ctx_->tiledb_ctx(), name_)) + , arr_(arr) { + reset({}, batch_size_, result_order_); + fill_metadata_cache(); +} + void SOMAArray::fill_metadata_cache() { - std::shared_ptr array; if (arr_->query_type() == TILEDB_WRITE) { - array = std::make_shared(*ctx_->tiledb_ctx(), uri_, TILEDB_READ); + meta_cache_arr_ = std::make_shared( + *ctx_->tiledb_ctx(), + uri_, + TILEDB_READ, + TemporalPolicy( + TimestampStartEnd, timestamp()->first, timestamp()->second)); } else { - array = arr_; + meta_cache_arr_ = arr_; } - for (uint64_t idx = 0; idx < array->metadata_num(); ++idx) { + for (uint64_t idx = 0; idx < meta_cache_arr_->metadata_num(); ++idx) { std::string key; tiledb_datatype_t value_type; uint32_t value_num; const void* value; - array->get_metadata_from_index( + meta_cache_arr_->get_metadata_from_index( idx, &key, &value_type, &value_num, &value); MetadataValue mdval(value_type, value_num, value); std::pair mdpair(key, mdval); @@ 
-169,26 +210,22 @@ std::shared_ptr SOMAArray::ctx() { return ctx_; }; -void SOMAArray::open( - OpenMode mode, std::optional> timestamp) { - auto tdb_mode = mode == OpenMode::read ? TILEDB_READ : TILEDB_WRITE; - arr_->open(tdb_mode); - if (timestamp) { - if (timestamp->first > timestamp->second) { - throw std::invalid_argument("timestamp start > end"); - } - arr_->set_open_timestamp_start(timestamp->first); - arr_->set_open_timestamp_end(timestamp->second); - arr_->close(); - arr_->open(tdb_mode); - } +void SOMAArray::open(OpenMode mode, std::optional timestamp) { + timestamp_ = timestamp; + + validate(mode, name_, timestamp); reset(column_names(), batch_size_, result_order_); + fill_metadata_cache(); } void SOMAArray::close() { + if (arr_->query_type() == TILEDB_WRITE) + meta_cache_arr_->close(); + // Close the array through the managed query to ensure any pending queries // are completed. mq_->close(); + metadata_.clear(); } void SOMAArray::reset( @@ -511,35 +548,41 @@ void SOMAArray::set_metadata( tiledb_datatype_t value_type, uint32_t value_num, const void* value) { - if (key.compare("soma_object_type") == 0) { - throw TileDBSOMAError("soma_object_type cannot be modified."); - } + if (key.compare(SOMA_OBJECT_TYPE_KEY) == 0) + throw TileDBSOMAError(SOMA_OBJECT_TYPE_KEY + " cannot be modified."); + + if (key.compare(ENCODING_VERSION_KEY) == 0) + throw TileDBSOMAError(ENCODING_VERSION_KEY + " cannot be modified."); arr_->put_metadata(key, value_type, value_num, value); + MetadataValue mdval(value_type, value_num, value); std::pair mdpair(key, mdval); metadata_.insert(mdpair); } void SOMAArray::delete_metadata(const std::string& key) { - if (key.compare("soma_object_type") == 0) { - throw TileDBSOMAError("soma_object_type cannot be deleted."); - } + if (key.compare(SOMA_OBJECT_TYPE_KEY) == 0) + throw TileDBSOMAError(SOMA_OBJECT_TYPE_KEY + " cannot be deleted."); + + if (key.compare(ENCODING_VERSION_KEY) == 0) + throw TileDBSOMAError(ENCODING_VERSION_KEY + " cannot 
be deleted."); + arr_->delete_metadata(key); metadata_.erase(key); } -std::map SOMAArray::get_metadata() { - return metadata_; -} - std::optional SOMAArray::get_metadata(const std::string& key) { - if (metadata_.count(key) == 0) { + if (metadata_.count(key) == 0) return std::nullopt; - } + return metadata_[key]; } +std::map SOMAArray::get_metadata() { + return metadata_; +} + bool SOMAArray::has_metadata(const std::string& key) { return metadata_.count(key) != 0; } @@ -551,26 +594,21 @@ uint64_t SOMAArray::metadata_num() const { void SOMAArray::validate( OpenMode mode, std::string_view name, - std::optional> timestamp) { + std::optional timestamp) { // Validate parameters auto tdb_mode = mode == OpenMode::read ? TILEDB_READ : TILEDB_WRITE; try { LOG_DEBUG(fmt::format("[SOMAArray] opening array '{}'", uri_)); - arr_ = std::make_shared(*ctx_->tiledb_ctx(), uri_, tdb_mode); if (timestamp) { - if (timestamp->first > timestamp->second) { - throw std::invalid_argument("timestamp start > end"); - } - arr_->set_open_timestamp_start(timestamp->first); - arr_->set_open_timestamp_end(timestamp->second); - arr_->close(); - arr_->open(tdb_mode); - LOG_DEBUG(fmt::format( - "[SOMAArray] timestamp_start = {}", - arr_->open_timestamp_start())); - LOG_DEBUG(fmt::format( - "[SOMAArray] timestamp_end = {}", arr_->open_timestamp_end())); + arr_ = std::make_shared( + *ctx_->tiledb_ctx(), + uri_, + tdb_mode, + TemporalPolicy( + TimestampStartEnd, timestamp->first, timestamp->second)); + } else { + arr_ = std::make_shared(*ctx_->tiledb_ctx(), uri_, tdb_mode); } LOG_TRACE(fmt::format("[SOMAArray] loading enumerations")); ArrayExperimental::load_all_enumerations( @@ -582,7 +620,7 @@ void SOMAArray::validate( } } -std::optional> SOMAArray::timestamp() { +std::optional SOMAArray::timestamp() { return timestamp_; } diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index b4c17ee6c4..7d0e379731 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ 
b/libtiledbsoma/src/soma/soma_array.h @@ -57,17 +57,19 @@ class SOMAArray : public SOMAObject { /** * @brief Create a SOMAArray object at the given URI. * - * @param ctx TileDB context + * @param ctx SOMAContext * @param uri URI to create the SOMAArray * @param schema TileDB ArraySchema - * @param soma_type SOMADataFrame, SOMADenseNDArray, or - * SOMASparseNDArray + * @param soma_type SOMADataFrame, SOMADenseNDArray, or SOMASparseNDArray + * @param timestamp Optional pair indicating timestamp start and end + * @return std::unique_ptr */ - static void create( + static std::unique_ptr create( std::shared_ptr ctx, std::string_view uri, ArraySchema schema, - std::string soma_type); + std::string soma_type, + std::optional timestamp = std::nullopt); /** * @brief Open an array at the specified URI and return SOMAArray @@ -82,7 +84,7 @@ class SOMAArray : public SOMAObject { * @param result_order Read result order: automatic (default), rowmajor, * or colmajor * @param timestamp Optional pair indicating timestamp start and end - * @return std::unique_ptr SOMAArray + * @return std::unique_ptr */ static std::unique_ptr open( OpenMode mode, @@ -92,7 +94,7 @@ class SOMAArray : public SOMAObject { std::vector column_names = {}, std::string_view batch_size = "auto", ResultOrder result_order = ResultOrder::automatic, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); /** * @brief Open an array at the specified URI and return SOMAArray @@ -117,7 +119,7 @@ class SOMAArray : public SOMAObject { std::vector column_names = {}, std::string_view batch_size = "auto", ResultOrder result_order = ResultOrder::automatic, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); //=================================================================== //= public non-static @@ -143,7 +145,7 @@ class SOMAArray : public SOMAObject { std::vector column_names, std::string_view batch_size, ResultOrder result_order, - std::optional> 
timestamp = std::nullopt); + std::optional timestamp = std::nullopt); /** * @brief Construct a new SOMAArray object @@ -165,7 +167,7 @@ class SOMAArray : public SOMAObject { std::vector column_names, std::string_view batch_size, ResultOrder result_order, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); SOMAArray(const SOMAArray& other) : uri_(other.uri_) @@ -174,6 +176,7 @@ class SOMAArray : public SOMAObject { , batch_size_(other.batch_size_) , result_order_(other.result_order_) , metadata_(other.metadata_) + , meta_cache_arr_(other.meta_cache_arr_) , timestamp_(other.timestamp_) , mq_(std::make_unique( other.arr_, other.ctx_->tiledb_ctx(), other.name_)) @@ -182,6 +185,11 @@ class SOMAArray : public SOMAObject { , submitted_(other.submitted_) { } + SOMAArray( + std::shared_ptr ctx, + std::shared_ptr arr, + std::optional timestamp); + SOMAArray(SOMAArray&&) = default; SOMAArray(const SOMAObject& other) @@ -212,8 +220,7 @@ class SOMAArray : public SOMAObject { * @param timestamp Timestamp */ void open( - OpenMode mode, - std::optional> timestamp = std::nullopt); + OpenMode mode, std::optional timestamp = std::nullopt); /** * Close the SOMAArray object. @@ -683,12 +690,12 @@ class SOMAArray : public SOMAObject { void validate( OpenMode mode, std::string_view name, - std::optional> timestamp); + std::optional timestamp); /** * Return optional timestamp pair SOMAArray was opened with. */ - std::optional> timestamp(); + std::optional timestamp(); private: //=================================================================== @@ -715,11 +722,20 @@ class SOMAArray : public SOMAObject { // Result order ResultOrder result_order_; - // Metadata cache + // Metadata values need to be accessible in write mode as well. 
When adding + // or deleting values in the array, instead of closing to update to + // metadata; then reopening to read the array; and again reopening to + // restore the array back to write mode, we just store the modifications to + // this cache std::map metadata_; + // Array associated with metadata_. We need to keep this read-mode array + // alive in order for the metadata value pointers in the cache to be + // accessible + std::shared_ptr meta_cache_arr_; + // Read timestamp range (start, end) - std::optional> timestamp_; + std::optional timestamp_; // Managed query for the array std::unique_ptr mq_; diff --git a/libtiledbsoma/src/soma/soma_collection.cc b/libtiledbsoma/src/soma/soma_collection.cc index 9fa9c654fb..5bf74c62bb 100644 --- a/libtiledbsoma/src/soma/soma_collection.cc +++ b/libtiledbsoma/src/soma/soma_collection.cc @@ -42,16 +42,18 @@ using namespace tiledb; //=================================================================== std::unique_ptr SOMACollection::create( - std::string_view uri, std::shared_ptr ctx) { - SOMAGroup::create(ctx, uri, "SOMACollection"); - return SOMACollection::open(uri, OpenMode::read, ctx); + std::string_view uri, + std::shared_ptr ctx, + std::optional timestamp) { + auto soma_group = SOMAGroup::create(ctx, uri, "SOMACollection", timestamp); + return std::make_unique(*soma_group); } std::unique_ptr SOMACollection::open( std::string_view uri, OpenMode mode, std::shared_ptr ctx, - std::optional> timestamp) { + std::optional timestamp) { return std::make_unique(mode, uri, ctx, timestamp); } @@ -96,7 +98,9 @@ std::shared_ptr SOMACollection::add_new_collection( std::string_view uri, URIType uri_type, std::shared_ptr ctx) { - std::shared_ptr member = SOMACollection::create(uri, ctx); + SOMACollection::create(uri, ctx); + std::shared_ptr member = SOMAExperiment::open( + uri, OpenMode::read, ctx); this->set(std::string(uri), uri_type, std::string(key)); children_[std::string(key)] = member; return member; @@ -108,8 +112,9 @@ 
std::shared_ptr SOMACollection::add_new_experiment( URIType uri_type, std::shared_ptr ctx, ArraySchema schema) { - std::shared_ptr member = SOMAExperiment::create( - uri, schema, ctx); + SOMAExperiment::create(uri, schema, ctx); + std::shared_ptr member = SOMAExperiment::open( + uri, OpenMode::read, ctx); this->set(std::string(uri), uri_type, std::string(key)); children_[std::string(key)] = member; return member; @@ -121,8 +126,9 @@ std::shared_ptr SOMACollection::add_new_measurement( URIType uri_type, std::shared_ptr ctx, ArraySchema schema) { - std::shared_ptr member = SOMAMeasurement::create( - uri, schema, ctx); + SOMAMeasurement::create(uri, schema, ctx); + std::shared_ptr member = SOMAMeasurement::open( + uri, OpenMode::read, ctx); this->set(std::string(uri), uri_type, std::string(key)); children_[std::string(key)] = member; return member; @@ -134,8 +140,9 @@ std::shared_ptr SOMACollection::add_new_dataframe( URIType uri_type, std::shared_ptr ctx, ArraySchema schema) { - std::shared_ptr member = SOMADataFrame::create( - uri, schema, ctx); + SOMADataFrame::create(uri, schema, ctx); + std::shared_ptr member = SOMADataFrame::open( + uri, OpenMode::read, ctx); this->set(std::string(uri), uri_type, std::string(key)); children_[std::string(key)] = member; return member; @@ -147,8 +154,9 @@ std::shared_ptr SOMACollection::add_new_dense_ndarray( URIType uri_type, std::shared_ptr ctx, ArraySchema schema) { - std::shared_ptr member = SOMADenseNDArray::create( - uri, schema, ctx); + SOMADenseNDArray::create(uri, schema, ctx); + std::shared_ptr member = SOMADenseNDArray::open( + uri, OpenMode::read, ctx); this->set(std::string(uri), uri_type, std::string(key)); children_[std::string(key)] = member; return member; @@ -160,8 +168,9 @@ std::shared_ptr SOMACollection::add_new_sparse_ndarray( URIType uri_type, std::shared_ptr ctx, ArraySchema schema) { - std::shared_ptr member = SOMASparseNDArray::create( - uri, schema, ctx); + SOMASparseNDArray::create(uri, schema, ctx); + 
std::shared_ptr member = SOMASparseNDArray::open( + uri, OpenMode::read, ctx); this->set(std::string(uri), uri_type, std::string(key)); children_[std::string(key)] = member; return member; diff --git a/libtiledbsoma/src/soma/soma_collection.h b/libtiledbsoma/src/soma/soma_collection.h index e869a7d68a..58fbf418b7 100644 --- a/libtiledbsoma/src/soma/soma_collection.h +++ b/libtiledbsoma/src/soma/soma_collection.h @@ -62,7 +62,9 @@ class SOMACollection : public SOMAGroup { * @param uri URI to create the SOMACollection */ static std::unique_ptr create( - std::string_view uri, std::shared_ptr ctx); + std::string_view uri, + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); /** * @brief Open a group at the specified URI and return SOMACollection @@ -78,7 +80,7 @@ class SOMACollection : public SOMAGroup { std::string_view uri, OpenMode mode, std::shared_ptr ctx, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); //=================================================================== //= public non-static @@ -97,7 +99,7 @@ class SOMACollection : public SOMAGroup { OpenMode mode, std::string_view uri, std::shared_ptr ctx, - std::optional> timestamp) + std::optional timestamp) : SOMAGroup( mode, uri, diff --git a/libtiledbsoma/src/soma/soma_dataframe.cc b/libtiledbsoma/src/soma/soma_dataframe.cc index 3fdab76d96..441288f134 100644 --- a/libtiledbsoma/src/soma/soma_dataframe.cc +++ b/libtiledbsoma/src/soma/soma_dataframe.cc @@ -42,9 +42,11 @@ using namespace tiledb; std::unique_ptr SOMADataFrame::create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx) { - SOMAArray::create(ctx, uri, schema, "SOMADataFrame"); - return SOMADataFrame::open(uri, OpenMode::read, ctx); + std::shared_ptr ctx, + std::optional timestamp) { + auto soma_array = SOMAArray::create( + ctx, uri, schema, "SOMADataFrame", timestamp); + return std::make_unique(*soma_array); } std::unique_ptr SOMADataFrame::open( @@ -53,7 +55,7 @@ std::unique_ptr 
SOMADataFrame::open( std::shared_ptr ctx, std::vector column_names, ResultOrder result_order, - std::optional> timestamp) { + std::optional timestamp) { return std::make_unique( mode, uri, ctx, column_names, result_order, timestamp); } diff --git a/libtiledbsoma/src/soma/soma_dataframe.h b/libtiledbsoma/src/soma/soma_dataframe.h index 1ed21f0b02..8583f16fb2 100644 --- a/libtiledbsoma/src/soma/soma_dataframe.h +++ b/libtiledbsoma/src/soma/soma_dataframe.h @@ -52,15 +52,17 @@ class SOMADataFrame : public SOMAArray { /** * @brief Create a SOMADataFrame object at the given URI. * - * @param uri URI to create the SOMADataFrame + * @param uri URI to create the SOMAArray * @param schema TileDB ArraySchema - * @param platform_config Optional config parameter dictionary - * @return std::shared_ptr opened in read mode + * @param ctx SOMAContext + * @param timestamp Optional pair indicating timestamp start and end + * @return std::unique_ptr */ static std::unique_ptr create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx); + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); /** * @brief Open and return a SOMADataFrame object at the given URI. @@ -76,7 +78,7 @@ class SOMADataFrame : public SOMAArray { * colmajor * @param timestamp If specified, overrides the default timestamp used to * open this object. If unset, uses the timestamp provided by the context. - * @return std::shared_ptr SOMADataFrame + * @return std::unique_ptr */ static std::unique_ptr open( std::string_view uri, @@ -84,7 +86,7 @@ class SOMADataFrame : public SOMAArray { std::shared_ptr ctx, std::vector column_names = {}, ResultOrder result_order = ResultOrder::automatic, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); /** * @brief Check if the SOMADataFrame exists at the URI. 
@@ -114,7 +116,7 @@ class SOMADataFrame : public SOMAArray { std::shared_ptr ctx, std::vector column_names, ResultOrder result_order, - std::optional> timestamp = std::nullopt) + std::optional timestamp = std::nullopt) : SOMAArray( mode, uri, diff --git a/libtiledbsoma/src/soma/soma_dense_ndarray.cc b/libtiledbsoma/src/soma/soma_dense_ndarray.cc index 6df2e84a51..b82f8d3ace 100644 --- a/libtiledbsoma/src/soma/soma_dense_ndarray.cc +++ b/libtiledbsoma/src/soma/soma_dense_ndarray.cc @@ -29,6 +29,7 @@ * * This file defines the SOMADenseNDArray class. */ + #include "soma_dense_ndarray.h" namespace tiledbsoma { @@ -41,9 +42,11 @@ using namespace tiledb; std::unique_ptr SOMADenseNDArray::create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx) { - SOMAArray::create(ctx, uri, schema, "SOMADenseNDArray"); - return SOMADenseNDArray::open(uri, OpenMode::read, ctx); + std::shared_ptr ctx, + std::optional timestamp) { + auto soma_array = SOMAArray::create( + ctx, uri, schema, "SOMADenseNDArray", timestamp); + return std::make_unique(*soma_array); } std::unique_ptr SOMADenseNDArray::open( @@ -52,7 +55,7 @@ std::unique_ptr SOMADenseNDArray::open( std::shared_ptr ctx, std::vector column_names, ResultOrder result_order, - std::optional> timestamp) { + std::optional timestamp) { return std::make_unique( mode, uri, ctx, column_names, result_order, timestamp); } diff --git a/libtiledbsoma/src/soma/soma_dense_ndarray.h b/libtiledbsoma/src/soma/soma_dense_ndarray.h index 39f2b5d18b..47a13f7bdb 100644 --- a/libtiledbsoma/src/soma/soma_dense_ndarray.h +++ b/libtiledbsoma/src/soma/soma_dense_ndarray.h @@ -52,15 +52,17 @@ class SOMADenseNDArray : public SOMAArray { /** * @brief Create a SOMADenseNDArray object at the given URI. 
* - * @param uri URI to create the SOMADenseNDArray + * @param uri URI to create the SOMAArray * @param schema TileDB ArraySchema - * @param platform_config Optional config parameter dictionary - * @return std::shared_ptr opened in read mode + * @param ctx SOMAContext + * @param timestamp Optional pair indicating timestamp start and end + * @return std::unique_ptr */ static std::unique_ptr create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx); + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); /** * @brief Open and return a SOMADenseNDArray object at the given URI. @@ -76,7 +78,7 @@ class SOMADenseNDArray : public SOMAArray { * open this object. If unset, uses the timestamp provided by the context. * @param result_order Read result order: automatic (default), rowmajor, or * colmajor - * @return std::shared_ptr SOMADenseNDArray + * @return std::shared_ptr */ static std::unique_ptr open( std::string_view uri, @@ -84,7 +86,7 @@ class SOMADenseNDArray : public SOMAArray { std::shared_ptr ctx, std::vector column_names = {}, ResultOrder result_order = ResultOrder::automatic, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); /** * @brief Check if the SOMADenseNDArray exists at the URI. 
@@ -113,7 +115,7 @@ class SOMADenseNDArray : public SOMAArray { std::shared_ptr ctx, std::vector column_names, ResultOrder result_order, - std::optional> timestamp) + std::optional timestamp) : SOMAArray( mode, uri, diff --git a/libtiledbsoma/src/soma/soma_experiment.cc b/libtiledbsoma/src/soma/soma_experiment.cc index bfdfb417d2..b2bb3fa5ed 100644 --- a/libtiledbsoma/src/soma/soma_experiment.cc +++ b/libtiledbsoma/src/soma/soma_experiment.cc @@ -44,18 +44,23 @@ using namespace tiledb; std::unique_ptr SOMAExperiment::create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx) { + std::shared_ptr ctx, + std::optional timestamp) { std::string exp_uri(uri); - SOMAGroup::create(ctx, exp_uri, "SOMAExperiment"); - SOMADataFrame::create(exp_uri + "/obs", schema, ctx); - SOMACollection::create(exp_uri + "/ms", ctx); - - auto group = SOMAGroup::open(OpenMode::write, exp_uri, ctx); - group->set(exp_uri + "/obs", URIType::absolute, "obs"); - group->set(exp_uri + "/ms", URIType::absolute, "ms"); - group->close(); + auto soma_group = SOMAGroup::create(ctx, uri, "SOMAExperiment", timestamp); + SOMADataFrame::create(exp_uri + "/obs", schema, ctx, timestamp); + SOMACollection::create(exp_uri + "/ms", ctx, timestamp); + soma_group->set(exp_uri + "/obs", URIType::absolute, "obs"); + soma_group->set(exp_uri + "/ms", URIType::absolute, "ms"); + return std::make_unique(*soma_group); +} - return std::make_unique(OpenMode::read, exp_uri, ctx); +std::unique_ptr SOMAExperiment::open( + std::string_view uri, + OpenMode mode, + std::shared_ptr ctx, + std::optional timestamp) { + return std::make_unique(mode, uri, ctx, timestamp); } } // namespace tiledbsoma diff --git a/libtiledbsoma/src/soma/soma_experiment.h b/libtiledbsoma/src/soma/soma_experiment.h index ff991a6a07..9303d42b73 100644 --- a/libtiledbsoma/src/soma/soma_experiment.h +++ b/libtiledbsoma/src/soma/soma_experiment.h @@ -57,7 +57,24 @@ class SOMAExperiment : public SOMACollection { static std::unique_ptr create( 
std::string_view uri, ArraySchema schema, - std::shared_ptr ctx); + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); + + /** + * @brief Open a group at the specified URI and return SOMAExperiment + * object. + * + * @param uri URI of the array + * @param mode read or write + * @param ctx TileDB context + * @param timestamp Optional pair indicating timestamp start and end + * @return std::shared_ptr SOMAExperiment + */ + static std::unique_ptr open( + std::string_view uri, + OpenMode mode, + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); //=================================================================== //= public non-static @@ -67,7 +84,7 @@ class SOMAExperiment : public SOMACollection { OpenMode mode, std::string_view uri, std::shared_ptr ctx, - std::optional> timestamp = std::nullopt) + std::optional timestamp = std::nullopt) : SOMACollection(mode, uri, ctx, timestamp) { } @@ -80,6 +97,8 @@ class SOMAExperiment : public SOMACollection { SOMAExperiment(SOMAExperiment&&) = default; ~SOMAExperiment() = default; + using SOMACollection::open; + private: //=================================================================== //= private non-static diff --git a/libtiledbsoma/src/soma/soma_group.cc b/libtiledbsoma/src/soma/soma_group.cc index 4d354d6543..a3946f3f18 100644 --- a/libtiledbsoma/src/soma/soma_group.cc +++ b/libtiledbsoma/src/soma/soma_group.cc @@ -41,18 +41,32 @@ using namespace tiledb; //= public static //=================================================================== -void SOMAGroup::create( +std::unique_ptr SOMAGroup::create( std::shared_ptr ctx, std::string_view uri, - std::string soma_type) { + std::string soma_type, + std::optional timestamp) { Group::create(*ctx->tiledb_ctx(), std::string(uri)); - auto group = Group(*ctx->tiledb_ctx(), std::string(uri), TILEDB_WRITE); - group.put_metadata( - "soma_object_type", + + auto group = std::make_shared( + *ctx->tiledb_ctx(), + std::string(uri), + TILEDB_WRITE, + 
_set_timestamp(ctx, timestamp)); + + group->put_metadata( + SOMA_OBJECT_TYPE_KEY, TILEDB_STRING_UTF8, static_cast(soma_type.length()), soma_type.c_str()); - group.close(); + + group->put_metadata( + ENCODING_VERSION_KEY, + TILEDB_STRING_UTF8, + static_cast(ENCODING_VERSION_VAL.length()), + ENCODING_VERSION_VAL.c_str()); + + return std::make_unique(ctx, group, timestamp); } std::unique_ptr SOMAGroup::open( @@ -60,7 +74,7 @@ std::unique_ptr SOMAGroup::open( std::string_view uri, std::shared_ptr ctx, std::string_view name, - std::optional> timestamp) { + std::optional timestamp) { return std::make_unique(mode, uri, ctx, name, timestamp); } @@ -73,74 +87,69 @@ SOMAGroup::SOMAGroup( std::string_view uri, std::shared_ptr ctx, std::string_view name, - std::optional> timestamp) + std::optional timestamp) : ctx_(ctx) , uri_(util::rstrip_uri(uri)) , name_(name) { - auto cfg = ctx_->tiledb_ctx()->config(); - if (timestamp) { - if (timestamp->first > timestamp->second) { - throw std::invalid_argument("timestamp start > end"); - } - cfg["sm.group.timestamp_start"] = timestamp->first; - cfg["sm.group.timestamp_end"] = timestamp->second; - } - group_ = std::make_unique( + group_ = std::make_shared( *ctx_->tiledb_ctx(), std::string(uri), mode == OpenMode::read ? 
TILEDB_READ : TILEDB_WRITE, - cfg); + _set_timestamp(ctx, timestamp)); + fill_caches(); +} +SOMAGroup::SOMAGroup( + std::shared_ptr ctx, + std::shared_ptr group, + std::optional timestamp) + : ctx_(ctx) + , uri_(util::rstrip_uri(group->uri())) + , name_(std::string(std::filesystem::path(group->uri()).filename())) + , group_(group) + , timestamp_(timestamp) { fill_caches(); } void SOMAGroup::fill_caches() { - std::shared_ptr grp; if (group_->query_type() == TILEDB_WRITE) { - grp = std::make_shared(*ctx_->tiledb_ctx(), uri_, TILEDB_READ); + cache_group_ = std::make_shared( + *ctx_->tiledb_ctx(), uri_, TILEDB_READ); } else { - grp = group_; + cache_group_ = group_; } - for (uint64_t idx = 0; idx < grp->metadata_num(); ++idx) { + for (uint64_t idx = 0; idx < cache_group_->metadata_num(); ++idx) { std::string key; tiledb_datatype_t value_type; uint32_t value_num; const void* value; - grp->get_metadata_from_index( + cache_group_->get_metadata_from_index( idx, &key, &value_type, &value_num, &value); MetadataValue mdval(value_type, value_num, value); std::pair mdpair(key, mdval); metadata_.insert(mdpair); } - for (uint64_t i = 0; i < grp->member_count(); ++i) { - auto mem = grp->member(i); + for (uint64_t i = 0; i < cache_group_->member_count(); ++i) { + auto mem = cache_group_->member(i); member_to_uri_[mem.name().value()] = mem.uri(); } - - if (group_->query_type() == TILEDB_WRITE) { - grp->close(); - } } void SOMAGroup::open( - OpenMode query_type, - std::optional> timestamp) { - auto cfg = ctx_->tiledb_ctx()->config(); - if (timestamp) { - if (timestamp->first > timestamp->second) { - throw std::invalid_argument("timestamp start > end"); - } - cfg["sm.group.timestamp_start"] = timestamp->first; - cfg["sm.group.timestamp_end"] = timestamp->second; - } - group_->set_config(cfg); + OpenMode query_type, std::optional timestamp) { + timestamp_ = timestamp; + group_->set_config(_set_timestamp(ctx_, timestamp)); group_->open(query_type == OpenMode::read ? 
TILEDB_READ : TILEDB_WRITE); + fill_caches(); } void SOMAGroup::close() { + if (group_->query_type() == TILEDB_WRITE) + cache_group_->close(); group_->close(); + metadata_.clear(); } const std::string SOMAGroup::uri() const { @@ -195,9 +204,11 @@ void SOMAGroup::set_metadata( tiledb_datatype_t value_type, uint32_t value_num, const void* value) { - if (key.compare("soma_object_type") == 0) { - throw TileDBSOMAError("soma_object_type cannot be modified."); - } + if (key.compare(SOMA_OBJECT_TYPE_KEY) == 0) + throw TileDBSOMAError(SOMA_OBJECT_TYPE_KEY + " cannot be modified."); + + if (key.compare(ENCODING_VERSION_KEY) == 0) + throw TileDBSOMAError(ENCODING_VERSION_KEY + " cannot be modified."); group_->put_metadata(key, value_type, value_num, value); MetadataValue mdval(value_type, value_num, value); @@ -206,25 +217,27 @@ void SOMAGroup::set_metadata( } void SOMAGroup::delete_metadata(const std::string& key) { - if (key.compare("soma_object_type") == 0) { - throw TileDBSOMAError("soma_object_type cannot be deleted."); - } + if (key.compare(SOMA_OBJECT_TYPE_KEY) == 0) + throw TileDBSOMAError(SOMA_OBJECT_TYPE_KEY + " cannot be deleted."); + + if (key.compare(ENCODING_VERSION_KEY) == 0) + throw TileDBSOMAError(ENCODING_VERSION_KEY + " cannot be deleted."); group_->delete_metadata(key); metadata_.erase(key); } -std::map SOMAGroup::get_metadata() { - return metadata_; -} - std::optional SOMAGroup::get_metadata(const std::string& key) { - if (metadata_.count(key) == 0) { + if (metadata_.count(key) == 0) return std::nullopt; - } + return metadata_[key]; } +std::map SOMAGroup::get_metadata() { + return metadata_; +} + bool SOMAGroup::has_metadata(const std::string& key) { return metadata_.count(key) != 0; } @@ -233,4 +246,17 @@ uint64_t SOMAGroup::metadata_num() const { return metadata_.size(); } +Config SOMAGroup::_set_timestamp( + std::shared_ptr ctx, std::optional timestamp) { + auto cfg = ctx->tiledb_ctx()->config(); + if (timestamp) { + if (timestamp->first > 
timestamp->second) { + throw std::invalid_argument("timestamp start > end"); + } + cfg["sm.group.timestamp_start"] = timestamp->first; + cfg["sm.group.timestamp_end"] = timestamp->second; + } + return cfg; +} + } // namespace tiledbsoma \ No newline at end of file diff --git a/libtiledbsoma/src/soma/soma_group.h b/libtiledbsoma/src/soma/soma_group.h index 27c3c5010f..c9334159f5 100644 --- a/libtiledbsoma/src/soma/soma_group.h +++ b/libtiledbsoma/src/soma/soma_group.h @@ -57,11 +57,13 @@ class SOMAGroup : public SOMAObject { * @param ctx TileDB context * @param uri URI to create the SOMAGroup * @param soma_type SOMACollection, SOMAMeasurement, or SOMAExperiment + * @param timestamp Optional pair indicating timestamp start and end */ - static void create( + static std::unique_ptr create( std::shared_ptr ctx, std::string_view uri, - std::string soma_type); + std::string soma_type, + std::optional timestamp = std::nullopt); /** * @brief Open a group at the specified URI and return SOMAGroup @@ -79,7 +81,7 @@ class SOMAGroup : public SOMAObject { std::string_view uri, std::shared_ptr ctx, std::string_view name = "unnamed", - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); //=================================================================== //= public non-static @@ -99,7 +101,12 @@ class SOMAGroup : public SOMAObject { std::string_view uri, std::shared_ptr ctx, std::string_view name, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); + + SOMAGroup( + std::shared_ptr ctx, + std::shared_ptr group, + std::optional timestamp); SOMAGroup() = delete; SOMAGroup(const SOMAGroup&) = default; @@ -113,8 +120,7 @@ class SOMAGroup : public SOMAObject { * @param timestamp Optional pair indicating timestamp start and end */ void open( - OpenMode mode, - std::optional> timestamp = std::nullopt); + OpenMode mode, std::optional timestamp = std::nullopt); /** * Close the SOMAGroup object. 
@@ -275,6 +281,14 @@ class SOMAGroup : public SOMAObject { //= private non-static //=================================================================== + /** + * Helper function to set the passed-in timestamp in the config associated + * with the SOMAContext passed in + */ + static Config _set_timestamp( + std::shared_ptr ctx, + std::optional timestamp); + /** * Fills the metadata and member-to-uri caches upon opening the array. */ @@ -289,12 +303,24 @@ class SOMAGroup : public SOMAObject { // Name displayed in log messages std::string name_; - // TileDBGroup associated with the SOMAGroup + // TileDB Group associated with the SOMAGroup std::shared_ptr group_; - // Metadata cache + // Metadata values need to be accessible in write mode as well. When adding + // or deleting values in the group, instead of closing to update the + // metadata; then reopening to read the group; and again reopening to + // restore the group back to write mode, we just store the modifications to + // this cache std::map metadata_; + // Group associated with metadata_. 
We need to keep this read-mode group + // alive in order for the metadata value pointers in the cache to be + // accessible + std::shared_ptr cache_group_; + + // Read timestamp range (start, end) + std::optional timestamp_; + // Member-to-URI cache std::map member_to_uri_; }; diff --git a/libtiledbsoma/src/soma/soma_measurement.cc b/libtiledbsoma/src/soma/soma_measurement.cc index 80c44f11ae..38c892cc5a 100644 --- a/libtiledbsoma/src/soma/soma_measurement.cc +++ b/libtiledbsoma/src/soma/soma_measurement.cc @@ -44,26 +44,33 @@ using namespace tiledb; std::unique_ptr SOMAMeasurement::create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx) { + std::shared_ptr ctx, + std::optional timestamp) { std::string exp_uri(uri); - SOMAGroup::create(ctx, exp_uri, "SOMAMeasurement"); - SOMADataFrame::create(exp_uri + "/var", schema, ctx); - SOMACollection::create(exp_uri + "/X", ctx); - SOMACollection::create(exp_uri + "/obsm", ctx); - SOMACollection::create(exp_uri + "/obsp", ctx); - SOMACollection::create(exp_uri + "/varm", ctx); - SOMACollection::create(exp_uri + "/varp", ctx); + auto soma_group = SOMAGroup::create( + ctx, exp_uri, "SOMAMeasurement", timestamp); + SOMADataFrame::create(exp_uri + "/var", schema, ctx, timestamp); + SOMACollection::create(exp_uri + "/X", ctx, timestamp); + SOMACollection::create(exp_uri + "/obsm", ctx, timestamp); + SOMACollection::create(exp_uri + "/obsp", ctx, timestamp); + SOMACollection::create(exp_uri + "/varm", ctx, timestamp); + SOMACollection::create(exp_uri + "/varp", ctx, timestamp); - auto group = SOMAGroup::open(OpenMode::write, uri, ctx); - group->set(exp_uri + "/var", URIType::absolute, "var"); - group->set(exp_uri + "/X", URIType::absolute, "X"); - group->set(exp_uri + "/obsm", URIType::absolute, "obsm"); - group->set(exp_uri + "/obsp", URIType::absolute, "obsp"); - group->set(exp_uri + "/varm", URIType::absolute, "varm"); - group->set(exp_uri + "/varp", URIType::absolute, "varp"); - group->close(); + 
soma_group->set(exp_uri + "/var", URIType::absolute, "var"); + soma_group->set(exp_uri + "/X", URIType::absolute, "X"); + soma_group->set(exp_uri + "/obsm", URIType::absolute, "obsm"); + soma_group->set(exp_uri + "/obsp", URIType::absolute, "obsp"); + soma_group->set(exp_uri + "/varm", URIType::absolute, "varm"); + soma_group->set(exp_uri + "/varp", URIType::absolute, "varp"); + return std::make_unique(*soma_group); +} - return std::make_unique(OpenMode::read, uri, ctx); +std::unique_ptr SOMAMeasurement::open( + std::string_view uri, + OpenMode mode, + std::shared_ptr ctx, + std::optional timestamp) { + return std::make_unique(mode, uri, ctx, timestamp); } } // namespace tiledbsoma diff --git a/libtiledbsoma/src/soma/soma_measurement.h b/libtiledbsoma/src/soma/soma_measurement.h index cfaf950549..dbfe3b2505 100644 --- a/libtiledbsoma/src/soma/soma_measurement.h +++ b/libtiledbsoma/src/soma/soma_measurement.h @@ -58,7 +58,24 @@ class SOMAMeasurement : public SOMACollection { static std::unique_ptr create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx); + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); + + /** + * @brief Open a group at the specified URI and return SOMAMeasurement + * object. 
+ * + * @param uri URI of the group + * @param mode read or write + * @param ctx TileDB context + * @param timestamp Optional pair indicating timestamp start and end + * @return std::unique_ptr SOMAMeasurement + */ + static std::unique_ptr open( + std::string_view uri, + OpenMode mode, + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); //=================================================================== //= public non-static @@ -67,7 +84,7 @@ class SOMAMeasurement : public SOMACollection { OpenMode mode, std::string_view uri, std::shared_ptr ctx, - std::optional> timestamp = std::nullopt) + std::optional timestamp = std::nullopt) : SOMACollection(mode, uri, ctx, timestamp) { } @@ -80,6 +97,8 @@ class SOMAMeasurement : public SOMACollection { SOMAMeasurement(SOMAMeasurement&&) = default; ~SOMAMeasurement() = default; + using SOMACollection::open; + private: //=================================================================== //= private non-static diff --git a/libtiledbsoma/src/soma/soma_object.cc b/libtiledbsoma/src/soma/soma_object.cc index c7fa2defa9..01080d42d2 100644 --- a/libtiledbsoma/src/soma/soma_object.cc +++ b/libtiledbsoma/src/soma/soma_object.cc @@ -18,12 +18,13 @@ std::unique_ptr SOMAObject::open( std::string_view uri, OpenMode mode, std::shared_ptr ctx, - std::optional> timestamp) { + std::optional timestamp) { auto obj = tiledb::Object::object(*ctx->tiledb_ctx(), std::string(uri)); + auto name = std::string(std::filesystem::path(uri).filename()); if (obj.type() == tiledb::Object::Type::Array) { auto array_ = SOMAArray::open( - mode, uri, ctx, "", {}, "auto", ResultOrder::automatic, timestamp); + mode, uri, ctx, "", {}, name, ResultOrder::automatic, timestamp); if (!array_->type().has_value()) throw TileDBSOMAError("SOMAArray has no type info"); @@ -38,7 +39,7 @@ std::unique_ptr SOMAObject::open( throw TileDBSOMAError("Saw invalid SOMAArray type"); } } else if (obj.type() == tiledb::Object::Type::Group) { - auto group_ = 
SOMAGroup::open(mode, uri, ctx, "", timestamp); + auto group_ = SOMAGroup::open(mode, uri, ctx, name, timestamp); if (!group_->type().has_value()) throw TileDBSOMAError("SOMAGroup has no type info"); diff --git a/libtiledbsoma/src/soma/soma_object.h b/libtiledbsoma/src/soma/soma_object.h index ee903b4625..c68bba0cc5 100644 --- a/libtiledbsoma/src/soma/soma_object.h +++ b/libtiledbsoma/src/soma/soma_object.h @@ -35,6 +35,7 @@ #ifndef SOMA_OBJECT #define SOMA_OBJECT +#include #include #include #include @@ -55,7 +56,7 @@ class SOMAObject { std::string_view uri, OpenMode mode, std::shared_ptr ctx, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); /** * @brief Return a constant string describing the type of the object. diff --git a/libtiledbsoma/src/soma/soma_sparse_ndarray.cc b/libtiledbsoma/src/soma/soma_sparse_ndarray.cc index 1259c528e8..ca3d91fc61 100644 --- a/libtiledbsoma/src/soma/soma_sparse_ndarray.cc +++ b/libtiledbsoma/src/soma/soma_sparse_ndarray.cc @@ -42,9 +42,11 @@ using namespace tiledb; std::unique_ptr SOMASparseNDArray::create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx) { - SOMAArray::create(ctx, uri, schema, "SOMASparseNDArray"); - return SOMASparseNDArray::open(uri, OpenMode::read, ctx); + std::shared_ptr ctx, + std::optional timestamp) { + auto soma_array = SOMAArray::create( + ctx, uri, schema, "SOMASparseNDArray", timestamp); + return std::make_unique(*soma_array); } std::unique_ptr SOMASparseNDArray::open( @@ -53,7 +55,7 @@ std::unique_ptr SOMASparseNDArray::open( std::shared_ptr ctx, std::vector column_names, ResultOrder result_order, - std::optional> timestamp) { + std::optional timestamp) { return std::make_unique( mode, uri, ctx, column_names, result_order, timestamp); } diff --git a/libtiledbsoma/src/soma/soma_sparse_ndarray.h b/libtiledbsoma/src/soma/soma_sparse_ndarray.h index f201ff7614..4500b8870e 100644 --- a/libtiledbsoma/src/soma/soma_sparse_ndarray.h +++ 
b/libtiledbsoma/src/soma/soma_sparse_ndarray.h @@ -52,15 +52,17 @@ class SOMASparseNDArray : public SOMAArray { /** * @brief Create a SOMASparseNDArray object at the given URI. * - * @param uri URI to create the SOMASparseNDArray + * @param uri URI to create the SOMAArray * @param schema TileDB ArraySchema - * @param platform_config Optional config parameter dictionary - * @return std::shared_ptr opened in read mode + * @param ctx SOMAContext + * @param timestamp Optional pair indicating timestamp start and end + * @return std::unique_ptr */ static std::unique_ptr create( std::string_view uri, ArraySchema schema, - std::shared_ptr ctx); + std::shared_ptr ctx, + std::optional timestamp = std::nullopt); /** * @brief Open and return a SOMASparseNDArray object at the given URI. @@ -76,7 +78,7 @@ class SOMASparseNDArray : public SOMAArray { * colmajor * @param timestamp If specified, overrides the default timestamp used to * open this object. If unset, uses the timestamp provided by the context. - * @return std::shared_ptr SOMASparseNDArray + * @return std::unique_ptr */ static std::unique_ptr open( std::string_view uri, @@ -84,7 +86,7 @@ class SOMASparseNDArray : public SOMAArray { std::shared_ptr ctx, std::vector column_names = {}, ResultOrder result_order = ResultOrder::automatic, - std::optional> timestamp = std::nullopt); + std::optional timestamp = std::nullopt); /** * @brief Check if the SOMASparseNDArray exists at the URI. 
@@ -113,7 +115,7 @@ class SOMASparseNDArray : public SOMAArray { std::shared_ptr ctx, std::vector column_names, ResultOrder result_order, - std::optional> timestamp) + std::optional timestamp) : SOMAArray( mode, uri, diff --git a/libtiledbsoma/src/utils/common.h b/libtiledbsoma/src/utils/common.h index 3928b04675..87b41d5cd0 100644 --- a/libtiledbsoma/src/utils/common.h +++ b/libtiledbsoma/src/utils/common.h @@ -39,9 +39,15 @@ namespace tiledbsoma { +const std::string SOMA_OBJECT_TYPE_KEY = "soma_object_type"; +const std::string ENCODING_VERSION_KEY = "soma_encoding_version"; +const std::string ENCODING_VERSION_VAL = "1"; + using MetadataValue = std::tuple; enum MetadataInfo { dtype = 0, num, value }; +using TimestampRange = std::pair; + class TileDBSOMAError : public std::runtime_error { public: explicit TileDBSOMAError(const char* m) diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc index aae178f0e5..610bc1f0bb 100644 --- a/libtiledbsoma/test/unit_soma_array.cc +++ b/libtiledbsoma/test/unit_soma_array.cc @@ -86,7 +86,7 @@ std::tuple create_array( schema.check(); // Create array - SOMAArray::create(ctx, uri, schema, "NONE"); + SOMAArray::create(ctx, uri, schema, "NONE", TimestampRange(0, 2)); uint64_t nnz = num_fragments * num_cells_per_fragment; @@ -125,7 +125,7 @@ std::tuple, std::vector> write_array( {}, "auto", ResultOrder::automatic, - std::pair(timestamp + i, timestamp + i)); + TimestampRange(timestamp + i, timestamp + i)); std::vector d0(num_cells_per_fragment); for (int j = 0; j < num_cells_per_fragment; j++) { @@ -218,8 +218,7 @@ TEST_CASE("SOMAArray: nnz") { {}, "auto", ResultOrder::automatic, - std::pair( - timestamp, timestamp + num_fragments - 1)); + TimestampRange(timestamp, timestamp + num_fragments - 1)); uint64_t nnz = soma_array->nnz(); REQUIRE(nnz == expected_nnz); @@ -283,7 +282,7 @@ TEST_CASE("SOMAArray: nnz with timestamp") { uri, ctx, num_cells_per_fragment, num_fragments, overlap, 40); // Get total 
cell num at timestamp (0, 20) - std::pair timestamp{0, 20}; + TimestampRange timestamp{0, 20}; auto soma_array = SOMAArray::open( OpenMode::read, uri, @@ -364,7 +363,6 @@ TEST_CASE("SOMAArray: nnz with consolidation") { TEST_CASE("SOMAArray: metadata") { auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-array"; const auto& [uri, expected_nnz] = create_array(base_uri, ctx); @@ -376,35 +374,51 @@ TEST_CASE("SOMAArray: metadata") { {}, "auto", ResultOrder::automatic, - std::pair(1, 1)); + TimestampRange(1, 1)); + int32_t val = 100; soma_array->set_metadata("md", TILEDB_INT32, 1, &val); soma_array->close(); - soma_array->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_array->metadata_num() == 2); - REQUIRE(soma_array->has_metadata("soma_object_type") == true); - REQUIRE(soma_array->has_metadata("md") == true); - + // Read metadata + soma_array->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_array->metadata_num() == 3); + REQUIRE(soma_array->has_metadata("soma_object_type")); + REQUIRE(soma_array->has_metadata("soma_encoding_version")); + REQUIRE(soma_array->has_metadata("md")); auto mdval = soma_array->get_metadata("md"); REQUIRE(std::get(*mdval) == TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_array->close(); - soma_array->open(OpenMode::write, std::pair(2, 2)); + // md should not be available at (2, 2) + soma_array->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_array->metadata_num() == 2); + REQUIRE(soma_array->has_metadata("soma_object_type")); + REQUIRE(soma_array->has_metadata("soma_encoding_version")); + REQUIRE(!soma_array->has_metadata("md")); + soma_array->close(); + // Metadata should also be retrievable in write mode + soma_array->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_array->metadata_num() == 3); + REQUIRE(soma_array->has_metadata("soma_object_type")); + REQUIRE(soma_array->has_metadata("soma_encoding_version")); + 
REQUIRE(soma_array->has_metadata("md")); mdval = soma_array->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_array->delete_metadata("md"); mdval = soma_array->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_array->close(); - soma_array->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_array->has_metadata("md") == false); - REQUIRE(soma_array->metadata_num() == 1); - soma_array->close(); + // Confirm delete in read mode + soma_array->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_array->has_metadata("md")); + REQUIRE(soma_array->metadata_num() == 2); } TEST_CASE("SOMAArray: Test buffer size") { diff --git a/libtiledbsoma/test/unit_soma_collection.cc b/libtiledbsoma/test/unit_soma_collection.cc index e873c86c9c..38336e1b27 100644 --- a/libtiledbsoma/test/unit_soma_collection.cc +++ b/libtiledbsoma/test/unit_soma_collection.cc @@ -81,7 +81,8 @@ TEST_CASE("SOMACollection: basic") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-collection-basic"; - auto soma_collection = SOMACollection::create(uri, ctx); + SOMACollection::create(uri, ctx); + auto soma_collection = SOMACollection::open(uri, OpenMode::read, ctx); REQUIRE(soma_collection->uri() == uri); REQUIRE(soma_collection->ctx() == ctx); REQUIRE(soma_collection->type() == "SOMACollection"); @@ -250,110 +251,158 @@ TEST_CASE("SOMACollection: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-collection"; - SOMACollection::create(uri, ctx); + SOMACollection::create(uri, ctx, TimestampRange(0, 2)); auto soma_collection = SOMACollection::open( - uri, OpenMode::write, ctx, std::pair(1, 1)); + uri, OpenMode::write, ctx, TimestampRange(1, 1)); + int32_t val = 100; soma_collection->set_metadata("md", TILEDB_INT32, 1, &val); soma_collection->close(); - soma_collection->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_collection->metadata_num() 
== 2); - REQUIRE(soma_collection->has_metadata("soma_object_type") == true); - REQUIRE(soma_collection->has_metadata("md") == true); - + // Read metadata + soma_collection->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_collection->metadata_num() == 3); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(soma_collection->has_metadata("md")); auto mdval = soma_collection->get_metadata("md"); REQUIRE(std::get(*mdval) == TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_collection->close(); - soma_collection->open(OpenMode::write, std::pair(2, 2)); + // md should not be available at (2, 2) + soma_collection->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_collection->metadata_num() == 2); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(!soma_collection->has_metadata("md")); + soma_collection->close(); + // Metadata should also be retrievable in write mode + soma_collection->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_collection->metadata_num() == 3); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(soma_collection->has_metadata("md")); mdval = soma_collection->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_collection->delete_metadata("md"); mdval = soma_collection->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_collection->close(); - soma_collection->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_collection->has_metadata("md") == false); - REQUIRE(soma_collection->metadata_num() == 1); - soma_collection->close(); + // Confirm delete in read mode + soma_collection->open(OpenMode::read, 
TimestampRange(0, 2)); + REQUIRE(!soma_collection->has_metadata("md")); + REQUIRE(soma_collection->metadata_num() == 2); } TEST_CASE("SOMAExperiment: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-experiment"; - SOMAExperiment::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); + SOMAExperiment::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx, TimestampRange(0, 2)); auto soma_experiment = SOMAExperiment::open( - uri, OpenMode::write, ctx, std::pair(1, 1)); + uri, OpenMode::write, ctx, TimestampRange(1, 1)); + int32_t val = 100; soma_experiment->set_metadata("md", TILEDB_INT32, 1, &val); soma_experiment->close(); - soma_experiment->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_experiment->metadata_num() == 2); - REQUIRE(soma_experiment->has_metadata("soma_object_type") == true); - REQUIRE(soma_experiment->has_metadata("md") == true); - + // Read metadata + soma_experiment->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_experiment->metadata_num() == 3); + REQUIRE(soma_experiment->has_metadata("soma_object_type")); + REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); + REQUIRE(soma_experiment->has_metadata("md")); auto mdval = soma_experiment->get_metadata("md"); REQUIRE(std::get(*mdval) == TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_experiment->close(); - soma_experiment->open(OpenMode::write, std::pair(2, 2)); + // md should not be available at (2, 2) + soma_experiment->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_experiment->metadata_num() == 2); + REQUIRE(soma_experiment->has_metadata("soma_object_type")); + REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); + REQUIRE(!soma_experiment->has_metadata("md")); + soma_experiment->close(); + // Metadata should also be retrievable in write mode + soma_experiment->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_experiment->metadata_num() == 3); + 
REQUIRE(soma_experiment->has_metadata("soma_object_type")); + REQUIRE(soma_experiment->has_metadata("soma_encoding_version")); + REQUIRE(soma_experiment->has_metadata("md")); mdval = soma_experiment->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_experiment->delete_metadata("md"); mdval = soma_experiment->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_experiment->close(); - soma_experiment->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_experiment->has_metadata("md") == false); - REQUIRE(soma_experiment->metadata_num() == 1); - soma_experiment->close(); + // Confirm delete in read mode + soma_experiment->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_experiment->has_metadata("md")); + REQUIRE(soma_experiment->metadata_num() == 2); } TEST_CASE("SOMAMeasurement: metadata") { auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-measurement"; - SOMAMeasurement::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); + SOMAMeasurement::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx, TimestampRange(0, 2)); auto soma_measurement = SOMAMeasurement::open( - uri, OpenMode::write, ctx, std::pair(1, 1)); + uri, OpenMode::write, ctx, TimestampRange(1, 1)); + int32_t val = 100; soma_measurement->set_metadata("md", TILEDB_INT32, 1, &val); soma_measurement->close(); - soma_measurement->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_measurement->metadata_num() == 2); - REQUIRE(soma_measurement->has_metadata("soma_object_type") == true); - REQUIRE(soma_measurement->has_metadata("md") == true); - + // Read metadata + soma_measurement->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_measurement->metadata_num() == 3); + REQUIRE(soma_measurement->has_metadata("soma_object_type")); + REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); + REQUIRE(soma_measurement->has_metadata("md")); auto mdval = 
soma_measurement->get_metadata("md"); REQUIRE(std::get(*mdval) == TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_measurement->close(); - soma_measurement->open( - OpenMode::write, std::pair(2, 2)); + // md should not be available at (2, 2) + soma_measurement->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_measurement->metadata_num() == 2); + REQUIRE(soma_measurement->has_metadata("soma_object_type")); + REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); + REQUIRE(!soma_measurement->has_metadata("md")); + soma_measurement->close(); + // Metadata should also be retrievable in write mode + soma_measurement->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_measurement->metadata_num() == 3); + REQUIRE(soma_measurement->has_metadata("soma_object_type")); + REQUIRE(soma_measurement->has_metadata("soma_encoding_version")); + REQUIRE(soma_measurement->has_metadata("md")); mdval = soma_measurement->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_measurement->delete_metadata("md"); mdval = soma_measurement->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_measurement->close(); - soma_measurement->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_measurement->has_metadata("md") == false); - REQUIRE(soma_measurement->metadata_num() == 1); - soma_measurement->close(); + // Confirm delete in read mode + soma_measurement->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_measurement->has_metadata("md")); + REQUIRE(soma_measurement->metadata_num() == 2); } \ No newline at end of file diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index 9822caeeb8..9f50de5807 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -80,17 +80,8 @@ TEST_CASE("SOMADataFrame: basic") 
{ auto ctx = std::make_shared(); std::string uri = "mem://unit-test-dataframe-basic"; - SOMADataFrame::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); - - auto soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); - REQUIRE(soma_dataframe->uri() == uri); - REQUIRE(soma_dataframe->ctx() == ctx); - REQUIRE(soma_dataframe->type() == "SOMADataFrame"); - std::vector expected_index_column_names = {"d0"}; - REQUIRE( - soma_dataframe->index_column_names() == expected_index_column_names); - REQUIRE(soma_dataframe->count() == 0); - soma_dataframe->close(); + auto soma_dataframe = SOMADataFrame::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx); std::vector d0(10); for (int j = 0; j < 10; j++) @@ -103,11 +94,13 @@ TEST_CASE("SOMADataFrame: basic") { array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0)); array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0)); - soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); soma_dataframe->write(array_buffer); soma_dataframe->close(); - soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); + soma_dataframe->open(OpenMode::read); + REQUIRE(soma_dataframe->uri() == uri); + REQUIRE(soma_dataframe->ctx() == ctx); + REQUIRE(soma_dataframe->type() == "SOMADataFrame"); while (auto batch = soma_dataframe->read_next()) { auto arrbuf = batch.value(); auto d0span = arrbuf->at("d0")->data(); @@ -125,42 +118,59 @@ TEST_CASE("SOMADataFrame: basic") { TEST_CASE("SOMADataFrame: metadata") { auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-collection"; - SOMADataFrame::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); + SOMADataFrame::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx, TimestampRange(0, 2)); + auto soma_dataframe = SOMADataFrame::open( uri, OpenMode::write, ctx, {}, ResultOrder::automatic, - std::pair(1, 1)); + TimestampRange(1, 1)); + int32_t val = 100; soma_dataframe->set_metadata("md", TILEDB_INT32, 1, &val); soma_dataframe->close(); - 
soma_dataframe->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_dataframe->metadata_num() == 2); - REQUIRE(soma_dataframe->has_metadata("soma_object_type") == true); - REQUIRE(soma_dataframe->has_metadata("md") == true); - + // Read metadata + soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_dataframe->metadata_num() == 3); + REQUIRE(soma_dataframe->has_metadata("soma_object_type")); + REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); + REQUIRE(soma_dataframe->has_metadata("md")); auto mdval = soma_dataframe->get_metadata("md"); REQUIRE(std::get(*mdval) == TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_dataframe->close(); - soma_dataframe->open(OpenMode::write, std::pair(2, 2)); + // md should not be available at (2, 2) + soma_dataframe->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_dataframe->metadata_num() == 2); + REQUIRE(soma_dataframe->has_metadata("soma_object_type")); + REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); + REQUIRE(!soma_dataframe->has_metadata("md")); + soma_dataframe->close(); + // Metadata should also be retrievable in write mode + soma_dataframe->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_dataframe->metadata_num() == 3); + REQUIRE(soma_dataframe->has_metadata("soma_object_type")); + REQUIRE(soma_dataframe->has_metadata("soma_encoding_version")); + REQUIRE(soma_dataframe->has_metadata("md")); mdval = soma_dataframe->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_dataframe->delete_metadata("md"); mdval = soma_dataframe->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_dataframe->close(); - soma_dataframe->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_dataframe->has_metadata("md") == false); - REQUIRE(soma_dataframe->metadata_num() == 1); - soma_dataframe->close(); + // Confirm 
delete in read mode + soma_dataframe->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_dataframe->has_metadata("md")); + REQUIRE(soma_dataframe->metadata_num() == 2); } \ No newline at end of file diff --git a/libtiledbsoma/test/unit_soma_dense_ndarray.cc b/libtiledbsoma/test/unit_soma_dense_ndarray.cc index decb0c5400..e2e9c50e28 100644 --- a/libtiledbsoma/test/unit_soma_dense_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_dense_ndarray.cc @@ -58,8 +58,8 @@ const std::string src_path = TILEDBSOMA_SOURCE_ROOT; namespace { ArraySchema create_schema(Context& ctx, bool allow_duplicates = false) { - // Create schema - ArraySchema schema(ctx, TILEDB_DENSE); + // SOMADenseNDArray is actually a TILEDB_SPARSE under the hood + ArraySchema schema(ctx, TILEDB_SPARSE); auto dim = Dimension::create(ctx, "d0", {0, 1000}); @@ -80,21 +80,12 @@ TEST_CASE("SOMADenseNDArray: basic") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-dense-ndarray-basic"; - SOMADenseNDArray::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); + auto soma_dense = SOMADenseNDArray::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx); - auto soma_dense = SOMADenseNDArray::open(uri, OpenMode::read, ctx); - REQUIRE(soma_dense->uri() == uri); - REQUIRE(soma_dense->ctx() == ctx); - REQUIRE(soma_dense->type() == "SOMADenseNDArray"); - REQUIRE(soma_dense->is_sparse() == false); - auto schema = soma_dense->tiledb_schema(); - REQUIRE(schema->has_attribute("a0")); - REQUIRE(schema->domain().has_dimension("d0")); - REQUIRE(soma_dense->ndim() == 1); - REQUIRE(soma_dense->shape() == std::vector{1001}); - soma_dense->close(); - - std::vector d0{1, 10}; + std::vector d0(10); + for (int j = 0; j < 10; j++) + d0[j] = j; std::vector a0(10, 1); auto array_buffer = std::make_shared(); @@ -103,18 +94,18 @@ TEST_CASE("SOMADenseNDArray: basic") { array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0)); array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0)); - 
soma_dense->open(OpenMode::write); soma_dense->write(array_buffer); soma_dense->close(); soma_dense->open(OpenMode::read); + REQUIRE(soma_dense->uri() == uri); + REQUIRE(soma_dense->ctx() == ctx); + REQUIRE(soma_dense->type() == "SOMADenseNDArray"); while (auto batch = soma_dense->read_next()) { auto arrbuf = batch.value(); auto d0span = arrbuf->at("d0")->data(); auto a0span = arrbuf->at("a0")->data(); - REQUIRE( - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} == - std::vector(d0span.begin(), d0span.end())); + REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); } soma_dense->close(); @@ -124,7 +115,8 @@ TEST_CASE("SOMADenseNDArray: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-dense-ndarray"; - SOMADenseNDArray::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); + SOMADenseNDArray::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx, TimestampRange(0, 2)); auto soma_dense = SOMADenseNDArray::open( uri, OpenMode::write, @@ -132,32 +124,48 @@ TEST_CASE("SOMADenseNDArray: metadata") { {}, ResultOrder::automatic, std::pair(1, 1)); + int32_t val = 100; soma_dense->set_metadata("md", TILEDB_INT32, 1, &val); soma_dense->close(); - soma_dense->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_dense->metadata_num() == 2); - REQUIRE(soma_dense->has_metadata("soma_object_type") == true); - REQUIRE(soma_dense->has_metadata("md") == true); - + // Read metadata + soma_dense->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_dense->metadata_num() == 3); + REQUIRE(soma_dense->has_metadata("soma_object_type")); + REQUIRE(soma_dense->has_metadata("soma_encoding_version")); + REQUIRE(soma_dense->has_metadata("md")); auto mdval = soma_dense->get_metadata("md"); REQUIRE(std::get(*mdval) == TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_dense->close(); - soma_dense->open(OpenMode::write, std::pair(2, 2)); + // md should 
not be available at (2, 2) + soma_dense->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_dense->metadata_num() == 2); + REQUIRE(soma_dense->has_metadata("soma_object_type")); + REQUIRE(soma_dense->has_metadata("soma_encoding_version")); + REQUIRE(!soma_dense->has_metadata("md")); + soma_dense->close(); + // Metadata should also be retrievable in write mode + soma_dense->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_dense->metadata_num() == 3); + REQUIRE(soma_dense->has_metadata("soma_object_type")); + REQUIRE(soma_dense->has_metadata("soma_encoding_version")); + REQUIRE(soma_dense->has_metadata("md")); mdval = soma_dense->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_dense->delete_metadata("md"); mdval = soma_dense->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_dense->close(); - soma_dense->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_dense->has_metadata("md") == false); - REQUIRE(soma_dense->metadata_num() == 1); - soma_dense->close(); + // Confirm delete in read mode + soma_dense->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_dense->has_metadata("md")); + REQUIRE(soma_dense->metadata_num() == 2); } \ No newline at end of file diff --git a/libtiledbsoma/test/unit_soma_group.cc b/libtiledbsoma/test/unit_soma_group.cc index a45430b375..c04030037b 100644 --- a/libtiledbsoma/test/unit_soma_group.cc +++ b/libtiledbsoma/test/unit_soma_group.cc @@ -156,11 +156,7 @@ TEST_CASE("SOMAGroup: basic") { "mem://sub-array", *ctx->tiledb_ctx()); auto soma_group = SOMAGroup::open( - OpenMode::write, - uri_main_group, - ctx, - "metadata", - std::pair(0, 1)); + OpenMode::write, uri_main_group, ctx, "metadata", TimestampRange(0, 1)); soma_group->set(uri_sub_group, URIType::absolute, "subgroup"); soma_group->set(uri_sub_array, URIType::absolute, "subarray"); soma_group->close(); @@ -168,7 +164,7 @@ TEST_CASE("SOMAGroup: 
basic") { std::map expected_map{ {"subgroup", uri_sub_group}, {"subarray", uri_sub_array}}; - soma_group->open(OpenMode::read, std::pair(0, 2)); + soma_group->open(OpenMode::read, TimestampRange(0, 2)); REQUIRE(soma_group->ctx() == ctx); REQUIRE(soma_group->uri() == uri_main_group); REQUIRE(soma_group->count() == 2); @@ -177,12 +173,12 @@ TEST_CASE("SOMAGroup: basic") { REQUIRE(soma_group->get("subarray").type() == Object::Type::Array); soma_group->close(); - soma_group->open(OpenMode::write, std::pair(0, 3)); + soma_group->open(OpenMode::write, TimestampRange(0, 3)); REQUIRE(expected_map == soma_group->member_to_uri_mapping()); soma_group->del("subgroup"); soma_group->close(); - soma_group->open(OpenMode::read, std::pair(0, 4)); + soma_group->open(OpenMode::read, TimestampRange(0, 4)); REQUIRE(soma_group->count() == 1); REQUIRE(soma_group->has("subgroup") == false); REQUIRE(soma_group->has("subarray") == true); @@ -193,39 +189,50 @@ TEST_CASE("SOMAGroup: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-group"; - SOMAGroup::create(ctx, uri, "NONE"); + SOMAGroup::create(ctx, uri, "NONE", TimestampRange(0, 2)); auto soma_group = SOMAGroup::open( - OpenMode::write, - uri, - ctx, - "metadata", - std::pair(1, 1)); + OpenMode::write, uri, ctx, "metadata", TimestampRange(1, 1)); int32_t val = 100; soma_group->set_metadata("md", TILEDB_INT32, 1, &val); soma_group->close(); - soma_group->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_group->metadata_num() == 2); - REQUIRE(soma_group->has_metadata("soma_object_type") == true); - REQUIRE(soma_group->has_metadata("md") == true); - + // Read metadata + soma_group->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_group->metadata_num() == 3); + REQUIRE(soma_group->has_metadata("soma_object_type")); + REQUIRE(soma_group->has_metadata("soma_encoding_version")); + REQUIRE(soma_group->has_metadata("md")); auto mdval = soma_group->get_metadata("md"); REQUIRE(std::get(*mdval) == 
TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_group->close(); - soma_group->open(OpenMode::write, std::pair(2, 2)); + // md should not be available at (2, 2) + soma_group->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_group->metadata_num() == 2); + REQUIRE(soma_group->has_metadata("soma_object_type")); + REQUIRE(soma_group->has_metadata("soma_encoding_version")); + REQUIRE(!soma_group->has_metadata("md")); + soma_group->close(); + // Metadata should also be retrievable in write mode + soma_group->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_group->metadata_num() == 3); + REQUIRE(soma_group->has_metadata("soma_object_type")); + REQUIRE(soma_group->has_metadata("soma_encoding_version")); + REQUIRE(soma_group->has_metadata("md")); mdval = soma_group->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_group->delete_metadata("md"); mdval = soma_group->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_group->close(); - soma_group->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_group->has_metadata("md") == false); - REQUIRE(soma_group->metadata_num() == 1); - soma_group->close(); + // Confirm delete in read mode + soma_group->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_group->has_metadata("md")); + REQUIRE(soma_group->metadata_num() == 2); } \ No newline at end of file diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc index 246db3692c..b9a37d5dc4 100644 --- a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc @@ -80,19 +80,8 @@ TEST_CASE("SOMASparseNDArray: basic") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-sparse-ndarray-basic"; - SOMASparseNDArray::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); - - auto soma_sparse = 
SOMASparseNDArray::open(uri, OpenMode::read, ctx); - REQUIRE(soma_sparse->uri() == uri); - REQUIRE(soma_sparse->ctx() == ctx); - REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); - REQUIRE(soma_sparse->is_sparse() == true); - auto schema = soma_sparse->tiledb_schema(); - REQUIRE(schema->has_attribute("a0")); - REQUIRE(schema->domain().has_dimension("d0")); - REQUIRE(soma_sparse->ndim() == 1); - REQUIRE(soma_sparse->nnz() == 0); - soma_sparse->close(); + auto soma_sparse = SOMASparseNDArray::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx); std::vector d0(10); for (int j = 0; j < 10; j++) @@ -105,11 +94,13 @@ TEST_CASE("SOMASparseNDArray: basic") { array_buffer->emplace("a0", ColumnBuffer::create(tdb_arr, "a0", a0)); array_buffer->emplace("d0", ColumnBuffer::create(tdb_arr, "d0", d0)); - soma_sparse->open(OpenMode::write); soma_sparse->write(array_buffer); soma_sparse->close(); soma_sparse->open(OpenMode::read); + REQUIRE(soma_sparse->uri() == uri); + REQUIRE(soma_sparse->ctx() == ctx); + REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); while (auto batch = soma_sparse->read_next()) { auto arrbuf = batch.value(); auto d0span = arrbuf->at("d0")->data(); @@ -124,7 +115,8 @@ TEST_CASE("SOMASparseNDArray: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-sparse-ndarray"; - SOMASparseNDArray::create(uri, create_schema(*ctx->tiledb_ctx()), ctx); + SOMASparseNDArray::create( + uri, create_schema(*ctx->tiledb_ctx()), ctx, TimestampRange(0, 2)); auto soma_sparse = SOMASparseNDArray::open( uri, OpenMode::write, @@ -132,32 +124,48 @@ TEST_CASE("SOMASparseNDArray: metadata") { {}, ResultOrder::automatic, std::pair(1, 1)); + int32_t val = 100; soma_sparse->set_metadata("md", TILEDB_INT32, 1, &val); soma_sparse->close(); - soma_sparse->open(OpenMode::read, std::pair(1, 1)); - REQUIRE(soma_sparse->metadata_num() == 2); - REQUIRE(soma_sparse->has_metadata("soma_object_type") == true); - REQUIRE(soma_sparse->has_metadata("md") == true); - + 
// Read metadata + soma_sparse->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_sparse->metadata_num() == 3); + REQUIRE(soma_sparse->has_metadata("soma_object_type")); + REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); + REQUIRE(soma_sparse->has_metadata("md")); auto mdval = soma_sparse->get_metadata("md"); REQUIRE(std::get(*mdval) == TILEDB_INT32); REQUIRE(std::get(*mdval) == 1); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); soma_sparse->close(); - soma_sparse->open(OpenMode::write, std::pair(2, 2)); + // md should not be available at (2, 2) + soma_sparse->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_sparse->metadata_num() == 2); + REQUIRE(soma_sparse->has_metadata("soma_object_type")); + REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); + REQUIRE(!soma_sparse->has_metadata("md")); + soma_sparse->close(); + // Metadata should also be retrievable in write mode + soma_sparse->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_sparse->metadata_num() == 3); + REQUIRE(soma_sparse->has_metadata("soma_object_type")); + REQUIRE(soma_sparse->has_metadata("soma_encoding_version")); + REQUIRE(soma_sparse->has_metadata("md")); mdval = soma_sparse->get_metadata("md"); REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); + + // Delete and have it reflected when reading metadata while in write mode soma_sparse->delete_metadata("md"); mdval = soma_sparse->get_metadata("md"); REQUIRE(!mdval.has_value()); soma_sparse->close(); - soma_sparse->open(OpenMode::read, std::pair(3, 3)); - REQUIRE(soma_sparse->has_metadata("md") == false); - REQUIRE(soma_sparse->metadata_num() == 1); - soma_sparse->close(); + // Confirm delete in read mode + soma_sparse->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_sparse->has_metadata("md")); + REQUIRE(soma_sparse->metadata_num() == 2); } \ No newline at end of file