Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Addition of ArrowSchema to TileDB ArraySchema Converter #2418

Merged
merged 10 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2022-2023 TileDB, Inc.
* @copyright Copyright (c) 2022-2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -102,8 +102,7 @@ class SOMAArray : public SOMAObject {
*
* @param mode read or write
* @param uri URI of the array
* @param name Name of the array
* @param platform_config Config parameter dictionary
* @param ctx SOMAContext
* @param column_names Columns to read
* @param batch_size Read batch size
* @param result_order Read result order: automatic (default), rowmajor,
Expand Down Expand Up @@ -152,8 +151,8 @@ class SOMAArray : public SOMAObject {
*
* @param mode read or write
* @param uri URI of the array
* @param ctx SOMAContext
* @param name name of the array
* @param platform_config Config parameter dictionary
* @param column_names Columns to read
* @param batch_size Batch size
* @param result_order Result order
Expand Down
28 changes: 17 additions & 11 deletions libtiledbsoma/src/soma/soma_collection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,11 @@ using namespace tiledb;
//= public static
//===================================================================

std::unique_ptr<SOMACollection> SOMACollection::create(
void SOMACollection::create(
std::string_view uri,
std::shared_ptr<SOMAContext> ctx,
std::optional<TimestampRange> timestamp) {
auto soma_group = SOMAGroup::create(ctx, uri, "SOMACollection", timestamp);
return std::make_unique<SOMACollection>(*soma_group);
SOMAGroup::create(ctx, uri, "SOMACollection", timestamp);
}

std::unique_ptr<SOMACollection> SOMACollection::open(
Expand Down Expand Up @@ -111,8 +110,11 @@ std::shared_ptr<SOMAExperiment> SOMACollection::add_new_experiment(
std::string_view uri,
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema) {
SOMAExperiment::create(uri, schema, ctx);
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::optional<PlatformConfig> platform_config) {
SOMAExperiment::create(
uri, std::move(schema), index_columns, ctx, platform_config);
std::shared_ptr<SOMAExperiment> member = SOMAExperiment::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand All @@ -125,8 +127,9 @@ std::shared_ptr<SOMAMeasurement> SOMACollection::add_new_measurement(
std::string_view uri,
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema) {
SOMAMeasurement::create(uri, schema, ctx);
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns) {
SOMAMeasurement::create(uri, std::move(schema), index_columns, ctx);
std::shared_ptr<SOMAMeasurement> member = SOMAMeasurement::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand All @@ -139,8 +142,11 @@ std::shared_ptr<SOMADataFrame> SOMACollection::add_new_dataframe(
std::string_view uri,
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema) {
SOMADataFrame::create(uri, schema, ctx);
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::optional<PlatformConfig> platform_config) {
SOMADataFrame::create(
uri, std::move(schema), index_columns, ctx, platform_config);
std::shared_ptr<SOMADataFrame> member = SOMADataFrame::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand All @@ -154,7 +160,7 @@ std::shared_ptr<SOMADenseNDArray> SOMACollection::add_new_dense_ndarray(
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema) {
SOMADenseNDArray::create(uri, schema, ctx);
SOMADenseNDArray::create(uri, std::move(schema), ctx);
std::shared_ptr<SOMADenseNDArray> member = SOMADenseNDArray::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand All @@ -168,7 +174,7 @@ std::shared_ptr<SOMASparseNDArray> SOMACollection::add_new_sparse_ndarray(
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema) {
SOMASparseNDArray::create(uri, schema, ctx);
SOMASparseNDArray::create(uri, std::move(schema), ctx);
std::shared_ptr<SOMASparseNDArray> member = SOMASparseNDArray::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand Down
13 changes: 9 additions & 4 deletions libtiledbsoma/src/soma/soma_collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class SOMACollection : public SOMAGroup {
* @param ctx SOMAContext
* @param uri URI to create the SOMACollection
*/
static std::unique_ptr<SOMACollection> create(
static void create(
std::string_view uri,
std::shared_ptr<SOMAContext> ctx,
std::optional<TimestampRange> timestamp = std::nullopt);
Expand Down Expand Up @@ -157,7 +157,9 @@ class SOMACollection : public SOMAGroup {
std::string_view uri,
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema);
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::optional<PlatformConfig> platform_config = std::nullopt);

/**
* Create and add a SOMAMeasurement to the SOMACollection.
Expand All @@ -172,7 +174,8 @@ class SOMACollection : public SOMAGroup {
std::string_view uri,
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema);
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns);

/**
* Create and add a SOMADataFrame to the SOMACollection.
Expand All @@ -187,7 +190,9 @@ class SOMACollection : public SOMAGroup {
std::string_view uri,
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
ArraySchema schema);
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::optional<PlatformConfig> platform_config = std::nullopt);

/**
* Create and add a SOMADenseNDArray to the SOMACollection.
Expand Down
12 changes: 7 additions & 5 deletions libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,16 @@ using namespace tiledb;
//= public static
//===================================================================

std::unique_ptr<SOMADataFrame> SOMADataFrame::create(
void SOMADataFrame::create(
std::string_view uri,
ArraySchema schema,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config,
std::optional<TimestampRange> timestamp) {
auto soma_array = SOMAArray::create(
ctx, uri, schema, "SOMADataFrame", timestamp);
return std::make_unique<SOMADataFrame>(*soma_array);
auto tiledb_schema = ArrowAdapter::tiledb_schema_from_arrow_schema(
ctx->tiledb_ctx(), std::move(schema), index_columns, platform_config);
SOMAArray::create(ctx, uri, tiledb_schema, "SOMADataFrame", timestamp);
}

std::unique_ptr<SOMADataFrame> SOMADataFrame::open(
Expand Down
25 changes: 14 additions & 11 deletions libtiledbsoma/src/soma/soma_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2023 TileDB, Inc.
* @copyright Copyright (c) 2023-2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -52,33 +52,36 @@ class SOMADataFrame : public SOMAArray {
/**
* @brief Create a SOMADataFrame object at the given URI.
*
* @param uri URI to create the SOMAArray
* @param schema TileDB ArraySchema
* @param uri URI to create the SOMADataFrame
* @param schema Arrow schema
* @param index_columns The index column names with associated domains
* and tile extents per dimension
* @param ctx SOMAContext
* @param timestamp Optional pair indicating timestamp start and end
* @return std::unique_ptr<SOMADataFrame>
* @param platform_config Optional config parameter dictionary
* @param timestamp Optional timestamp range used when writing the SOMA
* metadata
*/
static std::unique_ptr<SOMADataFrame> create(
static void create(
std::string_view uri,
ArraySchema schema,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config = std::nullopt,
std::optional<TimestampRange> timestamp = std::nullopt);

/**
* @brief Open and return a SOMADataFrame object at the given URI.
*
* @param mode read or write
* @param uri URI to create the SOMADataFrame
* @param mode read or write
* @param ctx SOMAContext
* @param column_names A list of column names to use as user-defined index
* columns (e.g., ``['cell_type', 'tissue_type']``). All named columns must
* exist in the schema, and at least one index column name is required.
* @param platform_config Platform-specific options used to create this
* DataFrame
* @param result_order Read result order: automatic (default), rowmajor, or
* colmajor
* @param timestamp If specified, overrides the default timestamp used to
* open this object. If unset, uses the timestamp provided by the context.
* @return std::unique_ptr<SOMADataFrame>
* @return std::unique_ptr<SOMADataFrame> SOMADataFrame
*/
static std::unique_ptr<SOMADataFrame> open(
std::string_view uri,
Expand Down
7 changes: 2 additions & 5 deletions libtiledbsoma/src/soma/soma_dense_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
*
* This file defines the SOMADenseNDArray class.
*/

#include "soma_dense_ndarray.h"

namespace tiledbsoma {
Expand All @@ -39,14 +38,12 @@ using namespace tiledb;
//= public static
//===================================================================

std::unique_ptr<SOMADenseNDArray> SOMADenseNDArray::create(
void SOMADenseNDArray::create(
std::string_view uri,
ArraySchema schema,
std::shared_ptr<SOMAContext> ctx,
std::optional<TimestampRange> timestamp) {
auto soma_array = SOMAArray::create(
ctx, uri, schema, "SOMADenseNDArray", timestamp);
return std::make_unique<SOMADenseNDArray>(*soma_array);
SOMAArray::create(ctx, uri, schema, "SOMADenseNDArray", timestamp);
}

std::unique_ptr<SOMADenseNDArray> SOMADenseNDArray::open(
Expand Down
23 changes: 11 additions & 12 deletions libtiledbsoma/src/soma/soma_dense_ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2023 TileDB, Inc.
* @copyright Copyright (c) 2023-2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -52,13 +52,12 @@ class SOMADenseNDArray : public SOMAArray {
/**
* @brief Create a SOMADenseNDArray object at the given URI.
*
* @param uri URI to create the SOMAArray
* @param schema TileDB ArraySchema
* @param uri URI to create the SOMADenseNDArray
* @param schema TileDB ArraySchema
* @param ctx SOMAContext
* @param timestamp Optional pair indicating timestamp start and end
* @return std::unique_ptr<SOMADenseNDArray>
* @param timestamp Optional timestamp range used when writing the SOMA
* metadata
*/
static std::unique_ptr<SOMADenseNDArray> create(
static void create(
nguyenv marked this conversation as resolved.
Show resolved Hide resolved
std::string_view uri,
ArraySchema schema,
std::shared_ptr<SOMAContext> ctx,
Expand All @@ -67,18 +66,17 @@ class SOMADenseNDArray : public SOMAArray {
/**
* @brief Open and return a SOMADenseNDArray object at the given URI.
*
* @param mode read or write
* @param uri URI to create the SOMADenseNDArray
* @param mode read or write
* @param ctx SOMAContext
* @param column_names A list of column names to use as user-defined index
* columns (e.g., ``['cell_type', 'tissue_type']``). All named columns must
* exist in the schema, and at least one index column name is required.
* @param platform_config Platform-specific options used to create this
* SOMADenseNDArray
* @param timestamp If specified, overrides the default timestamp used to
* open this object. If unset, uses the timestamp provided by the context.
* @param result_order Read result order: automatic (default), rowmajor, or
* colmajor
* @return std::shared_ptr<SOMADenseNDArray>
* @param timestamp If specified, overrides the default timestamp used to
* open this object. If unset, uses the timestamp provided by the context.
* @return std::unique_ptr<SOMADenseNDArray> SOMADenseNDArray
*/
static std::unique_ptr<SOMADenseNDArray> open(
std::string_view uri,
Expand All @@ -105,6 +103,7 @@ class SOMADenseNDArray : public SOMAArray {
* @param mode read or write
* @param uri URI of the array
* @param ctx TileDB context
* @param column_names Columns to read
* @param result_order Read result order: automatic (default), rowmajor, or
* colmajor
* @param timestamp Timestamp
Expand Down
26 changes: 19 additions & 7 deletions libtiledbsoma/src/soma/soma_experiment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,31 @@ using namespace tiledb;
//= public static
//===================================================================

std::unique_ptr<SOMAExperiment> SOMAExperiment::create(
void SOMAExperiment::create(
std::string_view uri,
ArraySchema schema,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config,
std::optional<TimestampRange> timestamp) {
std::string exp_uri(uri);

auto soma_group = SOMAGroup::create(ctx, uri, "SOMAExperiment", timestamp);
SOMADataFrame::create(exp_uri + "/obs", schema, ctx, timestamp);
SOMAGroup::create(ctx, exp_uri, "SOMAExperiment", timestamp);
SOMADataFrame::create(
exp_uri + "/obs",
std::move(schema),
index_columns,
ctx,
platform_config,
timestamp);
SOMACollection::create(exp_uri + "/ms", ctx, timestamp);
soma_group->set(exp_uri + "/obs", URIType::absolute, "obs");
soma_group->set(exp_uri + "/ms", URIType::absolute, "ms");
return std::make_unique<SOMAExperiment>(*soma_group);

auto name = std::string(std::filesystem::path(uri).filename());
auto group = SOMAGroup::open(
OpenMode::write, exp_uri, ctx, name, timestamp);
group->set(exp_uri + "/obs", URIType::absolute, "obs");
group->set(exp_uri + "/ms", URIType::absolute, "ms");
group->close();
}

std::unique_ptr<SOMAExperiment> SOMAExperiment::open(
Expand Down
6 changes: 4 additions & 2 deletions libtiledbsoma/src/soma/soma_experiment.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@ class SOMAExperiment : public SOMACollection {
* @param schema Arrow schema
* @param index_columns The index column names with associated domains
* and tile extents per dimension
* @param platform_config Optional config parameter dictionary
*/
static std::unique_ptr<SOMAExperiment> create(
static void create(
std::string_view uri,
ArraySchema schema,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config = std::nullopt,
std::optional<TimestampRange> timestamp = std::nullopt);

/**
Expand Down
Loading
Loading