Skip to content

Commit

Permalink
[c++] Fix segfault issues
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv committed Mar 15, 2024
1 parent ab3f6d0 commit b341e0e
Show file tree
Hide file tree
Showing 5 changed files with 2,497 additions and 2,250 deletions.
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/reindexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
*/

#include <tiledbsoma/reindexer/reindexer.h>
//#include <tiledbsoma/utils/carrow.h>
// #include <tiledbsoma/utils/carrow.h>
#include "common.h"

#define DENUM(x) .value(#x, TILEDB_##x)
Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/src/cli/cli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "soma/enums.h"
#include "soma/soma_array.h"
#include "utils/arrow_adapter.h"
//#include "utils/carrow.h"
// #include "utils/carrow.h"
#include "utils/logger.h"

using namespace tiledbsoma;
Expand Down
147 changes: 98 additions & 49 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ namespace tiledbsoma {
using namespace tiledb;

void ArrowAdapter::release_schema(struct ArrowSchema* schema) {
LOG_DEBUG(fmt::format("[ArrowAdapter] release_schema for {}", schema->name));
if (schema->name != nullptr)
LOG_DEBUG(
fmt::format("[ArrowAdapter] release_schema for {}", schema->name));

Check warning on line 44 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L43-L44

Added lines #L43 - L44 were not covered by tests

if (schema->name != nullptr) {
LOG_TRACE("[ArrowAdapter] release_schema schema->name");
Expand All @@ -61,10 +63,14 @@ void ArrowAdapter::release_schema(struct ArrowSchema* schema) {
for (auto i = 0; i < schema->n_children; i++) {
if (schema->children[i] != nullptr) {
if (schema->children[i]->release != nullptr) {
LOG_TRACE(fmt::format("[ArrowAdapter] release_schema schema->child {} release",i));
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema schema->child {} "

Check warning on line 67 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L66-L67

Added lines #L66 - L67 were not covered by tests
"release",
i));
release_schema(schema->children[i]);

Check warning on line 70 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L70

Added line #L70 was not covered by tests
}
LOG_TRACE(fmt::format("[ArrowAdapter] release_schema schema->child {} free",i));
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema schema->child {} free", i));
free(schema->children[i]);

Check warning on line 74 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L72-L74

Added lines #L72 - L74 were not covered by tests
}
}
Expand Down Expand Up @@ -108,10 +114,13 @@ void ArrowAdapter::release_array(struct ArrowArray* array) {
for (auto i = 0; i < array->n_children; i++) {
if (array->children[i] != nullptr) {
if (array->children[i]->release != nullptr) {
LOG_TRACE(fmt::format("[ArrowAdapter] release_schema array->child {} release",i));
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema array->child {} release",

Check warning on line 118 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L117-L118

Added lines #L117 - L118 were not covered by tests
i));
release_array(array->children[i]);

Check warning on line 120 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L120

Added line #L120 was not covered by tests
}
LOG_TRACE(fmt::format("[ArrowAdapter] release_schema array->child {} free",i));
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema array->child {} free", i));
free(array->children[i]);

Check warning on line 124 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L122-L124

Added lines #L122 - L124 were not covered by tests
}
}
Expand All @@ -121,9 +130,10 @@ void ArrowAdapter::release_array(struct ArrowArray* array) {
}

if (array->dictionary != nullptr) {
// -- TODO: This can lead to segfault on some data sets and could be cause
// -- TODO: This can lead to segfault on some data sets and could be
// cause
// by how we fill arrow data structures. This should pass.
//if (array->dictionary->release != nullptr) {
// if (array->dictionary->release != nullptr) {
// LOG_TRACE("[ArrowAdapter] release_array array->dict release");
// release_array(array->dictionary);
//}
Expand All @@ -143,17 +153,21 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(
auto nattr = tiledb_schema.attribute_num();

std::unique_ptr<ArrowSchema> arrow_schema = std::make_unique<ArrowSchema>();
arrow_schema->format = "+s";
arrow_schema->format = strdup("+s");

Check warning on line 156 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L156

Added line #L156 was not covered by tests
arrow_schema->n_children = ndim + nattr;
arrow_schema->release = &ArrowAdapter::release_schema;
arrow_schema->children = (ArrowSchema**) malloc(arrow_schema->n_children * sizeof(ArrowSchema*)); //new ArrowSchema*[arrow_schema->n_children];
arrow_schema->children = (ArrowSchema**)malloc(
arrow_schema->n_children *

Check warning on line 160 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L159-L160

Added lines #L159 - L160 were not covered by tests
sizeof(ArrowSchema*)); // new ArrowSchema*[arrow_schema->n_children];

ArrowSchema* child = nullptr;

for (uint32_t i = 0; i < ndim; ++i) {
auto dim = tiledb_schema.domain().dimension(i);
child = arrow_schema->children[i] = (ArrowSchema*) malloc(sizeof(ArrowSchema)); //new ArrowSchema;
child->format = ArrowAdapter::to_arrow_format(dim.type()).data();
child = arrow_schema->children[i] = (ArrowSchema*)malloc(

Check warning on line 167 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L167

Added line #L167 was not covered by tests
sizeof(ArrowSchema)); // new ArrowSchema;
child->format = strdup(
ArrowAdapter::to_arrow_format(dim.type()).data());

Check warning on line 170 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L169-L170

Added lines #L169 - L170 were not covered by tests
child->name = strdup(dim.name().c_str());
child->metadata = nullptr;
child->flags = 0;
Expand All @@ -165,8 +179,10 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(

for (uint32_t i = 0; i < nattr; ++i) {
auto attr = tiledb_schema.attribute(i);
child = arrow_schema->children[ndim + i] = (ArrowSchema*) malloc(sizeof(ArrowSchema)); //new ArrowSchema;
child->format = ArrowAdapter::to_arrow_format(attr.type()).data();
child = arrow_schema->children[ndim + i] = (ArrowSchema*)malloc(

Check warning on line 182 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L182

Added line #L182 was not covered by tests
sizeof(ArrowSchema)); // new ArrowSchema;
child->format = strdup(
ArrowAdapter::to_arrow_format(attr.type()).data());

Check warning on line 185 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L184-L185

Added lines #L184 - L185 were not covered by tests
child->name = strdup(attr.name().c_str());
child->metadata = nullptr;
child->flags = attr.nullable() ? ARROW_FLAG_NULLABLE : 0;
Expand All @@ -179,7 +195,8 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(
if (enmr_name.has_value()) {
auto enmr = ArrayExperimental::get_enumeration(
*ctx, *tiledb_array, attr.name());
auto dict = (ArrowSchema*) malloc(sizeof(ArrowSchema)); //new ArrowSchema;
auto dict = (ArrowSchema*)malloc(

Check warning on line 198 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L198

Added line #L198 was not covered by tests
sizeof(ArrowSchema)); // new ArrowSchema;
dict->format = strdup(
ArrowAdapter::to_arrow_format(enmr.type(), false).data());
dict->name = strdup(enmr.name().c_str());
Expand Down Expand Up @@ -214,7 +231,7 @@ std::pair<const void*, std::size_t> ArrowAdapter::_get_data_and_length(

// Allocate a single byte to copy the bits into
size_t sz = 1;
dst = malloc(sz); //new const void*[sz];
dst = malloc(sz); // new const void*[sz];

Check warning on line 234 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L234

Added line #L234 was not covered by tests
std::memcpy((void*)dst, &src, sz);

return std::pair(dst, data.size());
Expand Down Expand Up @@ -277,8 +294,8 @@ std::pair<const void*, std::size_t> ArrowAdapter::_get_data_and_length(

inline void exitIfError(const ArrowErrorCode ec, const std::string& msg) {
if (ec != NANOARROW_OK)
throw TileDBSOMAError(fmt::format(
"ArrowAdapter: Arrow Error {} ", msg));
throw TileDBSOMAError(
fmt::format("ArrowAdapter: Arrow Error {} ", msg));

Check warning on line 298 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L296-L298

Added lines #L296 - L298 were not covered by tests
}

std::pair<std::unique_ptr<ArrowArray>, std::unique_ptr<ArrowSchema>>
Expand All @@ -291,8 +308,10 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
auto coltype = to_arrow_format(column->type()).data();
auto natype = to_nanoarrow_type(coltype);
exitIfError(ArrowSchemaInitFromType(sch, natype), "Bad schema init");
exitIfError(ArrowSchemaSetName(sch, column->name().data()), "Bad schema name");
exitIfError(ArrowSchemaAllocateChildren(sch, 0), "Bad schema children alloc");
exitIfError(
ArrowSchemaSetName(sch, column->name().data()), "Bad schema name");
exitIfError(
ArrowSchemaAllocateChildren(sch, 0), "Bad schema children alloc");

Check warning on line 314 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L308-L314

Added lines #L308 - L314 were not covered by tests

#if 0
schema->format = to_arrow_format(column->type()).data();
Expand All @@ -306,7 +325,9 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
schema->release = &release_schema;
schema->private_data = nullptr;

int n_buffers = column->is_var() ? 3 : 2; // this will be 2 for enumerations and 3 for char vectors
int n_buffers = column->is_var() ? 3 :

Check warning on line 328 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L328

Added line #L328 was not covered by tests
2; // this will be 2 for enumerations
// and 3 for char vectors

// Create an ArrowBuffer to manage the lifetime of `column`.
// - `arrow_buffer` holds a shared_ptr to `column`, which
Expand All @@ -324,10 +345,13 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
exitIfError(ArrowArrayAllocateChildren(arr, 0), "Bad array children alloc");

Check warning on line 345 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L344-L345

Added lines #L344 - L345 were not covered by tests
array->length = column->size();

LOG_DEBUG(fmt::format("[ArrowAdapter] column type {} name {} nbuf {} {} nullable {}",
to_arrow_format(column->type()).data(),
column->name().data(), n_buffers, array->n_buffers, column->is_nullable()));

LOG_DEBUG(fmt::format(
"[ArrowAdapter] column type {} name {} nbuf {} {} nullable {}",
to_arrow_format(column->type()).data(),
column->name().data(),

Check warning on line 351 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L348-L351

Added lines #L348 - L351 were not covered by tests
n_buffers,
array->n_buffers,
column->is_nullable()));

Check warning on line 354 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L353-L354

Added lines #L353 - L354 were not covered by tests

#if 0
array->null_count = 0;
Expand All @@ -346,16 +370,18 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
column->name(),
column.use_count()));

array->buffers = (const void**) malloc(sizeof(void*) * n_buffers); //new const void*[n_buffers];
array->buffers = (const void**)malloc(
sizeof(void*) * n_buffers); // new const void*[n_buffers];

Check warning on line 374 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L373-L374

Added lines #L373 - L374 were not covered by tests
assert(array->buffers != nullptr);
array->buffers[0] = nullptr; // validity addressed below
array->buffers[0] = nullptr; // validity addressed below

Check warning on line 376 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L376

Added line #L376 was not covered by tests
array->buffers[n_buffers - 1] = column->data<void*>().data(); // data
if (n_buffers == 3) {
array->buffers[1] = column->offsets().data(); // offsets
}

if (column->is_nullable()) {
schema->flags |= ARROW_FLAG_NULLABLE; // turns out it is also set by default
schema->flags |= ARROW_FLAG_NULLABLE; // turns out it is also set by

Check warning on line 383 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L383

Added line #L383 was not covered by tests
// default

// Count nulls
for (auto v : column->validity()) {
Expand All @@ -366,7 +392,8 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
column->validity_to_bitmap();
array->buffers[0] = column->validity().data();
} else {
schema->flags = 0; // because ArrowSchemaInitFromType leads to NULLABLE set
schema->flags = 0; // because ArrowSchemaInitFromType leads to NULLABLE

Check warning on line 395 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L394-L395

Added lines #L394 - L395 were not covered by tests
// set
}

if (column->is_ordered()) {
Expand All @@ -379,16 +406,21 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
}

if (column->has_enumeration()) {
auto dict_sch = (ArrowSchema*) malloc(sizeof(ArrowSchema)); //new ArrowSchema;
auto dict_arr = (ArrowArray*) malloc(sizeof(ArrowArray)); //new ArrowArray;
auto dict_sch = (ArrowSchema*)malloc(

Check warning on line 409 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L409

Added line #L409 was not covered by tests
sizeof(ArrowSchema)); // new ArrowSchema;
auto dict_arr = (ArrowArray*)malloc(

Check warning on line 411 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L411

Added line #L411 was not covered by tests
sizeof(ArrowArray)); // new ArrowArray;

auto enmr = column->get_enumeration_info();
auto dcoltype = to_arrow_format(enmr->type(), false).data();
auto dnatype = to_nanoarrow_type(dcoltype);

Check warning on line 416 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L415-L416

Added lines #L415 - L416 were not covered by tests

exitIfError(ArrowSchemaInitFromType(dict_sch, dnatype), "Bad schema init");
exitIfError(
ArrowSchemaInitFromType(dict_sch, dnatype), "Bad schema init");
exitIfError(ArrowSchemaSetName(dict_sch, ""), "Bad schema name");
exitIfError(ArrowSchemaAllocateChildren(dict_sch, 0), "Bad schema children alloc");
exitIfError(
ArrowSchemaAllocateChildren(dict_sch, 0),
"Bad schema children alloc");

Check warning on line 423 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L418-L423

Added lines #L418 - L423 were not covered by tests
#if 0
dict_sch->format = strdup(to_arrow_format(enmr->type(), false).data());
dict_sch->name = nullptr;
Expand All @@ -401,8 +433,11 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
dict_sch->private_data = nullptr;
#endif

exitIfError(ArrowArrayInitFromType(dict_arr, dnatype), "Bad array init");
exitIfError(ArrowArrayAllocateChildren(dict_arr, 0), "Bad array children alloc");
exitIfError(
ArrowArrayInitFromType(dict_arr, dnatype), "Bad array init");
exitIfError(
ArrowArrayAllocateChildren(dict_arr, 0),
"Bad array children alloc");

Check warning on line 440 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L436-L440

Added lines #L436 - L440 were not covered by tests
#if 0
const int n_buf = strcmp(dict_sch->format, "u") == 0 ? 3 : 2;
dict_arr->null_count = 0;
Expand Down Expand Up @@ -507,21 +542,35 @@ std::string_view ArrowAdapter::to_arrow_format(

// FIXME: Add more types, maybe make it a map
enum ArrowType ArrowAdapter::to_nanoarrow_type(std::string_view sv) {

Check warning on line 544 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L544

Added line #L544 was not covered by tests
if (sv == "i") return NANOARROW_TYPE_INT32;
else if (sv == "c") return NANOARROW_TYPE_INT8;
else if (sv == "C") return NANOARROW_TYPE_UINT8;
else if (sv == "s") return NANOARROW_TYPE_INT16;
else if (sv == "S") return NANOARROW_TYPE_UINT16;
else if (sv == "I") return NANOARROW_TYPE_UINT32;
else if (sv == "l") return NANOARROW_TYPE_INT64;
else if (sv == "L") return NANOARROW_TYPE_UINT64;
else if (sv == "f") return NANOARROW_TYPE_FLOAT;
else if (sv == "g") return NANOARROW_TYPE_DOUBLE;
else if (sv == "u") return NANOARROW_TYPE_STRING;
else if (sv == "U") return NANOARROW_TYPE_LARGE_STRING;
else if (sv == "b") return NANOARROW_TYPE_BOOL;
else throw TileDBSOMAError(fmt::format(
"ArrowAdapter: Unsupported TileDB datatype string: {} ", sv));
if (sv == "i")
return NANOARROW_TYPE_INT32;
else if (sv == "c")
return NANOARROW_TYPE_INT8;
else if (sv == "C")
return NANOARROW_TYPE_UINT8;
else if (sv == "s")
return NANOARROW_TYPE_INT16;
else if (sv == "S")
return NANOARROW_TYPE_UINT16;
else if (sv == "I")
return NANOARROW_TYPE_UINT32;
else if (sv == "l")
return NANOARROW_TYPE_INT64;
else if (sv == "L")
return NANOARROW_TYPE_UINT64;
else if (sv == "f")
return NANOARROW_TYPE_FLOAT;
else if (sv == "g")
return NANOARROW_TYPE_DOUBLE;
else if (sv == "u")
return NANOARROW_TYPE_STRING;
else if (sv == "U")
return NANOARROW_TYPE_LARGE_STRING;
else if (sv == "b")
return NANOARROW_TYPE_BOOL;

Check warning on line 570 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L546-L570

Added lines #L546 - L570 were not covered by tests
else
throw TileDBSOMAError(fmt::format(
"ArrowAdapter: Unsupported TileDB datatype string: {} ", sv));

Check warning on line 573 in libtiledbsoma/src/utils/arrow_adapter.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/utils/arrow_adapter.cc#L572-L573

Added lines #L572 - L573 were not covered by tests
}

} // namespace tiledbsoma
6 changes: 3 additions & 3 deletions libtiledbsoma/src/utils/arrow_adapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
// https://arrow.apache.org/docs/format/CDataInterface.html#exporting-a-simple-int32-array

#include "nanoarrow.hpp"
//#ifndef ARROW_SCHEMA_AND_ARRAY_DEFINED
//#include "carrow.h"
//#endif
// #ifndef ARROW_SCHEMA_AND_ARRAY_DEFINED
// #include "carrow.h"
// #endif

namespace tiledbsoma {

Expand Down
Loading

0 comments on commit b341e0e

Please sign in to comment.