From 67a0b54617714552ece6d6e8e3d72a9e33fa3c79 Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Wed, 27 Mar 2024 16:54:40 -0500 Subject: [PATCH] Fix writes when using slice of arrow table --- apis/python/src/tiledbsoma/_dataframe.py | 8 ++++---- apis/python/src/tiledbsoma/io/ingest.py | 1 - apis/python/src/tiledbsoma/soma_array.cc | 18 +++++++++++++++++- apis/python/tests/test_io.py | 1 + libtiledbsoma/src/soma/soma_array.cc | 3 +-- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index c3bdf053c6..a5e1dacb55 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -455,6 +455,8 @@ def write( _util.check_type("values", values, (pa.Table,)) target_schema = [] + print("self.schema") + print(self.schema) for input_field in values.schema: target_field = self.schema.field(input_field.name) @@ -465,11 +467,9 @@ def write( target_schema.append(target_field.with_type(pa.uint8())) else: target_schema.append(target_field) + # print(values) values = values.cast(pa.schema(target_schema, values.schema.metadata)) - - print("HELLLLOOOOOOOOOOOOOOOOO") - print() - + for batch in values.to_batches(): self._handle.write(batch) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 48e2906ac0..a05a9385be 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1219,7 +1219,6 @@ def _write_arrow_table( ) handle.write(arrow_table) - def _write_dataframe( df_uri: str, df: pd.DataFrame, diff --git a/apis/python/src/tiledbsoma/soma_array.cc b/apis/python/src/tiledbsoma/soma_array.cc index c81c945a20..f81364b53d 100644 --- a/apis/python/src/tiledbsoma/soma_array.cc +++ b/apis/python/src/tiledbsoma/soma_array.cc @@ -91,9 +91,25 @@ void write(SOMAArray& array, py::handle py_batch) { } } + auto np = py::module::import("numpy"); + auto table_offset = arr_->offset; + auto data_size = tiledb::impl::type_size(ArrowAdapter::to_tiledb_format(sch_->format)); + + if(offsets){ + offsets += table_offset; + } + if(validities){ + validities += table_offset; + } + array.set_column_data( - sch_->name, arr_->length, data, offsets, validities); + sch_->name, + arr_->length, + (char*)data + table_offset * data_size, + offsets, + nullptr); } + try { array.write(); } catch (const std::exception& e) { diff --git a/apis/python/tests/test_io.py b/apis/python/tests/test_io.py index e696ffe927..61a5861952 100644 --- a/apis/python/tests/test_io.py +++ b/apis/python/tests/test_io.py @@ -171,3 +171,4 @@ def test_write_arrow_table(tmp_path, num_rows, cap_nbytes): with soma.DataFrame.open(uri) as sdf: pdf = sdf.read().concat().to_pandas() assert list(pdf["foo"]) == pydict["foo"] + assert list(pdf["bar"]) == pydict["bar"] diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index e1c7dafe2a..e0c6b5a58b 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -404,11 +404,10 @@ void SOMAArray::write() { if (mq_->query_type() != TILEDB_WRITE) { throw TileDBSOMAError("[SOMAArray] array must be opened in write mode"); } - mq_->submit_write(); mq_->reset(); - array_buffer_ = nullptr; + // array_buffer_ = nullptr; } uint64_t SOMAArray::nnz() {