Commit 2bb57f1

WIP
nguyenv committed Mar 26, 2024
1 parent 5417c17 commit 2bb57f1
Showing 2 changed files with 4 additions and 88 deletions.
91 changes: 3 additions & 88 deletions apis/python/src/tiledbsoma/_dataframe.py
@@ -454,94 +454,6 @@ def write(
"""
_util.check_type("values", values, (pa.Table,))

# dim_cols_map: Dict[str, pd.DataFrame] = {}
# attr_cols_map: Dict[str, pd.DataFrame] = {}
# dim_names_set = self.index_column_names
# n = None

# for col_info in values.schema:
# name = col_info.name
# col = values.column(name).combine_chunks()
# n = len(col)

# if self._handle.schema.has_attr(name):
# attr = self._handle.schema.attr(name)

# # Add the enumeration values to the TileDB Array from ArrowArray
# if attr.enum_label is not None:
# if not pa.types.is_dictionary(col_info.type):
# raise ValueError(
# "Expected dictionary type for enumerated attribute "
# f"{name} but saw {col.type}"
# )

# enmr = self._handle.enum(attr.name)

# # get new enumeration values by taking the set difference
# # while maintaining ordering
# update_vals = np.setdiff1d(
# col.dictionary, enmr.values(), assume_unique=True
# )

# index_capacity_current = len(enmr.values()) + len(update_vals)
# index_capacity_max = np.iinfo(
# col_info.type.index_type.to_pandas_dtype()
# ).max
# if index_capacity_max < index_capacity_current:
# raise ValueError(
# f"Too many enumeration values ({index_capacity_current}) "
#                             f"for index type {col_info.type.index_type}"
# )

# # only extend if there are new values
# if len(update_vals) != 0:
# se = tiledb.ArraySchemaEvolution(self.context.tiledb_ctx)
# if np.issubdtype(enmr.dtype.type, np.str_):
# extend_vals = np.array(update_vals, "U")
# elif np.issubdtype(enmr.dtype.type, np.bytes_):
# extend_vals = np.array(update_vals, "S")
# else:
# extend_vals = np.array(update_vals, enmr.dtype)
# new_enmr = enmr.extend(extend_vals)
# df = pd.Categorical(col.to_pandas(), new_enmr.values())
# col = pa.DictionaryArray.from_pandas(df)
# se.extend_enumeration(new_enmr)
# se.array_evolve(uri=self.uri)

# cols_map = dim_cols_map if name in dim_names_set else attr_cols_map
# schema = self._handle.schema
# if pa.types.is_dictionary(col.type):
# if (
# name not in dim_names_set
# and schema.attr(name).enum_label is not None
# ):
# cols_map[name] = col.indices.to_pandas()
# else:
# cols_map[name] = col

# else:
# if name not in dim_names_set:
# if schema.attr(name).enum_label is not None:
# raise ValueError(
# f"Categorical column {name} must be presented with categorical data"
# )

# cols_map[name] = col.to_pandas()

# if n is None:
# raise ValueError(f"did not find any column names in {values.schema.names}")
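
The deleted block above extends the array's enumeration before writing categorical data: np.setdiff1d(..., assume_unique=True) collects the dictionary values not yet in the enumeration while preserving their arrival order (setdiff1d only sorts when assume_unique is False), and the np.iinfo check guards against overflowing the dictionary's index type. A minimal standalone sketch of those two steps, with made-up values (none of these names come from the codebase):

import numpy as np
import pyarrow as pa

existing = np.array(["red", "green"])  # values already in the enumeration
incoming = pa.array(["blue", "red", "alpha"]).dictionary_encode()

# With assume_unique=True, setdiff1d keeps the order of its first argument
# instead of sorting, so new values are appended in arrival order.
update_vals = np.setdiff1d(incoming.dictionary, existing, assume_unique=True)
print(update_vals)  # ['blue' 'alpha']

# Guard: every current and new value must be addressable by the index type.
index_capacity_max = np.iinfo(incoming.type.index_type.to_pandas_dtype()).max
if index_capacity_max < len(existing) + len(update_vals):
    raise ValueError("too many enumeration values for the index type")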

# We need to produce the dim cols in the same order as they're present in the TileDB schema
# (tracked by self.index_column_names). This is important in the multi-index case. Suppose
# the Arrow schema has two index columns in the order "burger" and "meister", and suppose
# the user set index_column_names = ["meister", "burger"] when creating the TileDB schema.
# Then the above for-loop over the Arrow schema will find the former ordering, but for the
# ``writer[dims] = attrs`` below we must have dims with the latter ordering.
# values = values.cast(self.schema)
# target_schema = pa.schema(self.schema.field(f.name) for f in values.schema)
# values = values.cast(target_schema)
# print(values)
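
The ordering concern spelled out in the comment above can be shown in isolation: dim columns must be emitted in the order tracked by index_column_names, not the order they happen to occupy in the Arrow schema. A small sketch reusing the comment's "burger"/"meister" example (the table contents are invented):

import pyarrow as pa

values = pa.table({"burger": [1, 2], "meister": [10, 20], "extra": [0.5, 0.7]})
index_column_names = ("meister", "burger")  # order fixed at schema creation

# Iterating values.schema yields burger before meister, so the dims are
# re-keyed into TileDB schema order before any write is issued.
dim_cols = [values.column(name) for name in index_column_names]
print([values.schema.names.index(name) for name in index_column_names])  # [1, 0]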

target_schema = []
for input_field in values.schema:
target_field = self.schema.field(input_field.name)
@@ -555,6 +467,9 @@ def write(
target_schema.append(target_field)
values = values.cast(pa.schema(target_schema, values.schema.metadata))
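
The live code above casts the incoming table field-by-field against the on-disk schema, preserving the input table's column order and schema metadata. A reduced sketch of the same cast pattern (both schemas here are invented for illustration):

import pyarrow as pa

values = pa.table({"a": [1, 2], "b": ["x", "y"]})
on_disk = pa.schema([("b", pa.large_string()), ("a", pa.int32())])

# Look up each input field in the on-disk schema so the cast target keeps
# the input column order; carry the input metadata through unchanged.
target = pa.schema(
    [on_disk.field(f.name) for f in values.schema], values.schema.metadata
)
values = values.cast(target)
print(values.schema)
# a: int32
# b: large_string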

print("HELLLLOOOOOOOOOOOOOOOOO")
print()

for batch in values.to_batches():
self._handle.write(batch)
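
Table.to_batches() splits a (possibly chunked) table into RecordBatch views without copying, so the loop above hands the writer one contiguous batch per chunk. For example:

import pyarrow as pa

t = pa.concat_tables([pa.table({"x": [1, 2]}), pa.table({"x": [3]})])
for batch in t.to_batches():
    print(batch.num_rows)  # 2, then 1 -- one batch per underlying chunk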

1 change: 1 addition & 0 deletions libtiledbsoma/src/soma/managed_query.cc
@@ -272,6 +272,7 @@ void ManagedQuery::setup_read() {

void ManagedQuery::submit_write() {
query_->submit();
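    // Flush buffered write state: TileDB requires an explicit finalize()
    // after the last submit for global-order writes to take effect.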
query_->finalize();
}

void ManagedQuery::submit_read() {
