Commit 2bb57f1

WIP
nguyenv committed Mar 26, 2024
1 parent 5417c17 commit 2bb57f1
Showing 2 changed files with 4 additions and 88 deletions.
91 changes: 3 additions & 88 deletions apis/python/src/tiledbsoma/_dataframe.py
@@ -454,94 +454,6 @@ def write(
"""
_util.check_type("values", values, (pa.Table,))

# dim_cols_map: Dict[str, pd.DataFrame] = {}
# attr_cols_map: Dict[str, pd.DataFrame] = {}
# dim_names_set = self.index_column_names
# n = None

# for col_info in values.schema:
# name = col_info.name
# col = values.column(name).combine_chunks()
# n = len(col)

# if self._handle.schema.has_attr(name):
# attr = self._handle.schema.attr(name)

# # Add the enumeration values to the TileDB Array from ArrowArray
# if attr.enum_label is not None:
# if not pa.types.is_dictionary(col_info.type):
# raise ValueError(
# "Expected dictionary type for enumerated attribute "
# f"{name} but saw {col.type}"
# )

# enmr = self._handle.enum(attr.name)

# # get new enumeration values by taking the set difference
# # while maintaining ordering
# update_vals = np.setdiff1d(
# col.dictionary, enmr.values(), assume_unique=True
# )

# index_capacity_current = len(enmr.values()) + len(update_vals)
# index_capacity_max = np.iinfo(
# col_info.type.index_type.to_pandas_dtype()
# ).max
# if index_capacity_max < index_capacity_current:
# raise ValueError(
# f"Too many enumeration values ({index_capacity_current}) "
#                             f"for index type {col_info.type.index_type}"
# )

# # only extend if there are new values
# if len(update_vals) != 0:
# se = tiledb.ArraySchemaEvolution(self.context.tiledb_ctx)
# if np.issubdtype(enmr.dtype.type, np.str_):
# extend_vals = np.array(update_vals, "U")
# elif np.issubdtype(enmr.dtype.type, np.bytes_):
# extend_vals = np.array(update_vals, "S")
# else:
# extend_vals = np.array(update_vals, enmr.dtype)
# new_enmr = enmr.extend(extend_vals)
# df = pd.Categorical(col.to_pandas(), new_enmr.values())
# col = pa.DictionaryArray.from_pandas(df)
# se.extend_enumeration(new_enmr)
# se.array_evolve(uri=self.uri)

# cols_map = dim_cols_map if name in dim_names_set else attr_cols_map
# schema = self._handle.schema
# if pa.types.is_dictionary(col.type):
# if (
# name not in dim_names_set
# and schema.attr(name).enum_label is not None
# ):
# cols_map[name] = col.indices.to_pandas()
# else:
# cols_map[name] = col

# else:
# if name not in dim_names_set:
# if schema.attr(name).enum_label is not None:
# raise ValueError(
# f"Categorical column {name} must be presented with categorical data"
# )

# cols_map[name] = col.to_pandas()

# if n is None:
# raise ValueError(f"did not find any column names in {values.schema.names}")
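
The deleted block above extends the array's enumeration before writing categorical data: np.setdiff1d(..., assume_unique=True) collects the dictionary values not yet in the enumeration while preserving their arrival order (setdiff1d only sorts when assume_unique is False), and the np.iinfo check guards against overflowing the dictionary's index type. A minimal standalone sketch of those two steps, with made-up values (none of these names come from the codebase):

import numpy as np
import pyarrow as pa

existing = np.array(["red", "green"])  # values already in the enumeration
incoming = pa.array(["blue", "red", "alpha"]).dictionary_encode()

# With assume_unique=True, setdiff1d keeps the order of its first argument
# instead of sorting, so new values are appended in arrival order.
update_vals = np.setdiff1d(incoming.dictionary, existing, assume_unique=True)
print(update_vals)  # ['blue' 'alpha']

# Guard: every current and new value must be addressable by the index type.
index_capacity_max = np.iinfo(incoming.type.index_type.to_pandas_dtype()).max
if index_capacity_max < len(existing) + len(update_vals):
    raise ValueError("too many enumeration values for the index type")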

# We need to produce the dim cols in the same order as they're present in the TileDB schema
# (tracked by self.index_column_names). This is important in the multi-index case. Suppose
# the Arrow schema has two index columns in the order "burger" and "meister", and suppose
# the user set index_column_names = ["meister", "burger"] when creating the TileDB schema.
# Then the above for-loop over the Arrow schema will find the former ordering, but for the
# ``writer[dims] = attrs`` below we must have dims with the latter ordering.
# values = values.cast(self.schema)
# target_schema = pa.schema(self.schema.field(f.name) for f in values.schema)
# values = values.cast(target_schema)
# print(values)
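
The ordering concern spelled out in the comment above can be shown in isolation: dim columns must be emitted in the order tracked by index_column_names, not the order they happen to occupy in the Arrow schema. A small sketch reusing the comment's "burger"/"meister" example (the table contents are invented):

import pyarrow as pa

values = pa.table({"burger": [1, 2], "meister": [10, 20], "extra": [0.5, 0.7]})
index_column_names = ("meister", "burger")  # order fixed at schema creation

# Iterating values.schema yields burger before meister, so the dims are
# re-keyed into TileDB schema order before any write is issued.
dim_cols = [values.column(name) for name in index_column_names]
print([values.schema.names.index(name) for name in index_column_names])  # [1, 0]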

target_schema = []
for input_field in values.schema:
target_field = self.schema.field(input_field.name)
@@ -555,6 +467,9 @@ def write(
target_schema.append(target_field)
values = values.cast(pa.schema(target_schema, values.schema.metadata))
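
The live code above casts the incoming table field-by-field against the on-disk schema, preserving the input table's column order and schema metadata. A reduced sketch of the same cast pattern (both schemas here are invented for illustration):

import pyarrow as pa

values = pa.table({"a": [1, 2], "b": ["x", "y"]})
on_disk = pa.schema([("b", pa.large_string()), ("a", pa.int32())])

# Look up each input field in the on-disk schema so the cast target keeps
# the input column order; carry the input metadata through unchanged.
target = pa.schema(
    [on_disk.field(f.name) for f in values.schema], values.schema.metadata
)
values = values.cast(target)
print(values.schema)
# a: int32
# b: large_string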

print("HELLLLOOOOOOOOOOOOOOOOO")
print()

for batch in values.to_batches():
self._handle.write(batch)
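
Table.to_batches() splits a (possibly chunked) table into RecordBatch views without copying, so the loop above hands the writer one contiguous batch per chunk. For example:

import pyarrow as pa

t = pa.concat_tables([pa.table({"x": [1, 2]}), pa.table({"x": [3]})])
for batch in t.to_batches():
    print(batch.num_rows)  # 2, then 1 -- one batch per underlying chunk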

1 change: 1 addition & 0 deletions libtiledbsoma/src/soma/managed_query.cc
@@ -272,6 +272,7 @@ void ManagedQuery::setup_read() {

void ManagedQuery::submit_write() {
query_->submit();
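    // Flush buffered write state: TileDB requires an explicit finalize()
    // after the last submit for global-order writes to take effect.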
query_->finalize();
}

void ManagedQuery::submit_read() {
