
Commit 6637b82
WIP fix several errors for macos
nguyenv committed Apr 5, 2024
1 parent fc35aba commit 6637b82
Showing 4 changed files with 7 additions and 122 deletions.
apis/python/devtools/ingestor (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,6 @@ import os
import sys
from typing import Optional

import tiledb
from somacore import options

import tiledbsoma
@@ -26,6 +25,7 @@ import tiledbsoma._util
import tiledbsoma.io
import tiledbsoma.logging
from tiledbsoma.options import SOMATileDBContext
import tiledb


# ================================================================
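In the ingestor devtool the only change is import ordering: import tiledb moves from earlier in the import list to after the tiledbsoma imports. A minimal sketch of the resulting import block, abbreviated to the lines shown in the diff (the rationale, presumably that tiledbsoma and its bundled native library should be loaded before the standalone tiledb module on macOS, is an assumption and not stated in the commit):

    # Sketch of the reordered imports in apis/python/devtools/ingestor (abbreviated).
    import tiledbsoma
    import tiledbsoma.io
    import tiledbsoma.logging
    from tiledbsoma.options import SOMATileDBContext

    import tiledb  # now imported after the tiledbsoma imports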
apis/python/src/tiledbsoma/_dataframe.py (119 changes: 2 additions & 117 deletions)
@@ -12,7 +12,6 @@
import pandas as pd
import pyarrow as pa
import somacore
import tiledb
from somacore import options
from typing_extensions import Self

@@ -506,7 +505,7 @@ def write(
return self

def _set_reader_coord(
self, sr: clib.SOMAArray, dim_idx: int, dim: tiledb.Dim, coord: object
self, sr: clib.SOMAArray, dim_idx: int, dim: pa.Field, coord: object
) -> bool:
if coord is None:
return True # No constraint; select all in this dimension
@@ -580,7 +579,7 @@ def _set_reader_coord_by_py_seq_or_np_array(
self,
sr: clib.SOMAArray,
dim_idx: int,
dim: tiledb.Dim,
dim: pa.Field,
coord: object,
) -> bool:
if isinstance(coord, np.ndarray):
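Both reader-coordinate helpers now describe the dimension with a pa.Field instead of a tiledb.Dim, so the dimension's name and value type come from the Arrow schema rather than from tiledb-py. A hypothetical sketch of how a caller might obtain such a field; the helper name field_for_dim and its arguments are illustrative and not part of this diff:

    from typing import Sequence

    import pyarrow as pa

    def field_for_dim(
        schema: pa.Schema, index_column_names: Sequence[str], dim_idx: int
    ) -> pa.Field:
        # Illustrative only: look up the Arrow field describing an index column.
        # A pa.Field like this now plays the role the tiledb.Dim argument used to.
        return schema.field(index_column_names[dim_idx])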
@@ -705,120 +704,6 @@ def _canonicalize_schema(
return schema


def _build_tiledb_schema(
schema: pa.Schema,
index_column_names: Sequence[str],
domain: Optional[Sequence[Optional[Tuple[Any, Any]]]],
tiledb_create_options: TileDBCreateOptions,
context: SOMATileDBContext,
) -> tiledb.ArraySchema:
"""Converts an Arrow schema into a TileDB ArraySchema for creation."""

if domain is None:
domain = tuple(None for _ in index_column_names)
else:
ndom = len(domain)
nidx = len(index_column_names)
if ndom != nidx:
raise ValueError(
f"if domain is specified, it must have the same length as index_column_names; got {ndom} != {nidx}"
)

dims = []
for index_column_name, slot_domain in zip(index_column_names, domain):
pa_type = schema.field(index_column_name).type
dtype = _arrow_types.tiledb_type_from_arrow_type(
pa_type, is_indexed_column=True
)

slot_domain = _fill_out_slot_domain(
slot_domain, index_column_name, pa_type, dtype
)

extent = _find_extent_for_domain(
index_column_name, tiledb_create_options, dtype, slot_domain
)

dim = tiledb.Dim(
name=index_column_name,
domain=slot_domain,
tile=extent,
dtype=dtype,
filters=tiledb_create_options.dim_filters_tiledb(
index_column_name,
[
dict(
_type="ZstdFilter",
level=tiledb_create_options.dataframe_dim_zstd_level,
)
],
),
)
dims.append(dim)

dom = tiledb.Domain(dims, ctx=context.tiledb_ctx)

attrs = []
enums = []
metadata = schema.metadata or {}
for pa_attr in schema:
attr_name = pa_attr.name

if attr_name in index_column_names:
continue

has_enum = pa.types.is_dictionary(pa_attr.type)

if has_enum:
enmr_dtype: np.dtype[Any]
vtype = pa_attr.type.value_type
if pa.types.is_large_string(vtype) or pa.types.is_string(vtype):
enmr_dtype = np.dtype("U")
elif pa.types.is_large_binary(vtype) or pa.types.is_binary(vtype):
enmr_dtype = np.dtype("S")
else:
enmr_dtype = np.dtype(vtype.to_pandas_dtype())
enums.append(
tiledb.Enumeration(
name=attr_name,
ordered=pa_attr.type.ordered,
dtype=enmr_dtype,
)
)

attr = tiledb.Attr(
name=attr_name,
dtype=_arrow_types.tiledb_type_from_arrow_type(
schema.field(attr_name).type
),
nullable=metadata.get(attr_name.encode("utf-8")) == b"nullable",
filters=tiledb_create_options.attr_filters_tiledb(
attr_name, ["ZstdFilter"]
),
enum_label=attr_name if has_enum else None,
ctx=context.tiledb_ctx,
)
attrs.append(attr)

cell_order, tile_order = tiledb_create_options.cell_tile_orders()

return tiledb.ArraySchema(
domain=dom,
attrs=attrs,
enums=enums,
sparse=True,
allows_duplicates=tiledb_create_options.allows_duplicates,
offsets_filters=tiledb_create_options.offsets_filters_tiledb(),
validity_filters=tiledb_create_options.validity_filters_tiledb(),
capacity=tiledb_create_options.capacity,
cell_order=cell_order,
# As of TileDB core 2.8.2, we cannot consolidate string-indexed sparse arrays with
# col-major tile order: so we write ``X`` with row-major tile order.
tile_order=tile_order,
ctx=context.tiledb_ctx,
)
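The entire _build_tiledb_schema helper above is deleted, so dataframe creation no longer assembles a tiledb-py ArraySchema on the Python side (presumably the schema is now built from the Arrow schema in the C++ layer; that is an inference from the diff, not stated in the commit). For reference, a self-contained sketch of the enumeration-dtype mapping the removed code applied to dictionary-encoded columns:

    import numpy as np
    import pyarrow as pa

    def enum_value_dtype(field: pa.Field) -> np.dtype:
        # Mirrors the logic from the removed _build_tiledb_schema: map a
        # dictionary-encoded (categorical) Arrow column to the NumPy dtype
        # used for its enumeration values.
        vtype = field.type.value_type
        if pa.types.is_large_string(vtype) or pa.types.is_string(vtype):
            return np.dtype("U")
        if pa.types.is_large_binary(vtype) or pa.types.is_binary(vtype):
            return np.dtype("S")
        return np.dtype(vtype.to_pandas_dtype())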


def _fill_out_slot_domain(
slot_domain: Optional[Tuple[Any, Any]],
index_column_name: str,
apis/python/tests/test_dataframe.py (2 changes: 1 addition & 1 deletion)
@@ -1361,7 +1361,7 @@ def test_enum_extend_past_numerical_limit(tmp_path):

# cannot add additional categories as already maxed out earlier
tbl = pa.Table.from_pandas(df2, preserve_index=False)
with pytest.raises(soma.SOMAError):
with pytest.raises((RuntimeError, soma.SOMAError)):
with soma.open(uri, mode="w") as A:
A.write(tbl)
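The test now accepts either exception type; apparently the failure surfaces on macOS as a plain RuntimeError from the native layer rather than as soma.SOMAError (an inference from the change, not stated in the commit). A minimal, standalone illustration of passing a tuple of exception types to pytest.raises:

    import pytest

    def write_too_many_categories() -> None:
        # Stand-in for the write that exceeds the enumeration limit.
        raise RuntimeError("enumeration value limit exceeded")

    def test_accepts_either_error() -> None:
        # pytest.raises accepts a tuple when more than one exception type is acceptable.
        with pytest.raises((RuntimeError, ValueError)):
            write_too_many_categories()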

libtiledbsoma/src/utils/arrow_adapter.cc (6 changes: 3 additions & 3 deletions)
@@ -505,8 +505,8 @@ ArrowTable ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
}

bool ArrowAdapter::_isvar(const char* format) {
if ((strcmp(format, "U") == 0) | (strcmp(format, "Z") == 0) |
(strcmp(format, "u") == 0) | (strcmp(format, "z") == 0)) {
if ((strcmp(format, "U") == 0) || (strcmp(format, "Z") == 0) ||
(strcmp(format, "u") == 0) || (strcmp(format, "z") == 0)) {
return true;
}
return false;
@@ -560,4 +560,4 @@ tiledb_datatype_t ArrowAdapter::to_tiledb_format(std::string_view arrow_dtype) {
}
}

} // namespace tiledbsoma
} // namespace tiledbsoma
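On the C++ side, the bitwise OR operators in ArrowAdapter::_isvar are replaced with logical ||. For these boolean strcmp comparisons the result is the same, but || short-circuits and is the idiomatic operator here; plausibly the change also silences a clang diagnostic such as -Wbitwise-instead-of-logical that the macOS build treats as an error (an assumption, not stated in the commit). The repeated closing-brace line in the final hunk is presumably just a fix for a missing trailing newline at the end of the file.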
