validate byteorder on argument arrays (#3508)
bkmartinjr authored Jan 2, 2025
1 parent 11ee34c commit fbeee6f
Showing 2 changed files with 117 additions and 1 deletion.
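
For context (illustration only, not part of the diff): NumPy's `dtype.num` identifies only the scalar kind, so a byte-swapped array dispatches to the same handler as a native one; `dtype.byteorder` is what distinguishes them. A minimal NumPy sketch of the distinction this commit validates:

```python
import sys

import numpy as np

# dtype.num is byteorder-agnostic: a big-endian and a little-endian int32
# report the same type number, so num-based dispatch alone cannot tell
# them apart.
assert np.dtype("<i4").num == np.dtype(">i4").num

# dtype.byteorder is what differs: '=' (native), '<' / '>' (explicit), or
# '|' (not applicable, e.g. single-byte types).
non_native = ">" if sys.byteorder == "little" else "<"
print(np.dtype(f"{non_native}i4").byteorder)  # '>' on little-endian machines
print(np.dtype(np.int32).byteorder)           # '='
print(np.dtype(np.uint8).byteorder)           # '|'
```
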
39 changes: 38 additions & 1 deletion apis/python/src/tiledbsoma/fastercsx.cc
@@ -29,7 +29,7 @@
*
* Python bindings for CSX conversion primitives.
*/

#include <bit>
#include <variant>

// Define to include extra debugging bindings (e.g., count_rows)
@@ -108,6 +108,31 @@ std::span<R> make_mutable_casted_span_(py::array arr) {
return std::span<R>(reinterpret_cast<R*>(p), arr.size());
}

/**
* @brief Return true if the NumPy byteorder is native (or equivalent).
*/
bool is_native_byteorder(const char byteorder) {
if (byteorder == '=') // native
return true;
if (byteorder == '|') // not-applicable
return true;
if constexpr (std::endian::native == std::endian::big)
return byteorder == '>'; // big
else
return byteorder == '<'; // little
}

/**
* @brief Check that the endianness/byteorder is native, and raise an
* exception if not. Necessary because we dispatch on dtype().num(), which
* does not confirm that the byteorder is native.
*/
void check_byteorder(const py::dtype& dtype) {
if (!is_native_byteorder(dtype.byteorder()))
throw invalid_argument(
"All arrays must have native byteorder (endianness).");
}
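
Outside the diff, a rough Python-side equivalent of this check — a sketch only, not part of the library — can lean on NumPy's `dtype.isnative`, which already folds `'='`, `'|'`, and the machine's own endianness into a single flag:

```python
import numpy as np

def check_byteorder(arr: np.ndarray) -> None:
    # dtype.isnative is True for '=' and '|' byteorders, and for explicit
    # '<' / '>' dtypes that happen to match the machine's endianness.
    if not arr.dtype.isnative:
        raise ValueError("All arrays must have native byteorder (endianness).")
```
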

/*
* Value/data arrays are cast to an unsigned of the same width as the actual
* value type. This is solely to reduce the combinatorics of template
@@ -209,6 +234,7 @@ T lookup_dtype_(
const std::unordered_map<int, T>& index,
const py::dtype& dtype,
const std::string& array_name) {
check_byteorder(dtype);
try {
return index.at(dtype.num());
} catch (const std::out_of_range& oor) {
@@ -241,6 +267,7 @@ void compress_coo_validate_args_(
5. ensure B* are writeable
6. Ensure each element in A* tuples are same type
7. Ensure each element in the A* tuples are the same length
8. Ensure all arrays have native byteorder
etc...
Not checked:
@@ -261,6 +288,7 @@
if (arr.dtype().num() != vec[0].dtype().num())
throw pybind11::type_error(
"All chunks of COO arrays must be of same type.");
check_byteorder(arr.dtype());
}
}
for (uint64_t chunk_idx = 0; chunk_idx < n_chunks; chunk_idx++) {
@@ -269,9 +297,14 @@
throw std::length_error(
"All COO array tuple elements must be of the same size.");
}

if (Bp.ndim() != 1 || Bj.ndim() != 1 || Bd.ndim() != 1)
throw std::length_error("All arrays must be of dimension rank 1.");

check_byteorder(Bp.dtype());
check_byteorder(Bj.dtype());
check_byteorder(Bd.dtype());

for (auto& arr : Ad)
if (arr.dtype().num() != Bd.dtype().num())
throw pybind11::type_error("All data arrays must be of same type.");
@@ -408,6 +441,10 @@ bool sort_csx_indices(
if (!Bp.writeable() || !Bj.writeable() || !Bd.writeable())
throw std::invalid_argument("Output arrays must be writeable.");

check_byteorder(Bp.dtype());
check_byteorder(Bj.dtype());
check_byteorder(Bd.dtype());

// Get dispatch types
CsxIndexType csx_major_index_type = lookup_dtype_(
csx_index_type_dispatch, Bp.dtype(), "CSx indptr array");
79 changes: 79 additions & 0 deletions apis/python/tests/test_libfastercsx.py
@@ -2,6 +2,7 @@

from __future__ import annotations

import sys
from typing import Any

import numpy as np
@@ -11,6 +12,8 @@
import tiledbsoma.pytiledbsoma as clib
import tiledbsoma.pytiledbsoma.fastercsx as fastercsx

NON_NATIVE_BYTEORDER = ">" if sys.byteorder == "little" else "<"


@pytest.fixture
def concurrency() -> int | None:
@@ -243,6 +246,14 @@ def test_sort_csx_indices_bad_args(
pbad[1] = -1
fastercsx.sort_csx_indices(context, pbad, j, d)

# non-native byteorder should throw
with pytest.raises(ValueError):
fastercsx.sort_csx_indices(context, p.astype(f"{NON_NATIVE_BYTEORDER}i4"), j, d)
with pytest.raises(ValueError):
fastercsx.sort_csx_indices(context, p, j.astype(f"{NON_NATIVE_BYTEORDER}i4"), d)
with pytest.raises(ValueError):
fastercsx.sort_csx_indices(context, p, j, d.astype(f"{NON_NATIVE_BYTEORDER}i4"))
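
As a sanity check of what these new tests construct (illustration only, not part of the diff): `.astype` with an explicit non-native byteorder keeps the logical values but leaves the storage byte-swapped, which is exactly what the bindings now reject:

```python
import sys

import numpy as np

NON_NATIVE_BYTEORDER = ">" if sys.byteorder == "little" else "<"

p = np.zeros(4, dtype=np.int32)
p_swapped = p.astype(f"{NON_NATIVE_BYTEORDER}i4")

assert np.array_equal(p, p_swapped)         # same logical values
assert not p_swapped.dtype.isnative         # but non-native (byte-swapped) storage
assert p_swapped.dtype.num == p.dtype.num   # dtype.num alone cannot detect this
```
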


def test_compress_coo_bad_args(
rng: np.random.Generator, context: clib.SOMAContext
@@ -312,6 +323,74 @@ def test_compress_coo_bad_args(
context, sp.shape, (i,), (j,), (d[1:],), indptr, indices, data
)

# non-native byteorder should throw
with pytest.raises(ValueError):
fastercsx.compress_coo(
context,
sp.shape,
(i.astype(f"{NON_NATIVE_BYTEORDER}i4"),),
(j,),
(d,),
indptr,
indices,
data,
)
with pytest.raises(ValueError):
fastercsx.compress_coo(
context,
sp.shape,
(i,),
(j.astype(f"{NON_NATIVE_BYTEORDER}i4"),),
(d,),
indptr,
indices,
data,
)
with pytest.raises(ValueError):
fastercsx.compress_coo(
context,
sp.shape,
(i,),
(j,),
(d.astype(f"{NON_NATIVE_BYTEORDER}i4"),),
indptr,
indices,
data,
)
with pytest.raises(ValueError):
fastercsx.compress_coo(
context,
sp.shape,
(i,),
(j,),
(d,),
indptr.astype(f"{NON_NATIVE_BYTEORDER}i4"),
indices,
data,
)
with pytest.raises(ValueError):
fastercsx.compress_coo(
context,
sp.shape,
(i,),
(j,),
(d,),
indptr,
indices.astype(f"{NON_NATIVE_BYTEORDER}i4"),
data,
)
with pytest.raises(ValueError):
fastercsx.compress_coo(
context,
sp.shape,
(i,),
(j,),
(d,),
indptr,
indices,
data.astype(f"{NON_NATIVE_BYTEORDER}i4"),
)


def test_ragged_chunk_error(
rng: np.random.Generator, context: clib.SOMAContext