From fbeee6f4c7484ad48eaa2b94d02f7fdb3103a46b Mon Sep 17 00:00:00 2001
From: Bruce Martin
Date: Thu, 2 Jan 2025 07:48:23 -0800
Subject: [PATCH] validate byteorder on argument arrays (#3508)

---
 apis/python/src/tiledbsoma/fastercsx.cc | 39 +++++++++++-
 apis/python/tests/test_libfastercsx.py  | 79 +++++++++++++++++++++++++
 2 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/apis/python/src/tiledbsoma/fastercsx.cc b/apis/python/src/tiledbsoma/fastercsx.cc
index 9eb5a9ced4..d82a20bef6 100644
--- a/apis/python/src/tiledbsoma/fastercsx.cc
+++ b/apis/python/src/tiledbsoma/fastercsx.cc
@@ -29,7 +29,7 @@
  *
  * Python bindings for CSX conversion primitives.
  */
-
+#include <bit>
 #include <span>
 
 // Define to include extra debugging bindings (e.g., count_rows)
@@ -108,6 +108,31 @@ std::span<T> make_mutable_casted_span_(py::array arr) {
     return std::span<T>(reinterpret_cast<T*>(p), arr.size());
 }
 
+/**
+ * @brief Return true if the NumPy byteorder code is native (or equivalent).
+ */
+bool is_native_byteorder(const char byteorder) {
+    if (byteorder == '=') // native
+        return true;
+    if (byteorder == '|') // not-applicable
+        return true;
+    if constexpr (std::endian::native == std::endian::big)
+        return byteorder == '>'; // big
+    else
+        return byteorder == '<'; // little
+}
+
+/**
+ * @brief Check that the dtype byteorder (endianness) is native, and raise an
+ * exception if not. Necessary because we dispatch on dtype().num(), which
+ * doesn't confirm that the byteorder is native.
+ */
+void check_byteorder(const py::dtype& dtype) {
+    if (!is_native_byteorder(dtype.byteorder()))
+        throw std::invalid_argument(
+            "All arrays must have native byteorder (endianness).");
+}
+
 /*
  * Value/data arrays are cast to an unsigned of the same width as the actual
  * value type. This is solely to reduce the combinatorics of template
@@ -209,6 +234,7 @@ T lookup_dtype_(
     const std::unordered_map<int, T>& index,
     const py::dtype& dtype,
     const std::string& array_name) {
+    check_byteorder(dtype);
     try {
         return index.at(dtype.num());
     } catch (const std::out_of_range& oor) {
@@ -241,6 +267,7 @@ void compress_coo_validate_args_(
     5. ensure B* are writeable
     6. Ensure each element in A* tuples are same type
     7. Ensure each element in the A* tuples are the same length
+    8. Ensure all arrays have native byteorder (endianness)
 
Not checked: @@ -261,6 +288,7 @@ void compress_coo_validate_args_( if (arr.dtype().num() != vec[0].dtype().num()) throw pybind11::type_error( "All chunks of COO arrays must be of same type."); + check_byteorder(arr.dtype()); } } for (uint64_t chunk_idx = 0; chunk_idx < n_chunks; chunk_idx++) { @@ -269,9 +297,14 @@ void compress_coo_validate_args_( throw std::length_error( "All COO array tuple elements must be of the same size."); } + if (Bp.ndim() != 1 || Bj.ndim() != 1 || Bd.ndim() != 1) throw std::length_error("All arrays must be of dimension rank 1."); + check_byteorder(Bp.dtype()); + check_byteorder(Bj.dtype()); + check_byteorder(Bd.dtype()); + for (auto& arr : Ad) if (arr.dtype().num() != Bd.dtype().num()) throw pybind11::type_error("All data arrays must be of same type."); @@ -408,6 +441,10 @@ bool sort_csx_indices( if (!Bp.writeable() || !Bj.writeable() || !Bd.writeable()) throw std::invalid_argument("Output arrays must be writeable."); + check_byteorder(Bp.dtype()); + check_byteorder(Bj.dtype()); + check_byteorder(Bd.dtype()); + // Get dispatch types CsxIndexType csx_major_index_type = lookup_dtype_( csx_index_type_dispatch, Bp.dtype(), "CSx indptr array"); diff --git a/apis/python/tests/test_libfastercsx.py b/apis/python/tests/test_libfastercsx.py index 6e949a9916..461e6bec7b 100644 --- a/apis/python/tests/test_libfastercsx.py +++ b/apis/python/tests/test_libfastercsx.py @@ -2,6 +2,7 @@ from __future__ import annotations +import sys from typing import Any import numpy as np @@ -11,6 +12,8 @@ import tiledbsoma.pytiledbsoma as clib import tiledbsoma.pytiledbsoma.fastercsx as fastercsx +NON_NATIVE_BYTEORDER = ">" if sys.byteorder == "little" else "<" + @pytest.fixture def concurrency() -> int | None: @@ -243,6 +246,14 @@ def test_sort_csx_indices_bad_args( pbad[1] = -1 fastercsx.sort_csx_indices(context, pbad, j, d) + # non-native byteorder should throw + with pytest.raises(ValueError): + fastercsx.sort_csx_indices(context, p.astype(f"{NON_NATIVE_BYTEORDER}i4"), j, d) + with pytest.raises(ValueError): + fastercsx.sort_csx_indices(context, p, j.astype(f"{NON_NATIVE_BYTEORDER}i4"), d) + with pytest.raises(ValueError): + fastercsx.sort_csx_indices(context, p, j, d.astype(f"{NON_NATIVE_BYTEORDER}i4")) + def test_compress_coo_bad_args( rng: np.random.Generator, context: clib.SOMAContext @@ -312,6 +323,74 @@ def test_compress_coo_bad_args( context, sp.shape, (i,), (j,), (d[1:],), indptr, indices, data ) + # non-native byteorder should throw + with pytest.raises(ValueError): + fastercsx.compress_coo( + context, + sp.shape, + (i.astype(f"{NON_NATIVE_BYTEORDER}i4"),), + (j,), + (d,), + indptr, + indices, + data, + ) + with pytest.raises(ValueError): + fastercsx.compress_coo( + context, + sp.shape, + (i,), + (j.astype(f"{NON_NATIVE_BYTEORDER}i4"),), + (d,), + indptr, + indices, + data, + ) + with pytest.raises(ValueError): + fastercsx.compress_coo( + context, + sp.shape, + (i,), + (j,), + (d.astype(f"{NON_NATIVE_BYTEORDER}i4"),), + indptr, + indices, + data, + ) + with pytest.raises(ValueError): + fastercsx.compress_coo( + context, + sp.shape, + (i,), + (j,), + (d,), + indptr.astype(f"{NON_NATIVE_BYTEORDER}i4"), + indices, + data, + ) + with pytest.raises(ValueError): + fastercsx.compress_coo( + context, + sp.shape, + (i,), + (j,), + (d,), + indptr, + indices.astype(f"{NON_NATIVE_BYTEORDER}i4"), + data, + ) + with pytest.raises(ValueError): + fastercsx.compress_coo( + context, + sp.shape, + (i,), + (j,), + (d,), + indptr, + indices, + data.astype(f"{NON_NATIVE_BYTEORDER}i4"), + ) + def 
test_ragged_chunk_error( rng: np.random.Generator, context: clib.SOMAContext