[Backport release-1.15] [python/c++] Fix invalid memory read in `fastercsx` (#3464)

* [python/c++] Fix invalid memory read in `fastercsx` (#3456)

* fix invalid memory read

* lint

* C++20 -> C++17 for `release-1.15` branch

---------

Co-authored-by: Bruce Martin <[email protected]>
Co-authored-by: John Kerl <[email protected]>
3 people authored Dec 18, 2024
1 parent 75beec1 commit 6145ed2
Showing 3 changed files with 51 additions and 0 deletions.
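
The substance of the fix: `compress_coo` accepts chunked COO input as tuples of per-chunk row (`Ai`), column (`Aj`), and value (`Ad`) arrays. Before this change the argument validation never confirmed that chunk k of each tuple has the same length, so a "ragged" input could drive the kernel past the end of the shorter buffer (the invalid memory read); this backport adds that check and rejects such input up front. A minimal sketch of the invariant itself (pure NumPy, not tiledbsoma code), using the same chunk shapes as the new regression test below:

import numpy as np

# Sketch of the invariant the new validation enforces: for chunked COO input,
# chunk k of the row, column, and value tuples must all have the same length.
def chunks_are_regular(Ai, Aj, Ad) -> bool:
    # Mirrors the C++ check: same number of chunks, and per-chunk lengths agree.
    if not (len(Ai) == len(Aj) == len(Ad)):
        return False
    return all(len(i) == len(j) == len(d) for i, j, d in zip(Ai, Aj, Ad))

# Regular chunking: every tuple is chunked (1, 2).
assert chunks_are_regular(
    (np.array([0]), np.array([1, 2])),
    (np.array([0]), np.array([1, 2])),
    (np.array([5], dtype=np.int8), np.array([6, 7], dtype=np.int8)),
)

# Ragged chunking (the case from the new test): values are chunked (2, 1)
# while rows/columns are chunked (1, 2); this is now rejected.
assert not chunks_are_regular(
    (np.array([0]), np.array([1, 2])),
    (np.array([0]), np.array([1, 2])),
    (np.array([0, 1], dtype=np.int8), np.array([2], dtype=np.int8)),
)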
7 changes: 7 additions & 0 deletions apis/python/src/tiledbsoma/fastercsx.cc
@@ -239,6 +239,7 @@ void compress_coo_validate_args_(
4. num chunks/items in Ai/Aj/Ad is same size and type
5. ensure B* are writeable
6. Ensure each element in A* tuples are same type
7. Ensure each element in the A* tuples are the same length
etc...
Not checked:
@@ -261,6 +262,12 @@ void compress_coo_validate_args_(
"All chunks of COO arrays must be of same type.");
}
}
for (uint64_t chunk_idx = 0; chunk_idx < n_chunks; chunk_idx++) {
if ((Ai[chunk_idx].size() != Aj[chunk_idx].size()) ||
(Ai[chunk_idx].size() != Ad[chunk_idx].size()))
throw std::length_error(
"All COO array tuple elements must be of the same size.");
}
if (Bp.ndim() != 1 || Bj.ndim() != 1 || Bd.ndim() != 1)
throw std::length_error("All arrays must be of dimension rank 1.");

23 changes: 23 additions & 0 deletions apis/python/tests/test_libfastercsx.py
@@ -311,3 +311,26 @@ def test_compress_coo_bad_args(
fastercsx.compress_coo(
context, sp.shape, (i,), (j,), (d[1:],), indptr, indices, data
)


def test_ragged_chunk_error(
rng: np.random.Generator, context: clib.SOMAContext
) -> None:
# module assumes all chunks are regular across all input arrays. Check that this
# is enforced.
nnz = 3
shape = (100, 20)
indptr = np.empty(shape[0] + 1, dtype=np.int32)
indices = np.empty(nnz, dtype=np.int32)
data = np.empty(nnz, dtype=np.int8)
with pytest.raises(ValueError):
fastercsx.compress_coo(
context,
shape,
(np.array([0]), np.array([1, 2])),
(np.array([0]), np.array([1, 2])),
(np.array([0, 1], dtype=np.int8), np.array([2], dtype=np.int8)),
indptr,
indices,
data,
)
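
For contrast, a hedged sketch of the well-formed counterpart, assuming the same `fastercsx` binding and `context` fixture this test module already uses: when every tuple is chunked identically, the call succeeds as before.

def test_regular_chunks_ok(context) -> None:
    # Sketch only: chunks of i, j, and d all have lengths (2, 2), so the new
    # validation passes and the CSX output buffers are filled as before.
    shape = (100, 20)
    nnz = 4
    i = np.array([0, 1, 2, 3], dtype=np.int32)
    j = np.array([0, 1, 2, 3], dtype=np.int32)
    d = np.array([4, 5, 6, 7], dtype=np.int8)
    indptr = np.empty(shape[0] + 1, dtype=np.int32)
    indices = np.empty(nnz, dtype=np.int32)
    data = np.empty(nnz, dtype=np.int8)
    fastercsx.compress_coo(
        context,
        shape,
        (i[:2], i[2:]),
        (j[:2], j[2:]),
        (d[:2], d[2:]),
        indptr,
        indices,
        data,
    )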
21 changes: 21 additions & 0 deletions libtiledbsoma/src/utils/fastercsx.h
@@ -78,6 +78,26 @@ size_t sum_over_size_(const std::vector<tcb::span<const T>>& v) noexcept {
return a.size();
});
}

/**
* @brief Return true if all of the tuple sub-arrays are equal in size (no
* ragged chunks), false otherwise. Used only in asserts.
*/
template <typename Ti, typename Tj, typename Td>
bool no_ragged_chunks(
const std::vector<tcb::span<const Ti>>& Ai,
const std::vector<tcb::span<const Tj>>& Aj,
const std::vector<tcb::span<const Td>>& Ad) {
if ((Ai.size() != Aj.size()) || (Ai.size() != Ad.size()))
return false;

for (uint64_t chunk_idx = 0; chunk_idx < Ai.size(); chunk_idx++) {
if ((Ai[chunk_idx].size() != Aj[chunk_idx].size()) ||
(Ai[chunk_idx].size() != Ad[chunk_idx].size()))
return false;
}
return true;
}
#endif

/**
@@ -324,6 +344,7 @@ void compress_coo(
assert(sum_over_size_(Ai) == nnz);
assert(sum_over_size_(Aj) == nnz);
assert(sum_over_size_(Ad) == nnz);
assert(no_ragged_chunks(Ai, Aj, Ad));
assert(Bp.size() == n_row + 1);
assert(Bj.size() == nnz);
assert(Bd.size() == nnz);
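
Design note: `no_ragged_chunks` sits in the header's `#ifndef NDEBUG` region and is only called from `assert`, so it compiles away in release builds; the user-facing check that actually raises lives in `compress_coo_validate_args_` in the binding (fastercsx.cc). A hedged Python analogue of that layering (illustrative only, not tiledbsoma code):

def _kernel(Ai, Aj, Ad) -> None:
    # Debug-only guard, analogous to assert(no_ragged_chunks(...)): stripped
    # under `python -O`, so it cannot be the only defence.
    assert all(len(i) == len(j) == len(d) for i, j, d in zip(Ai, Aj, Ad))
    # ... do the actual per-chunk work here ...

def compress_coo_like(Ai, Aj, Ad) -> None:
    # Always-on validation at the API boundary, analogous to the new loop in
    # compress_coo_validate_args_; the Python binding surfaces it as ValueError.
    if (len(Ai) != len(Aj)) or (len(Ai) != len(Ad)) or any(
        len(i) != len(j) or len(i) != len(d) for i, j, d in zip(Ai, Aj, Ad)
    ):
        raise ValueError("All COO array tuple elements must be of the same size.")
    _kernel(Ai, Aj, Ad)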
