From f4207f4178a309dbbd813e17669c629fe81faac5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 12:16:02 -0400 Subject: [PATCH] [r] Connect re-indexer to blockwise iterator (#2742) (#2748) Connect the re-indexer to the blockwise iterator, allowing reads to be re-indexed on-the-fly. This PR parallels #1792 and completes #2152 and #2637; in addition, provides new shorthand for `reindex_disable_on_axis`: - `TRUE`: disable re-indexing on all axes - `FALSE: re-index on all axes - `NA`: re-index only on major axis, disable re-indexing on all axes (default) `BlockwiseTableReadIter$concat()` and `BlockwiseSparseReadIter$concat()` are disabled when re-indexing is requested (paralleling Python) `BlockwiseSparseReadIter` now accepts `repr = "R"` or `repr = "C"` under certain circumstances: - axis 0 (`soma_dim_0`) must be re-indexed to allow `repr = "R"` - axis 1 (`soma_dim_1`) must be re-indexed to allow `repr = "C"` `repr` of `"T"` is allowed in all circumstances and continues to be the default Two new fields are available to blockwise iterators: - `$axes_to_reindex`: a vector of minor axes slated to be re-indexed - `$reindexable`: status indicator stating if _any_ axis (major or minor) is slated to be re-indexed resolves #2671 Co-authored-by: Paul Hoffman --- apis/r/R/BlockwiseIter.R | 161 +++++++-- apis/r/R/SOMASparseNDArrayRead.R | 5 +- apis/r/man/BlockwiseReadIterBase.Rd | 16 +- apis/r/man/BlockwiseSparseReadIter.Rd | 25 +- apis/r/man/BlockwiseTableReadIter.Rd | 4 +- apis/r/man/IntIndexer.Rd | 5 +- apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd | 18 +- apis/r/man/SOMASparseNDArrayRead.Rd | 9 +- .../roxygen/templates/param-blockwise-iter.R | 9 +- .../r/man/roxygen/templates/param-repr-read.R | 7 + apis/r/tests/testthat/test-Blockwise.R | 326 +++++++++++++----- 11 files changed, 464 insertions(+), 121 deletions(-) diff --git a/apis/r/R/BlockwiseIter.R b/apis/r/R/BlockwiseIter.R index 4701803602..9913125c77 100644 --- a/apis/r/R/BlockwiseIter.R +++ b/apis/r/R/BlockwiseIter.R @@ -22,7 +22,7 @@ BlockwiseReadIterBase <- R6::R6Class( coords, axis, ..., - reindex_disable_on_axis = NULL + reindex_disable_on_axis = NA ) { super$initialize(sr) stopifnot( @@ -55,6 +55,16 @@ BlockwiseReadIterBase <- R6::R6Class( } private$.coords <- coords # Check reindex_disable_on_axis + if (is_scalar_logical(reindex_disable_on_axis)) { + reindex_disable_on_axis <- if (isTRUE(reindex_disable_on_axis)) { # TRUE + bit64::seq.integer64(0L, ndim) + } else if (isFALSE(reindex_disable_on_axis)) { # FALSE + NULL + } else { # NA + ax <- bit64::seq.integer64(0L, ndim) + ax[ax != self$axis] + } + } if (!is.null(reindex_disable_on_axis)) { stopifnot( "'reindex_disable_on_axis' must be a vector of integers" = ( @@ -63,11 +73,27 @@ BlockwiseReadIterBase <- R6::R6Class( ), "'reindex_disable_on_axis' must be finite" = is.finite(reindex_disable_on_axis), "'reindex_disable_on_axis' must be within the range of dimensions of the array" = all( - reindex_disable_on_axis >= 0 && reindex_disable_on_axis <= ndim + reindex_disable_on_axis >= 0 & reindex_disable_on_axis <= ndim ) ) + reindex_disable_on_axis <- unique(bit64::as.integer64(reindex_disable_on_axis)) } private$.reindex_disable_on_axis <- reindex_disable_on_axis + axes_to_reindex <- self$axes_to_reindex + private$.reindexers <- vector("list", length = length(axes_to_reindex)) + shape <- self$array$shape() + dnames <- self$array$dimnames() + for (i in seq_along(axes_to_reindex)) { + ax <- as.numeric(axes_to_reindex[i]) + 1L + coords <- as.list(CoordsStrider$new(start = 0L, end = shape[ax] - 1L)) + coords <- if (length(coords) == 1L) { + coords[[1L]] + } else { + unlist64(coords) + } + private$.reindexers[[i]] <- IntIndexer$new(coords) + names(private$.reindexers)[i] <- dnames[ax] + } }, #' @description Check if the iterated read is complete or not #' @@ -90,8 +116,8 @@ BlockwiseReadIterBase <- R6::R6Class( } private$reset() dimnam <- self$array$dimnames()[self$axis + 1L] - nextelems <- self$coords_axis$next_element() - private$set_dim_points(dimnam, nextelems) + private$.nextelems <- self$coords_axis$next_element() + private$set_dim_points(dimnam, private$.nextelems) return(private$.read_next()) } ), @@ -102,6 +128,19 @@ BlockwiseReadIterBase <- R6::R6Class( #' @field axis The axis to iterate over in a blockwise fashion #' axis = function() private$.axis, + #' @field axes_to_reindex The axes to re-index + #' + axes_to_reindex = function() { + ax <- bit64::seq.integer64(0L, self$array$ndim() - 1L) + ax <- ax[!ax %in% self$reindex_disable_on_axis] + if (length(ax)) { + ax <- ax[ax != self$axis] + } + if (!length(ax)) { + return(NULL) + } + return(ax) + }, #' @field coords A list of \code{\link{CoordsStrider}} objects #' coords = function() private$.coords, @@ -113,13 +152,26 @@ BlockwiseReadIterBase <- R6::R6Class( }, #' @field reindex_disable_on_axis Additional axes that will not be re-indexed #' - reindex_disable_on_axis = function() private$.reindex_disable_on_axis + reindex_disable_on_axis = function() private$.reindex_disable_on_axis, + #' @field reindexable Shorthand to see if this iterator is poised to be + #' re-indexed or not + #' + reindexable = function() length(self$axes_to_reindex) || + !bit64::as.integer64(self$axis) %in% self$reindex_disable_on_axis ), private = list( .array = NULL, .coords = list(), .axis = integer(1L), + .nextelems = NULL, .reindex_disable_on_axis = NULL, + .reindexers = list(), + # @description Throw an error saying that re-indexed + # iterators are not concatenatable + .notConcatenatable = function() stop(errorCondition( + message = "Re-indexed blockwise iterators are not concatenatable", + class = "notConcatenatableError" + )), # @description Reset internal state of SOMA Reader while keeping array open reset = function() { if (is.null(private$soma_reader_pointer)) { @@ -128,6 +180,48 @@ BlockwiseReadIterBase <- R6::R6Class( sr_reset(private$soma_reader_pointer) return(invisible(NULL)) }, + # @description Re-index an Arrow table + reindex_arrow_table = function(tbl) { + stopifnot( + "'tbl' must be an Arrow table" = R6::is.R6(tbl) && inherits(tbl, 'Table') + ) + dname <- self$array$dimnames()[self$axis + 1L] + if (!dname %in% names(tbl)) { + stop( + "Cannot find ", + sQuote(dname), + " in the provided Arrow table", + call. = FALSE + ) + } + op <- options(arrow.int64_downcast = FALSE) + on.exit(options(op), add = TRUE, after = FALSE) + coords <- self$coords + coords[[dname]] <- CoordsStrider$new( + private$.nextelems, + stride = coords[[dname]]$stride + ) + if (!bit64::as.integer64(self$axis) %in% self$reindex_disable_on_axis) { + indexer <- IntIndexer$new(private$.nextelems) + tbl[[dname]] <- indexer$get_indexer( + tbl[[dname]]$as_vector(), + nomatch_na = TRUE + ) + rm(indexer) + } + for (dname in names(private$.reindexers)) { + if (!dname %in% names(tbl)) { + "" + } + indexer <- private$.reindexers[[dname]] + tbl[[dname]] <- indexer$get_indexer( + tbl[[dname]]$as_vector(), + nomatch_na = TRUE + ) + } + attr(tbl, "coords") <- coords + return(tbl) + }, # @description Set dimension selection on given axis set_dim_points = function(dimname, points) { stopifnot( @@ -156,14 +250,22 @@ BlockwiseTableReadIter <- R6::R6Class( classname = "BlockwiseTableReadIter", inherit = BlockwiseReadIterBase, public = list( - #' @description ... + #' @description Concatenate the remainder of the blockwise iterator #' - #' @return ... + #' @return An Arrow Table with the remainder of the iterator #' - concat = function() soma_array_to_arrow_table_concat(self) + concat = function() { + if (self$reindexable) { + private$.notConcatenatable() + } + return(soma_array_to_arrow_table_concat(self)) + } ), private = list( - soma_reader_transform = function(x) soma_array_to_arrow_table(x) + soma_reader_transform = function(x) { + tbl <- soma_array_to_arrow_table(x) + return(private$reindex_arrow_table(tbl)) + } ) ) @@ -194,7 +296,7 @@ BlockwiseSparseReadIter <- R6::R6Class( axis, ..., repr = "T", - reindex_disable_on_axis = NULL + reindex_disable_on_axis = NA ) { super$initialize( sr, @@ -204,14 +306,28 @@ BlockwiseSparseReadIter <- R6::R6Class( ..., reindex_disable_on_axis = reindex_disable_on_axis ) - private$.repr <- match.arg(repr) + stopifnot( + "Sparse reads only work with two-dimensional arrays" = self$array$ndim() == 2L + ) + reprs <- c( + 'T', + if (!bit64::as.integer64(0L) %in% self$reindex_disable_on_axis)'R', + if (!bit64::as.integer64(1L) %in% self$reindex_disable_on_axis) 'C' + ) + private$.repr <- match.arg(repr, choices = reprs) private$.shape <- sapply(coords, length) }, - #' @description ... + #' @description Concatenate the remainder of the blockwise iterator #' - #' @return ... + #' @return A sparse matrix (determined by \code{self$repr}) with + #' the remainder of the iterator #' - concat = function() soma_array_to_sparse_matrix_concat(self, private$.zero_based) + concat = function() { + if (self$reindexable) { + private$.notConcatenatable() + } + return(soma_array_to_sparse_matrix_concat(self, private$.zero_based)) + } ), active = list( #' @field repr Representation of the sparse matrix to return @@ -222,11 +338,16 @@ BlockwiseSparseReadIter <- R6::R6Class( .repr = character(1L), .shape = NULL, .zero_based = FALSE, - soma_reader_transform = function(x) arrow_table_to_sparse( - soma_array_to_arrow_table(x), - repr = self$repr, - shape = private$.shape, - zero_based = private$.zero_based - ) + soma_reader_transform = function(x) { + tbl <- private$reindex_arrow_table(soma_array_to_arrow_table(x)) + mat <- arrow_table_to_sparse( + tbl, + repr = self$repr, + shape = private$.shape, + zero_based = private$.zero_based + ) + attr(mat, "coords") <- attr(tbl, "coords", exact = TRUE) + return(mat) + } ) ) diff --git a/apis/r/R/SOMASparseNDArrayRead.R b/apis/r/R/SOMASparseNDArrayRead.R index 1ab0d872dd..0070aa4875 100644 --- a/apis/r/R/SOMASparseNDArrayRead.R +++ b/apis/r/R/SOMASparseNDArrayRead.R @@ -170,7 +170,7 @@ SOMASparseNDArrayRead <- R6::R6Class( axis, ..., size = NULL, - reindex_disable_on_axis = NULL + reindex_disable_on_axis = NA ) { return(SOMASparseNDArrayBlockwiseRead$new( self$sr, @@ -210,7 +210,7 @@ SOMASparseNDArrayBlockwiseRead <- R6::R6Class( axis, ..., size, - reindex_disable_on_axis = NULL + reindex_disable_on_axis = NA ) { super$initialize(sr, array, coords) stopifnot( @@ -218,6 +218,7 @@ SOMASparseNDArrayBlockwiseRead <- R6::R6Class( rlang::is_integerish(size, 1L, finite = TRUE) || (inherits(size, 'integer64') && length(size) == 1L && is.finite(size)), "'reindex_disable_on_axis' must be a vector of integers" = is.null(reindex_disable_on_axis) || + is_scalar_logical(reindex_disable_on_axis) || rlang::is_integerish(reindex_disable_on_axis, finite = TRUE) || (inherits(reindex_disable_on_axis, 'integer64') && all(is.finite(reindex_disable_on_axis))) ) diff --git a/apis/r/man/BlockwiseReadIterBase.Rd b/apis/r/man/BlockwiseReadIterBase.Rd index 6e631250af..7d406e09cb 100644 --- a/apis/r/man/BlockwiseReadIterBase.Rd +++ b/apis/r/man/BlockwiseReadIterBase.Rd @@ -17,11 +17,16 @@ Class that allows for blockwise read iteration of SOMA reads \item{\code{axis}}{The axis to iterate over in a blockwise fashion} +\item{\code{axes_to_reindex}}{The axes to re-index} + \item{\code{coords}}{A list of \code{\link{CoordsStrider}} objects} \item{\code{coords_axis}}{The \code{\link{CoordsStrider}} for \code{axis}} \item{\code{reindex_disable_on_axis}}{Additional axes that will not be re-indexed} + +\item{\code{reindexable}}{Shorthand to see if this iterator is poised to be +re-indexed or not} } \if{html}{\out{}} } @@ -53,7 +58,7 @@ Create coords, axis, ..., - reindex_disable_on_axis = NULL + reindex_disable_on_axis = NA )}\if{html}{\out{}} } @@ -72,7 +77,14 @@ named after \code{array$dimnames()}} \item{\code{...}}{Ignored} \item{\code{reindex_disable_on_axis}}{Additional axes that will not be re-indexed; -currently not yet implemented} +the following values may be used as shorthands for common settings: +\itemize{ +\item \dQuote{\code{TRUE}}: disable re-indexing on all axes +\item \dQuote{\code{NA}}: re-index only on \code{axis}, disable +re-indexing on all others +\item \dQuote{\code{FALSE}}: re-index on \emph{all} axes, do \strong{not} +disable re-indexing +}} } \if{html}{\out{}} } diff --git a/apis/r/man/BlockwiseSparseReadIter.Rd b/apis/r/man/BlockwiseSparseReadIter.Rd index 0936be88be..1906a3e9ff 100644 --- a/apis/r/man/BlockwiseSparseReadIter.Rd +++ b/apis/r/man/BlockwiseSparseReadIter.Rd @@ -47,7 +47,7 @@ Create axis, ..., repr = "T", - reindex_disable_on_axis = NULL + reindex_disable_on_axis = NA )}\if{html}{\out{}} } @@ -69,10 +69,24 @@ named after \code{array$dimnames()}} \itemize{ \item \dQuote{\code{T}}: returns a \code{\link[Matrix:TsparseMatrix-class]{TsparseMatrix}} -}} +\item \dQuote{\code{R}}: returns an +\code{\link[Matrix:RsparseMatrix-class]{RsparseMatrix}} +\item \dQuote{\code{C}}: returns a +\code{\link[Matrix:CsparseMatrix-class]{CsparseMatrix}} +} +\strong{Note}: passing \code{repr} of \dQuote{\code{R}} or \dQuote{\code{C}} +are only available if re-indexing is enabled on axes \code{0} or \code{1}, +respectively} \item{\code{reindex_disable_on_axis}}{Additional axes that will not be re-indexed; -currently not yet implemented} +the following values may be used as shorthands for common settings: +\itemize{ +\item \dQuote{\code{TRUE}}: disable re-indexing on all axes +\item \dQuote{\code{NA}}: re-index only on \code{axis}, disable +re-indexing on all others +\item \dQuote{\code{FALSE}}: re-index on \emph{all} axes, do \strong{not} +disable re-indexing +}} } \if{html}{\out{}} } @@ -81,13 +95,14 @@ currently not yet implemented} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-BlockwiseSparseReadIter-concat}{}}} \subsection{Method \code{concat()}}{ -... +Concatenate the remainder of the blockwise iterator \subsection{Usage}{ \if{html}{\out{
}}\preformatted{BlockwiseSparseReadIter$concat()}\if{html}{\out{
}} } \subsection{Returns}{ -... +A sparse matrix (determined by \code{self$repr}) with +the remainder of the iterator } } \if{html}{\out{
}} diff --git a/apis/r/man/BlockwiseTableReadIter.Rd b/apis/r/man/BlockwiseTableReadIter.Rd index 9b353009e6..72db146d0d 100644 --- a/apis/r/man/BlockwiseTableReadIter.Rd +++ b/apis/r/man/BlockwiseTableReadIter.Rd @@ -31,13 +31,13 @@ as Arrow \code{\link[Arrow]{Table}s} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-BlockwiseTableReadIter-concat}{}}} \subsection{Method \code{concat()}}{ -... +Concatenate the remainder of the blockwise iterator \subsection{Usage}{ \if{html}{\out{
}}\preformatted{BlockwiseTableReadIter$concat()}\if{html}{\out{
}} } \subsection{Returns}{ -... +An Arrow Table with the remainder of the iterator } } \if{html}{\out{
}} diff --git a/apis/r/man/IntIndexer.Rd b/apis/r/man/IntIndexer.Rd index b67d56a9c7..2b448ea3f9 100644 --- a/apis/r/man/IntIndexer.Rd +++ b/apis/r/man/IntIndexer.Rd @@ -37,7 +37,7 @@ Create a new re-indexer \subsection{Method \code{get_indexer()}}{ Get the underlying indices for the target data \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{IntIndexer$get_indexer(target, nomatch = -1L)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{IntIndexer$get_indexer(target, nomatch_na = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -45,8 +45,7 @@ Get the underlying indices for the target data \describe{ \item{\code{target}}{Data to re-index} -\item{\code{nomatch}}{The value to be returned when no match is found; will be -coerced to a 64-bit integer} +\item{\code{nomatch_na}}{Set non-matches to \code{NA} instead of \code{-1}} } \if{html}{\out{}} } diff --git a/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd b/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd index 8c22287a9e..c573600a21 100644 --- a/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd +++ b/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd @@ -59,7 +59,14 @@ values; must be named after \code{array$dimnames()}} \item{\code{size}}{The size of each blockwise chunk to generate} \item{\code{reindex_disable_on_axis}}{Additional axes that will not be re-indexed; -currently not yet implemented} +the following values may be used as shorthands for common settings: +\itemize{ +\item \dQuote{\code{TRUE}}: disable re-indexing on all axes +\item \dQuote{\code{NA}}: re-index only on \code{axis}, disable +re-indexing on all others +\item \dQuote{\code{FALSE}}: re-index on \emph{all} axes, do \strong{not} +disable re-indexing +}} } \if{html}{\out{}} } @@ -94,7 +101,14 @@ Read as a sparse matrix \itemize{ \item \dQuote{\code{T}}: returns a \code{\link[Matrix:TsparseMatrix-class]{TsparseMatrix}} -}} +\item \dQuote{\code{R}}: returns an +\code{\link[Matrix:RsparseMatrix-class]{RsparseMatrix}} +\item \dQuote{\code{C}}: returns a +\code{\link[Matrix:CsparseMatrix-class]{CsparseMatrix}} +} +\strong{Note}: passing \code{repr} of \dQuote{\code{R}} or \dQuote{\code{C}} +are only available if re-indexing is enabled on axes \code{0} or \code{1}, +respectively} } \if{html}{\out{}} } diff --git a/apis/r/man/SOMASparseNDArrayRead.Rd b/apis/r/man/SOMASparseNDArrayRead.Rd index 9089d91818..f88645dd86 100644 --- a/apis/r/man/SOMASparseNDArrayRead.Rd +++ b/apis/r/man/SOMASparseNDArrayRead.Rd @@ -87,7 +87,14 @@ Read in a blockwise fashion \item{\code{size}}{The size of each blockwise chunk to generate} \item{\code{reindex_disable_on_axis}}{Additional axes that will not be re-indexed; -currently not yet implemented} +the following values may be used as shorthands for common settings: +\itemize{ +\item \dQuote{\code{TRUE}}: disable re-indexing on all axes +\item \dQuote{\code{NA}}: re-index only on \code{axis}, disable +re-indexing on all others +\item \dQuote{\code{FALSE}}: re-index on \emph{all} axes, do \strong{not} +disable re-indexing +}} } \if{html}{\out{}} } diff --git a/apis/r/man/roxygen/templates/param-blockwise-iter.R b/apis/r/man/roxygen/templates/param-blockwise-iter.R index 1d959dc1c9..7c8d130f29 100644 --- a/apis/r/man/roxygen/templates/param-blockwise-iter.R +++ b/apis/r/man/roxygen/templates/param-blockwise-iter.R @@ -3,4 +3,11 @@ #' @param axis Axis to iterate over in a blockwise manner #' @param size The size of each blockwise chunk to generate #' @param reindex_disable_on_axis Additional axes that will not be re-indexed; -#' currently not yet implemented +#' the following values may be used as shorthands for common settings: +#' \itemize{ +#' \item \dQuote{\code{TRUE}}: disable re-indexing on all axes +#' \item \dQuote{\code{NA}}: re-index only on \code{axis}, disable +#' re-indexing on all others +#' \item \dQuote{\code{FALSE}}: re-index on \emph{all} axes, do \strong{not} +#' disable re-indexing +#' } diff --git a/apis/r/man/roxygen/templates/param-repr-read.R b/apis/r/man/roxygen/templates/param-repr-read.R index b5e886d566..76340ddbdb 100644 --- a/apis/r/man/roxygen/templates/param-repr-read.R +++ b/apis/r/man/roxygen/templates/param-repr-read.R @@ -2,4 +2,11 @@ #' \itemize{ #' \item \dQuote{\code{T}}: returns a #' \code{\link[Matrix:TsparseMatrix-class]{TsparseMatrix}} +#' \item \dQuote{\code{R}}: returns an +#' \code{\link[Matrix:RsparseMatrix-class]{RsparseMatrix}} +#' \item \dQuote{\code{C}}: returns a +#' \code{\link[Matrix:CsparseMatrix-class]{CsparseMatrix}} #' } +#' \strong{Note}: passing \code{repr} of \dQuote{\code{R}} or \dQuote{\code{C}} +#' are only available if re-indexing is enabled on axes \code{0} or \code{1}, +#' respectively diff --git a/apis/r/tests/testthat/test-Blockwise.R b/apis/r/tests/testthat/test-Blockwise.R index bae331ec7c..5c5ba8f0b5 100644 --- a/apis/r/tests/testthat/test-Blockwise.R +++ b/apis/r/tests/testthat/test-Blockwise.R @@ -1,90 +1,250 @@ + test_that("Blockwise iterator for arrow tables", { - skip_if(!extended_tests() || covr_tests()) - skip_if_not_installed("pbmc3k.tiledb") # a Suggests: pre-package 3k PBMC data - # see https://ghrr.github.io/drat/ - - tdir <- tempfile() - tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") - untar(tarfile = tgzfile, exdir = tdir) - - uri <- file.path(tdir, "soco", "pbmc3k_processed") - expect_true(dir.exists(uri)) - - ax <- 0 - sz <- 1000L - expqry <- SOMAExperimentOpen(uri) - axqry <- expqry$axis_query("RNA") - xrqry <- axqry$X("data") - - expect_error(xrqry$blockwise(axis=2)) - expect_error(xrqry$blockwise(size=-100)) - - expect_s3_class( - bi <- xrqry$blockwise(axis=ax, size=sz), - "SOMASparseNDArrayBlockwiseRead" - ) - - expect_s3_class(it <- bi$tables(), "BlockwiseTableReadIter") - expect_false(it$read_complete()) - - for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { - at <- it$read_next() - expect_s3_class(at, "ArrowTabular") - } - expect_true(it$read_complete()) - - rm(bi, it, xrqry, axqry) - axqry <- expqry$axis_query("RNA") - xrqry <- axqry$X("data") - bi <- xrqry$blockwise(axis=ax, size=sz) - it <- bi$tables() - at <- it$concat() - expect_s3_class(at, "Table") + skip_if(!extended_tests() || covr_tests()) + skip_if_not_installed("pbmc3k.tiledb") # a Suggests: pre-package 3k PBMC data + # see https://ghrr.github.io/drat/ + + tdir <- tempfile() + tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") + untar(tarfile = tgzfile, exdir = tdir) + + uri <- file.path(tdir, "soco", "pbmc3k_processed") + expect_true(dir.exists(uri)) + + ax <- 0 + sz <- 1000L + expqry <- SOMAExperimentOpen(uri) + axqry <- expqry$axis_query("RNA") + xrqry <- axqry$X("data") + + expect_error(xrqry$blockwise(axis=2)) + expect_error(xrqry$blockwise(size=-100)) + + expect_s3_class( + bi <- xrqry$blockwise(axis=ax, size=sz, reindex_disable_on_axis = TRUE), + "SOMASparseNDArrayBlockwiseRead" + ) + + expect_s3_class(it <- bi$tables(), "BlockwiseTableReadIter") + expect_false(it$read_complete()) + + for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { + at <- it$read_next() expect_s3_class(at, "ArrowTabular") - expect_equal(dim(at), c(4848644, 3)) + } + expect_true(it$read_complete()) + + rm(bi, it, xrqry, axqry) + axqry <- expqry$axis_query("RNA") + xrqry <- axqry$X("data") + bi <- xrqry$blockwise(axis=ax, size=sz, reindex_disable_on_axis = TRUE) + it <- bi$tables() + at <- it$concat() + expect_s3_class(at, "Table") + expect_s3_class(at, "ArrowTabular") + expect_equal(dim(at), c(4848644, 3)) +}) + +test_that("Table blockwise iterator: re-indexed", { + skip_if(!extended_tests() || covr_tests()) + skip_if_not_installed("SeuratObject", minimum_version = .MINIMUM_SEURAT_VERSION('c')) + + obj <- get_data("pbmc_small", package = "SeuratObject") + obj <- suppressWarnings(SeuratObject::UpdateSeuratObject(obj)) + for (lyr in setdiff(SeuratObject::Layers(obj), "data")) { + SeuratObject::LayerData(obj, lyr) <- NULL + } + for (reduc in SeuratObject::Reductions(obj)) { + obj[[reduc]] <- NULL + } + for (grph in SeuratObject::Graphs(obj)) { + obj[[grph]] <- NULL + } + for (cmd in SeuratObject::Command(obj)) { + obj[[cmd]] <- NULL + } + + tmp <- tempfile("blockwise-reindexed-tables") + uri <- write_soma(obj, uri = tmp) + + exp <- SOMAExperimentOpen(uri) + on.exit(exp$close(), add = TRUE, after = FALSE) + + ax <- 0L + sz <- 23L + query <- exp$axis_query("RNA") + xrqry <- query$X("data") + + expect_s3_class( + bi <- xrqry$blockwise(axis = ax, size = sz, reindex_disable_on_axis = NA), + "SOMASparseNDArrayBlockwiseRead" + ) + + expect_s3_class(it <- bi$tables(), "BlockwiseTableReadIter") + expect_false(it$read_complete()) + expect_true(it$reindexable) + expect_error(it$concat(), class = "notConcatenatableError") + expect_length(it$axes_to_reindex, 0L) + + for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { + at <- it$read_next() + expect_true(R6::is.R6(at)) + expect_s3_class(at, "Table") + sd0 <- at$GetColumnByName("soma_dim_0")$as_vector() + expect_true(min(sd0) >= 0L) + expect_true(max(sd0) <= sz) + strider <- attr(at, 'coords')$soma_dim_0 + expect_s3_class(strider, 'CoordsStrider') + expect_true(strider$start == sz * (i - 1L)) + expect_true(strider$end < sz * i) + } + + expect_s3_class( + bi <- suppressWarnings(xrqry$blockwise( + axis = ax, + size = sz, + reindex_disable_on_axis = FALSE + )), + "SOMASparseNDArrayBlockwiseRead" + ) + expect_s3_class(it <- bi$tables(), "BlockwiseTableReadIter") + expect_false(it$read_complete()) + expect_true(it$reindexable) + expect_error(it$concat(), class = "notConcatenatableError") + expect_length(it$axes_to_reindex, it$array$ndim() - 1L) + + for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { + at <- it$read_next() + expect_true(R6::is.R6(at)) + expect_s3_class(at, "Table") + sd0 <- at$GetColumnByName("soma_dim_0")$as_vector() + expect_true(min(sd0) >= 0L) + expect_true(max(sd0) <= sz) + } }) test_that("Blockwise iterator for sparse matrices", { - skip_if(!extended_tests() || covr_tests()) - skip_if_not_installed("pbmc3k.tiledb") # a Suggests: pre-package 3k PBMC data - # see https://ghrr.github.io/drat/ - - tdir <- tempfile() - tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") - untar(tarfile = tgzfile, exdir = tdir) - - uri <- file.path(tdir, "soco", "pbmc3k_processed") - expect_true(dir.exists(uri)) - - ax <- 0 - sz <- 1000L - expqry <- SOMAExperimentOpen(uri) - axqry <- expqry$axis_query("RNA") - xrqry <- axqry$X("data") - - expect_error(xrqry$blockwise(axis=2)) - expect_error(xrqry$blockwise(size=-100)) - - expect_s3_class( - bi <- xrqry$blockwise(axis=ax, size=sz), - "SOMASparseNDArrayBlockwiseRead" - ) - - expect_s3_class(it <- bi$sparse_matrix(), "BlockwiseSparseReadIter") - expect_false(it$read_complete()) - - for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { - at <- it$read_next() - expect_s4_class(at, "dgTMatrix") - } - expect_true(it$read_complete()) - - rm(bi, it, xrqry, axqry) - axqry <- expqry$axis_query("RNA") - xrqry <- axqry$X("data") - bi <- xrqry$blockwise(axis=ax, size=sz) - it <- bi$sparse_matrix() - at <- it$concat() + skip_if(!extended_tests() || covr_tests()) + skip_if_not_installed("pbmc3k.tiledb") # a Suggests: pre-package 3k PBMC data + # see https://ghrr.github.io/drat/ + + tdir <- tempfile() + tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") + untar(tarfile = tgzfile, exdir = tdir) + + uri <- file.path(tdir, "soco", "pbmc3k_processed") + expect_true(dir.exists(uri)) + + ax <- 0 + sz <- 1000L + expqry <- SOMAExperimentOpen(uri) + axqry <- expqry$axis_query("RNA") + xrqry <- axqry$X("data") + + expect_error(xrqry$blockwise(axis=2)) + expect_error(xrqry$blockwise(size=-100)) + + expect_s3_class( + bi <- xrqry$blockwise(axis=ax, size=sz, reindex_disable_on_axis = TRUE), + "SOMASparseNDArrayBlockwiseRead" + ) + + expect_error(bi$sparse_matrix("C")) + expect_error(bi$sparse_matrix("R")) + + expect_s3_class(it <- bi$sparse_matrix(), "BlockwiseSparseReadIter") + expect_false(it$read_complete()) + + for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { + at <- it$read_next() expect_s4_class(at, "dgTMatrix") - expect_equal(dim(at), c(2638, 1838)) + } + expect_true(it$read_complete()) + + rm(bi, it, xrqry, axqry) + axqry <- expqry$axis_query("RNA") + xrqry <- axqry$X("data") + bi <- xrqry$blockwise(axis=ax, size=sz, reindex_disable_on_axis = TRUE) + it <- bi$sparse_matrix() + at <- it$concat() + expect_s4_class(at, "dgTMatrix") + expect_equal(dim(at), c(2638, 1838)) +}) + +test_that("Sparse matrix blockwise iterator: re-indexed", { + skip_if(!extended_tests() || covr_tests()) + skip_if_not_installed("SeuratObject", minimum_version = .MINIMUM_SEURAT_VERSION('c')) + + obj <- get_data("pbmc_small", package = "SeuratObject") + obj <- suppressWarnings(SeuratObject::UpdateSeuratObject(obj)) + for (lyr in setdiff(SeuratObject::Layers(obj), "data")) { + SeuratObject::LayerData(obj, lyr) <- NULL + } + for (reduc in SeuratObject::Reductions(obj)) { + obj[[reduc]] <- NULL + } + for (grph in SeuratObject::Graphs(obj)) { + obj[[grph]] <- NULL + } + for (cmd in SeuratObject::Command(obj)) { + obj[[cmd]] <- NULL + } + + tmp <- tempfile("blockwise-reindexed-sparse") + uri <- write_soma(obj, uri = tmp) + + exp <- SOMAExperimentOpen(uri) + on.exit(exp$close(), add = TRUE, after = FALSE) + + ax <- 0L + sz <- 23L + query <- exp$axis_query("RNA") + xrqry <- query$X("data") + + expect_s3_class( + bi <- xrqry$blockwise(axis = ax, size = sz, reindex_disable_on_axis = NA), + "SOMASparseNDArrayBlockwiseRead" + ) + + expect_error(bi$sparse_matrix("C")) + + expect_s3_class(it <- bi$sparse_matrix(), "BlockwiseSparseReadIter") + expect_false(it$read_complete()) + expect_true(it$reindexable) + expect_error(it$concat(), class = "notConcatenatableError") + expect_length(it$axes_to_reindex, 0L) + + for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { + mat <- it$read_next() + expect_s4_class(mat, "TsparseMatrix") + expect_identical(dim(mat), rev(dim(obj))) + expect_true(min(mat@i) >= 0L) + expect_true(max(mat@i) <= sz) + strider <- attr(mat, 'coords')$soma_dim_0 + expect_s3_class(strider, 'CoordsStrider') + expect_true(strider$start == sz * (i - 1L)) + expect_true(strider$end < sz * i) + } + + expect_s3_class( + bi <- suppressWarnings(xrqry$blockwise( + axis = ax, + size = sz, + reindex_disable_on_axis = FALSE + )), + "SOMASparseNDArrayBlockwiseRead" + ) + expect_s3_class(it <- bi$sparse_matrix(), "BlockwiseSparseReadIter") + expect_false(it$read_complete()) + expect_true(it$reindexable) + expect_error(it$concat(), class = "notConcatenatableError") + expect_length(it$axes_to_reindex, it$array$ndim() - 1L) + + for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) { + mat <- it$read_next() + expect_s4_class(mat, "TsparseMatrix") + expect_identical(dim(mat), rev(dim(obj))) + expect_true(min(mat@i) >= 0L) + expect_true(max(mat@i) <= sz) + } })