Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[r] Remove last private$.tiledb_array and self$object from R classes [WIP] #3129

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ SOMADataFrame <- R6::R6Class(
arrow::as_record_batch(values)$export_to_c(naap, nasp)

df <- as.data.frame(values)[schema_names]
arr <- self$object
writeArrayFromArrow(
uri = self$uri,
naap = naap,
Expand Down
8 changes: 1 addition & 7 deletions apis/r/R/SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,9 @@ SOMADenseNDArray <- R6::R6Class(

## the 'soma_data' data type may not have been cached, and if so we need to fetch it
if (is.null(private$.type)) {
## TODO: replace with a libtiledbsoma accessor as discussed
tpstr <- tiledb::datatype(tiledb::attrs(tiledb::schema(self$uri))[["soma_data"]])
arstr <- arrow_type_from_tiledb_type(tpstr)
private$.type <- arstr
private$.type <- self$schema()[["soma_data"]]$type
}

arr <- self$object
tiledb::query_layout(arr) <- "COL_MAJOR"
spdl::debug("[SOMADenseNDArray::write] about to call write")
arrsch <- arrow::schema(arrow::field("soma_data", private$.type))
tbl <- arrow::arrow_table(soma_data = values, schema = arrsch)
Expand All @@ -149,7 +144,6 @@ SOMADenseNDArray <- R6::R6Class(
naap <- nanoarrow::nanoarrow_allocate_array()
nasp <- nanoarrow::nanoarrow_allocate_schema()
arrow::as_record_batch(tbl)$export_to_c(naap, nasp)
#arr[] <- values
writeArrayFromArrow(
uri = self$uri,
naap = naap,
Expand Down
5 changes: 1 addition & 4 deletions apis/r/R/SOMASparseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,7 @@ SOMASparseNDArray <- R6::R6Class(

## the 'soma_data' data type may not have been cached, and if so we need to fetch it
if (is.null(private$.type)) {
## TODO: replace with a libtiledbsoma accessor as discussed
tpstr <- tiledb::datatype(tiledb::attrs(tiledb::schema(self$uri))[["soma_data"]])
arstr <- arrow_type_from_tiledb_type(tpstr)
private$.type <- arstr
private$.type <- self$schema()[["soma_data"]]$type
}

arrsch <- arrow::schema(arrow::field(nms[1], arrow::int64()),
Expand Down
90 changes: 2 additions & 88 deletions apis/r/R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ TileDBArray <- R6::R6Class(
private$.mode <- mode
if (is.null(self$tiledb_timestamp)) {
spdl::debug("[TileDBArray$open] Opening {} '{}' in {} mode", self$class(), self$uri, mode)
private$.tiledb_array <- tiledb::tiledb_array_open(self$object, type = mode)
} else {
if (is.null(internal_use_only)) stopifnot("tiledb_timestamp not yet supported for WRITE mode" = mode == "READ")
spdl::debug(
Expand All @@ -38,8 +37,6 @@ TileDBArray <- R6::R6Class(
mode,
self$tiledb_timestamp %||% "now"
)
#private$.tiledb_array <- tiledb::tiledb_array_open_at(self$object, type = mode,
# timestamp = self$tiledb_timestamp)
}

## TODO -- cannot do here while needed for array case does not work for data frame case
Expand All @@ -54,7 +51,6 @@ TileDBArray <- R6::R6Class(
close = function() {
spdl::debug("[TileDBArray$close] Closing {} '{}'", self$class(), self$uri)
private$.mode = "CLOSED"
tiledb::tiledb_array_close(self$object)
invisible(self)
},

Expand All @@ -67,19 +63,6 @@ TileDBArray <- R6::R6Class(
}
},

#' @description Return a [`TileDBArray`] object (lifecycle: maturing)
#' @param ... Optional arguments to pass to `tiledb::tiledb_array()`
#' @return A [`tiledb::tiledb_array`] object.
tiledb_array = function(...) {
args <- list(...)
args$uri <- self$uri
args$query_type <- self$.mode
args$query_layout <- "UNORDERED"
args$ctx <- self$tiledbsoma_ctx$context()
spdl::debug("[TileDBArray$tiledb_array] ctor uri {} mode {} layout {}", args$uri, args$query_type, args$query_layout)
do.call(tiledb::tiledb_array, args)
},

#' @description Retrieve metadata from the TileDB array. (lifecycle: maturing)
#' @param key The name of the metadata attribute to retrieve.
#' @return A list of metadata values.
Expand Down Expand Up @@ -136,12 +119,6 @@ TileDBArray <- R6::R6Class(
c_schema(self$uri, private$.soma_context));
},

#' @description Retrieve the array schema as TileDB schema (lifecycle: maturing)
#' @return A [`tiledb::tiledb_array_schema`] object
tiledb_schema = function() {
tiledb::schema(self$object)
},

#' @description Retrieve the array dimensions (lifecycle: maturing)
#' @return A named list of [`tiledb::tiledb_dim`] objects
dimensions = function() {
Expand Down Expand Up @@ -225,25 +202,8 @@ TileDBArray <- R6::R6Class(
#' @return A vector of [`bit64::integer64`]s with one entry for
#' each dimension.
non_empty_domain = function(index1 = FALSE) {
dims <- self$dimnames()
ned <- bit64::integer64(length = length(dims))
## added during C++-ification as self$object could close
if (isFALSE(tiledb::tiledb_array_is_open(self$object))) {
arrhandle <- tiledb::tiledb_array_open(self$object, type = "READ")
} else {
arrhandle <- self$object
}
for (i in seq_along(along.with = ned)) {
dom <- max(tiledb::tiledb_array_get_non_empty_domain_from_name(
arrhandle, # instead of: self$object,
name = dims[i]
))
if (isTRUE(x = index1)) {
dom <- dom + 1L
}
ned[i] <- dom
}
return(ned)
# XXX TEMP
vapply_int(self$non_empty_domain_new(), function(x){x[[2]]})
},

#' @description Returns a named list of minimum/maximum pairs, one per index
Expand All @@ -268,12 +228,6 @@ TileDBArray <- R6::R6Class(
ndim(self$uri, private$.soma_context)
},

#' @description Retrieve the array attributes (lifecycle: maturing)
#' @return A list of [`tiledb::tiledb_attr`] objects
attributes = function() {
tiledb::attrs(self$tiledb_schema())
},

#' @description Retrieve dimension names (lifecycle: maturing)
#' @return A character vector with the array's dimension names
dimnames = function() {
Expand Down Expand Up @@ -301,57 +255,17 @@ TileDBArray <- R6::R6Class(
),

active = list(
#' @field object Access the underlying TileB object directly (either a
#' [`tiledb::tiledb_array`] or [`tiledb::tiledb_group`]).
object = function(value) {
if (!missing(value)) {
stop(sprintf("'%s' is a read-only field.", "object"), call. = FALSE)
}
# If the array was created after the object was instantiated, we need to
# initialize private$.tiledb_array
if (is.null(private$.tiledb_array)) {
private$initialize_object()
}
private$.tiledb_array
}
),

private = list(

# Internal pointer to the TileDB array.
#
# Important implementation note:
# * In TileDB-R there is an unopened handle obtained by tiledb::tiledb_array, which takes
# a URI as its argument.
# * One may then open and close this using tiledb::tiledb_array_open (for read or write)
# and tiledb::tiledb_array_close, which take a tiledb_array handle as their first argument.
#
# However, for groups:
# * tiledb::tiledb_group and tiledb::group_open both return an object opened for read or write.
# * Therefore for groups we cannot imitate the behavior for arrays.
#
# For this reason there is a limit to how much handle-abstraction we can do in the TileDBObject
# parent class. In particular, we cannot have a single .tiledb_object shared by both TileDBArray
# and TileDBGroup.
.tiledb_array = NULL,

# Initially NULL, once the array is created or opened, this is populated
# with a list that's empty or contains the array metadata. Since the SOMA
# spec requires that we allow readback of array metadata even when the array
# is open for write, but the TileDB layer underneath us does not, we must
# have this cache.
.metadata_cache = NULL,

# Once the array has been created this initializes the TileDB array object
# and stores the reference in private$.tiledb_array.
initialize_object = function() {
private$.tiledb_array <- tiledb::tiledb_array(
uri = self$uri,
ctx = self$tiledbsoma_ctx$context(),
query_layout = "UNORDERED"
)
},

# ----------------------------------------------------------------
# Metadata-caching

Expand Down
1 change: 1 addition & 0 deletions apis/r/tests/testthat/helper-test-soma-objects.R
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ create_and_populate_32bit_sparse_nd_array <- function(uri) {
soma_data = c(1L, 2L, 3L)
)

# XXX CHANGEME
tdb_dims <- mapply(
tiledb::tiledb_dim,
name = c("soma_dim_0", "soma_dim_1"),
Expand Down
5 changes: 5 additions & 0 deletions apis/r/tests/testthat/helper-test-tiledb-objects.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
create_empty_test_array <- function(uri) {
stopifnot(!dir.exists(uri))
# XXX CHANGEME
dim <- tiledb::tiledb_dim("d0", type = "ASCII", domain = NULL, tile = NULL)
# XXX CHANGEME
dom <- tiledb::tiledb_domain(dims = dim)
# XXX CHANGEME
schema <- tiledb::tiledb_array_schema(
domain = dom,
# XXX CHANGEME
attrs = c(tiledb::tiledb_attr("a", type = "INT32")),
sparse = TRUE
)
# XXX CHANGEME
tiledb::tiledb_array_create(uri, schema)
return(uri)
}
Expand Down
14 changes: 14 additions & 0 deletions apis/r/tests/testthat/test-Arrow-utils.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
test_that("TileDB classes can be converted to Arrow equivalents", {

# Dimension to Arrow field
# XXX CHANGEME
dim0 <- tiledb::tiledb_dim(
name = "dim0",
domain = NULL,
tile = NULL,
type = "ASCII"
)

# XXX CHANGEME
dim1 <- tiledb::tiledb_dim(
name = "dim1",
domain = bit64::as.integer64(c(0, 100)),
Expand All @@ -21,27 +23,33 @@ test_that("TileDB classes can be converted to Arrow equivalents", {
# String dimension
dim0_field <- arrow_field_from_tiledb_dim(dim0)
expect_true(is_arrow_field(dim0_field))
# XXX CHANGEME
expect_equal(dim0_field$name, tiledb::name(dim0))
expect_equal(
tiledb_type_from_arrow_type(dim0_field$type, is_dim=TRUE),
# XXX CHANGEME
tiledb::datatype(dim0)
)

# Integer dimension
dim1_field <- arrow_field_from_tiledb_dim(dim1)
expect_true(is_arrow_field(dim1_field))
# XXX CHANGEME
expect_equal(dim1_field$name, tiledb::name(dim1))
expect_equal(
tiledb_type_from_arrow_type(dim1_field$type, is_dim=TRUE),
# XXX CHANGEME
tiledb::datatype(dim1)
)

# Attribute to Arrow field
# XXX CHANGEME
attr0 <- tiledb::tiledb_attr(
name = "attr0",
type = "UTF8"
)

# XXX CHANGEME
attr1 <- tiledb::tiledb_attr(
name = "attr1",
type = "INT64"
Expand All @@ -53,23 +61,29 @@ test_that("TileDB classes can be converted to Arrow equivalents", {
# String attribute
attr0_field <- arrow_field_from_tiledb_attr(attr0)
expect_true(is_arrow_field(attr0_field))
# XXX CHANGEME
expect_equal(attr0_field$name, tiledb::name(attr0))
expect_equal(
tiledb_type_from_arrow_type(attr0_field$type, is_dim=FALSE),
# XXX CHANGEME
tiledb::datatype(attr0)
)

# Integer attribute
attr1_field <- arrow_field_from_tiledb_attr(attr1)
expect_true(is_arrow_field(attr1_field))
# XXX CHANGEME
expect_equal(attr1_field$name, tiledb::name(attr1))
expect_equal(
tiledb_type_from_arrow_type(attr1_field$type, is_dim=FALSE),
# XXX CHANGEME
tiledb::datatype(attr1)
)

# TileDB schema to Arrow schema
# XXX CHANGEME
tdb_schema <- tiledb::tiledb_array_schema(
# XXX CHANGEME
domain = tiledb::tiledb_domain(c(dim0, dim1)),
attrs = c(attr0, attr1),
sparse = TRUE
Expand Down
1 change: 1 addition & 0 deletions apis/r/tests/testthat/test-OrderedAndFactor.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ test_that("SOMADataFrame round-trip with factor and ordered", {
## quick write with tiledb-r so that we get a schema from the manifested array
## there should possibly be a helper function to create the schema from a data.frame
turi <- tempfile()
# XXX CHANGEME
expect_silent(tiledb::fromDataFrame(ett, turi, col_index="soma_joinid"))

tsch <- tiledb::schema(turi)
Expand Down
1 change: 0 additions & 1 deletion apis/r/tests/testthat/test-SOMACollection.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ test_that("SOMACollection basics", {
# Verify the empty collection is accessible and reads back as empty
collection <- SOMACollectionOpen(uri)
expect_true(dir.exists(uri))
expect_match(tiledb::tiledb_object_type(uri), "GROUP")
expect_true(collection$soma_type == "SOMACollection")
expect_true(collection$exists())
expect_equal(collection$length(), 0)
Expand Down
Loading