diff --git a/apis/python/src/tiledbsoma/_read_iters.py b/apis/python/src/tiledbsoma/_read_iters.py index f5717526d8..a0c7368d7d 100644 --- a/apis/python/src/tiledbsoma/_read_iters.py +++ b/apis/python/src/tiledbsoma/_read_iters.py @@ -13,7 +13,6 @@ from typing import ( TYPE_CHECKING, Dict, - Generator, Iterator, List, Optional, @@ -249,7 +248,7 @@ def _table_reader(self) -> Iterator[BlockwiseTableReadIterResult]: def _reindexed_table_reader( self, _pool: Optional[ThreadPoolExecutor] = None, - ) -> Generator[BlockwiseTableReadIterResult, None, None]: + ) -> Iterator[BlockwiseTableReadIterResult]: """Private. Blockwise table reader w/ reindexing. Helper function for sub-class use""" for tbl, coords in self._maybe_eager_iterator(self._table_reader(), _pool): pytbl = {} diff --git a/apis/python/src/tiledbsoma/_util.py b/apis/python/src/tiledbsoma/_util.py index b94da8b163..b659ddd923 100644 --- a/apis/python/src/tiledbsoma/_util.py +++ b/apis/python/src/tiledbsoma/_util.py @@ -13,6 +13,7 @@ import pandas as pd import pyarrow as pa import somacore +from anndata import AnnData from somacore import options from . 
import pytiledbsoma as clib @@ -311,3 +312,13 @@ def anndata_dataframe_unmodified_nan_safe(old: pd.DataFrame, new: pd.DataFrame) if any(old.keys() != new.keys()): return False return True + + +def verify_obs_var(ad0: AnnData, ad1: AnnData, nan_safe: bool = False) -> None: + """Verify that two ``AnnData``'s ``obs`` and ``var`` dataframes are equivalent.""" + if nan_safe: + assert anndata_dataframe_unmodified_nan_safe(ad0.obs, ad1.obs) + assert anndata_dataframe_unmodified_nan_safe(ad0.var, ad1.var) + else: + assert anndata_dataframe_unmodified(ad0.obs, ad1.obs) + assert anndata_dataframe_unmodified(ad0.var, ad1.var) diff --git a/apis/python/tests/_util.py b/apis/python/tests/_util.py index 00b872f07e..5de7d67768 100644 --- a/apis/python/tests/_util.py +++ b/apis/python/tests/_util.py @@ -1,9 +1,14 @@ from contextlib import contextmanager +from pathlib import Path from typing import Any, Type import pytest from typeguard import suppress_type_checks +HERE = Path(__file__).parent +PY_ROOT = HERE.parent +TESTDATA = PY_ROOT / "testdata" + @contextmanager def raises_no_typeguard(exc: Type[Exception], *args: Any, **kwargs: Any): diff --git a/apis/python/tests/conftest.py b/apis/python/tests/conftest.py new file mode 100644 index 0000000000..1be78f55b1 --- /dev/null +++ b/apis/python/tests/conftest.py @@ -0,0 +1,40 @@ +from tempfile import TemporaryDirectory + +import anndata +import pytest + +import tiledbsoma +import tiledbsoma.io + +from ._util import TESTDATA + + +@pytest.fixture +def h5ad_path(request): + # pbmc-small is faster for automated unit-test / CI runs. 
+ return TESTDATA / "pbmc-small.h5ad" + + +@pytest.fixture +def adata(h5ad_path): + return anndata.read_h5ad(h5ad_path) + + +@pytest.fixture +def h5ad_file_extended(request): + # This has more component arrays in it + return TESTDATA / "pbmc3k_processed.h5ad" + + +@pytest.fixture +def adata_extended(h5ad_file_extended): + return anndata.read_h5ad(h5ad_file_extended) + + +@pytest.fixture +def pbmc_small(h5ad_path): + """Ingest ``pbmc-small.h5ad`` into a temporary SOMA experiment, yield the opened ``Experiment``.""" + with TemporaryDirectory() as exp_path: + tiledbsoma.io.from_h5ad(exp_path, h5ad_path, measurement_name="RNA") + with tiledbsoma.Experiment.open(exp_path) as exp: + yield exp diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 6d8ed295e9..bf6f1cf043 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -16,42 +16,22 @@ import tiledbsoma.io from tiledbsoma import Experiment, _constants, _factory from tiledbsoma._tiledb_object import TileDBObject -from tiledbsoma._util import ( - anndata_dataframe_unmodified, - anndata_dataframe_unmodified_nan_safe, -) - -HERE = Path(__file__).parent +from tiledbsoma._util import verify_obs_var - -@pytest.fixture -def h5ad_file(request): - # pbmc-small is faster for automated unit-test / CI runs. 
- input_path = HERE.parent / "testdata/pbmc-small.h5ad" - # input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad" - return input_path - - -@pytest.fixture -def h5ad_file_extended(request): - # This has more component arrays in it - input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad" - return input_path +from ._util import TESTDATA @pytest.fixture def h5ad_file_with_obsm_holes(request): # This has zeroes in an obsm matrix so nnz is not num_rows * num_cols - input_path = HERE.parent / "testdata/pbmc3k-with-obsm-zero.h5ad" - return input_path + return TESTDATA / "pbmc3k-with-obsm-zero.h5ad" @pytest.fixture def h5ad_file_uns_string_arrays(request): # This has uns["louvain_colors"] with dtype.char == "U". # It also has uns["more_colors"] in the form '[[...]]', as often occurs in the wild. - input_path = HERE.parent / "testdata/pbmc3k.h5ad" - return input_path + return TESTDATA / "pbmc3k.h5ad" @pytest.fixture @@ -68,15 +48,13 @@ def h5ad_file_categorical_int_nan(request): # s[0] = math.nan # adata.obs["categ_int_nan"] = s # adata.write_h5ad("categorical_int_nan.h5ad") - input_path = HERE.parent / "testdata/categorical_int_nan.h5ad" - return input_path + return TESTDATA / "categorical_int_nan.h5ad" @pytest.fixture def h5ad_file_X_empty(request): """adata.X is a zero-cell sparse matrix""" - input_path = HERE.parent / "testdata/x-empty.h5ad" - return input_path + return TESTDATA / "x-empty.h5ad" @pytest.fixture @@ -85,13 +63,7 @@ def h5ad_file_X_none(request): adata.X has Python value None if read in non-backed mode; if read in backed mode, adata.X is not present as an attribute of adata. 
""" - input_path = HERE.parent / "testdata/x-none.h5ad" - return input_path - - -@pytest.fixture -def adata(h5ad_file): - return anndata.read_h5ad(h5ad_file) + return TESTDATA / "x-none.h5ad" @pytest.mark.parametrize( @@ -139,8 +111,7 @@ def test_import_anndata(adata, ingest_modes, X_kind): if ingest_mode != "schema_only": have_ingested = True - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) exp = tiledbsoma.Experiment.open(uri) @@ -268,21 +239,21 @@ def test_import_anndata(adata, ingest_modes, X_kind): "othername", ], ) -def test_named_X_layers(h5ad_file, X_layer_name): +def test_named_X_layers(h5ad_path, X_layer_name): tempdir = tempfile.TemporaryDirectory() soma_path = tempdir.name if X_layer_name is None: tiledbsoma.io.from_h5ad( soma_path, - h5ad_file.as_posix(), + h5ad_path.as_posix(), "RNA", ingest_mode="write", ) else: tiledbsoma.io.from_h5ad( soma_path, - h5ad_file.as_posix(), + h5ad_path.as_posix(), "RNA", ingest_mode="write", X_layer_name=X_layer_name, @@ -305,9 +276,9 @@ def _get_fragment_count(array_uri): @pytest.mark.parametrize( "resume_mode_h5ad_file", [ - HERE.parent / "testdata/pbmc-small-x-dense.h5ad", - HERE.parent / "testdata/pbmc-small-x-csr.h5ad", - HERE.parent / "testdata/pbmc-small-x-csc.h5ad", + TESTDATA / "pbmc-small-x-dense.h5ad", + TESTDATA / "pbmc-small-x-csr.h5ad", + TESTDATA / "pbmc-small-x-csc.h5ad", ], ) def test_resume_mode(adata, resume_mode_h5ad_file): @@ -417,19 +388,19 @@ def test_ingest_relative(h5ad_file_extended, use_relative_uri): @pytest.mark.parametrize("ingest_uns_keys", [["louvain_colors"], None]) -def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys): +def test_ingest_uns( + tmp_path: pathlib.Path, h5ad_file_extended, adata_extended, ingest_uns_keys +): tmp_uri = tmp_path.as_uri() - original = anndata.read(h5ad_file_extended) - adata = anndata.read(h5ad_file_extended) + 
adata_extended2 = anndata.read(h5ad_file_extended) uri = tiledbsoma.io.from_anndata( tmp_uri, - adata, + adata_extended2, measurement_name="hello", uns_keys=ingest_uns_keys, ) - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(adata_extended, adata_extended2) with tiledbsoma.Experiment.open(uri) as exp: uns = exp.ms["hello"]["uns"] @@ -459,7 +430,9 @@ def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys) assert isinstance(random_state, tiledbsoma.DenseNDArray) assert np.array_equal(random_state.read().to_numpy(), np.array([0])) got_pca_variance = uns["pca"]["variance"].read().to_numpy() - assert np.array_equal(got_pca_variance, adata.uns["pca"]["variance"]) + assert np.array_equal( + got_pca_variance, adata_extended2.uns["pca"]["variance"] + ) else: assert set(uns) == set(ingest_uns_keys) @@ -498,8 +471,7 @@ def test_add_matrix_to_collection(adata): uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) exp = tiledbsoma.Experiment.open(uri) with _factory.open(output_path) as exp_r: @@ -625,8 +597,7 @@ def add_matrix_to_collection( uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) exp = tiledbsoma.Experiment.open(uri) with _factory.open(output_path) as exp_r: @@ -684,8 +655,7 @@ def test_export_anndata(adata): tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) with _factory.open(output_path) as exp: with 
pytest.raises(ValueError): @@ -776,8 +746,7 @@ def test_null_obs(adata, tmp_path: Path): uri = tiledbsoma.io.from_anndata( output_path, adata, "RNA", ingest_mode="write", X_kind=tiledbsoma.SparseNDArray ) - assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs) - assert anndata_dataframe_unmodified_nan_safe(original.var, adata.var) + verify_obs_var(original, adata, nan_safe=True) exp = tiledbsoma.Experiment.open(uri) with tiledb.open(exp.obs.uri, "r") as obs: @@ -807,8 +776,7 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path): output_path = tmp_path.as_posix() tiledbsoma.io.from_anndata(output_path, adata, "RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) exp = tiledbsoma.Experiment.open(output_path) @@ -954,8 +922,7 @@ def test_id_names(tmp_path, obs_id_name, var_id_name, indexify_obs, indexify_var obs_id_name=obs_id_name, var_id_name=var_id_name, ) - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) with tiledbsoma.Experiment.open(uri) as exp: assert obs_id_name in exp.obs.keys() @@ -1040,8 +1007,7 @@ def test_uns_io(tmp_path, outgest_uns_keys): soma_uri = tmp_path.as_posix() tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) with tiledbsoma.Experiment.open(soma_uri) as exp: bdata = tiledbsoma.io.to_anndata( @@ -1092,8 +1058,7 @@ def test_string_nan_columns(tmp_path, adata, write_index): uri = tmp_path.as_posix() original = adata.copy() tiledbsoma.io.from_anndata(uri, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs) - assert anndata_dataframe_unmodified_nan_safe(original.var, 
adata.var) + verify_obs_var(original, adata, nan_safe=True) # Step 3 with tiledbsoma.open(uri, "r") as exp: @@ -1151,8 +1116,7 @@ def test_index_names_io(tmp_path, obs_index_name, var_index_name): original = adata.copy() tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name) - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) with tiledbsoma.Experiment.open(soma_uri) as exp: bdata = tiledbsoma.io.to_anndata(exp, measurement_name) diff --git a/apis/python/tests/test_experiment_query.py b/apis/python/tests/test_experiment_query.py index 8aefdcdbde..f4007c6fd5 100644 --- a/apis/python/tests/test_experiment_query.py +++ b/apis/python/tests/test_experiment_query.py @@ -7,7 +7,7 @@ import pyarrow as pa import pytest from scipy import sparse -from somacore import options +from somacore import AxisQuery, options import tiledbsoma as soma from tests._util import raises_no_typeguard @@ -905,3 +905,17 @@ def test_experiment_query_uses_threadpool_from_context(soma_experiment): assert adata is not None pool.submit.assert_called() + + +def test_empty_categorical_query(pbmc_small): + q = pbmc_small.axis_query( + measurement_name="RNA", obs_query=AxisQuery(value_filter='groups == "g1"') + ) + obs = q.obs().concat() + assert len(obs) == 44 + + q = pbmc_small.axis_query( + measurement_name="RNA", obs_query=AxisQuery(value_filter='groups == "foo"') + ) + obs = q.obs().concat() + assert len(obs) == 0 diff --git a/apis/python/tests/test_io.py b/apis/python/tests/test_io.py index d466a1d663..29d0af55d1 100644 --- a/apis/python/tests/test_io.py +++ b/apis/python/tests/test_io.py @@ -1,5 +1,3 @@ -from pathlib import Path - import anndata as ad import numpy as np import pyarrow as pa @@ -11,8 +9,6 @@ from tiledbsoma import _factory from tiledbsoma.options._tiledb_create_options import TileDBCreateOptions -HERE = Path(__file__).parent - @pytest.fixture def src_matrix(request): 
@@ -178,15 +174,13 @@ def test_write_arrow_table(tmp_path, num_rows, cap_nbytes): assert list(pdf["foo"]) == pydict["foo"] -def test_add_matrices(tmp_path): +def test_add_matrices(tmp_path, h5ad_path): """Test multiple add_matrix_to_collection calls can be issued on the same soma object. See https://github.com/single-cell-data/TileDB-SOMA/issues/1565.""" # Create a soma object from an anndata object - soma_path = tmp_path.as_posix() - h5ad_path = HERE.parent / "testdata/pbmc-small.h5ad" soma_uri = soma.io.from_h5ad( - soma_path, input_path=h5ad_path, measurement_name="RNA" + tmp_path.as_posix(), input_path=h5ad_path, measurement_name="RNA" ) # Synthesize some new data to be written into two matrices within the soma object (ensuring it's different from the diff --git a/apis/python/tests/test_notebook_sparse_dense.py b/apis/python/tests/test_notebook_sparse_dense.py index 385fb6f40f..178639f670 100644 --- a/apis/python/tests/test_notebook_sparse_dense.py +++ b/apis/python/tests/test_notebook_sparse_dense.py @@ -1,16 +1,13 @@ -from pathlib import Path - import pytest import tiledbsoma import tiledbsoma.io - -HERE = Path(__file__).parent +from tests._util import PY_ROOT @pytest.mark.parametrize("name", ["sparse", "dense"]) def test_notebook_path_dense(name): - path = HERE.parent / f"notebooks/data/{name}/pbmc3k" + path = PY_ROOT / f"notebooks/data/{name}/pbmc3k" with tiledbsoma.Experiment.open(path.as_posix()) as exp: assert len(exp.obs.read().concat()) == 2638 diff --git a/apis/python/tests/test_platform_config.py b/apis/python/tests/test_platform_config.py index 08018f3ba4..2fe8514171 100644 --- a/apis/python/tests/test_platform_config.py +++ b/apis/python/tests/test_platform_config.py @@ -1,29 +1,12 @@ import tempfile -from pathlib import Path -import anndata import pytest import tiledb import tiledbsoma import tiledbsoma.io import tiledbsoma.options._tiledb_create_options as tco -from tiledbsoma._util import anndata_dataframe_unmodified - -HERE = 
Path(__file__).parent - - -@pytest.fixture -def h5ad_file(request): - # pbmc-small is faster for automated unit-test / CI runs. - # input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad" - input_path = HERE.parent / "testdata/pbmc-small.h5ad" - return input_path - - -@pytest.fixture -def adata(h5ad_file): - return anndata.read_h5ad(h5ad_file) +from tiledbsoma._util import verify_obs_var @pytest.mark.skip(reason="No longer return ArraySchema - see note in test") @@ -68,8 +51,7 @@ def test_platform_config(adata): } }, ) - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) + verify_obs_var(original, adata) with tiledbsoma.Experiment.open(output_path) as exp: x_data = exp.ms["RNA"].X["data"] diff --git a/apis/python/tests/test_registration_mappings.py b/apis/python/tests/test_registration_mappings.py index cd50102af7..f13bf5c957 100644 --- a/apis/python/tests/test_registration_mappings.py +++ b/apis/python/tests/test_registration_mappings.py @@ -13,7 +13,7 @@ import tiledbsoma.io import tiledbsoma.io._registration as registration -from tiledbsoma._util import anndata_dataframe_unmodified +from tiledbsoma._util import verify_obs_var def _create_anndata( @@ -788,8 +788,7 @@ def test_append_items_with_experiment(obs_field_name, var_field_name): registration_mapping=rd, ) - assert anndata_dataframe_unmodified(original.obs, adata2.obs) - assert anndata_dataframe_unmodified(original.var, adata2.var) + verify_obs_var(original, adata2) expect_obs_soma_joinids = list(range(6)) expect_var_soma_joinids = list(range(5)) @@ -895,8 +894,7 @@ def test_append_with_disjoint_measurements( registration_mapping=rd, ) - assert anndata_dataframe_unmodified(original.obs, anndata2.obs) - assert anndata_dataframe_unmodified(original.var, anndata2.var) + verify_obs_var(original, anndata2) # exp/obs, use_same_cells=True: exp/obs, use_same_cells=False: # soma_joinid obs_id cell_type is_primary_data soma_joinid obs_id 
cell_type is_primary_data diff --git a/apis/python/tests/test_registration_signatures.py b/apis/python/tests/test_registration_signatures.py index 777d91e26e..ea4a8e3c8c 100644 --- a/apis/python/tests/test_registration_signatures.py +++ b/apis/python/tests/test_registration_signatures.py @@ -1,38 +1,22 @@ import tempfile -from pathlib import Path -import anndata as ad import pytest import tiledbsoma.io import tiledbsoma.io._registration.signatures as signatures -from tiledbsoma._util import anndata_dataframe_unmodified +from tiledbsoma._util import verify_obs_var -HERE = Path(__file__).parent - -@pytest.fixture -def canned_h5ad_file(request): - input_path = HERE.parent / "testdata/pbmc-small.h5ad" - return input_path - - -@pytest.fixture -def canned_anndata(canned_h5ad_file): - return ad.read_h5ad(canned_h5ad_file) - - -def test_signature_serdes(canned_h5ad_file, canned_anndata): - sig = signatures.Signature.from_h5ad(canned_h5ad_file.as_posix()) +def test_signature_serdes(h5ad_path, adata): + sig = signatures.Signature.from_h5ad(h5ad_path.as_posix()) text1 = sig.to_json() assert "obs_schema" in text1 assert "var_schema" in text1 assert sig == signatures.Signature.from_json(text1) - original = canned_anndata.copy() - sig = signatures.Signature.from_anndata(canned_anndata) - assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs) - assert anndata_dataframe_unmodified(original.var, canned_anndata.var) + original = adata.copy() + sig = signatures.Signature.from_anndata(adata) + verify_obs_var(original, adata) text2 = sig.to_json() assert sig == signatures.Signature.from_json(text2) @@ -42,9 +26,8 @@ def test_signature_serdes(canned_h5ad_file, canned_anndata): tempdir = tempfile.TemporaryDirectory() output_path = tempdir.name - uri = tiledbsoma.io.from_anndata(output_path, canned_anndata, "RNA") - assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs) - assert anndata_dataframe_unmodified(original.var, canned_anndata.var) + uri = 
tiledbsoma.io.from_anndata(output_path, adata, "RNA") + verify_obs_var(original, adata) sig = signatures.Signature.from_soma_experiment(uri) text3 = sig.to_json() @@ -53,20 +36,18 @@ def test_signature_serdes(canned_h5ad_file, canned_anndata): assert text1 == text3 -def test_compatible(canned_anndata): +def test_compatible(adata): # Check that zero inputs result in zero incompatibility signatures.Signature.check_compatible({}) - original = canned_anndata.copy() - sig1 = signatures.Signature.from_anndata(canned_anndata) - assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs) - assert anndata_dataframe_unmodified(original.var, canned_anndata.var) + original = adata.copy() + sig1 = signatures.Signature.from_anndata(adata) + verify_obs_var(original, adata) tempdir = tempfile.TemporaryDirectory() output_path = tempdir.name - uri = tiledbsoma.io.from_anndata(output_path, canned_anndata, "RNA") - assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs) - assert anndata_dataframe_unmodified(original.var, canned_anndata.var) + uri = tiledbsoma.io.from_anndata(output_path, adata, "RNA") + verify_obs_var(original, adata) sig2 = signatures.Signature.from_soma_experiment(uri) # Check that single inputs result in zero incompatibility @@ -85,13 +66,12 @@ def test_compatible(canned_anndata): ) # no throw # Check incompatibility of modified AnnData - adata3 = canned_anndata + adata3 = adata del adata3.obs["groups"] original = adata3.copy() sig3 = signatures.Signature.from_anndata(adata3) - assert anndata_dataframe_unmodified(original.obs, adata3.obs) - assert anndata_dataframe_unmodified(original.var, adata3.var) + verify_obs_var(original, adata3) with pytest.raises(ValueError): signatures.Signature.check_compatible({"orig": sig1, "anndata3": sig3}) diff --git a/apis/python/tests/test_update_dataframes.py b/apis/python/tests/test_update_dataframes.py index 0325a15167..cf30c07410 100644 --- a/apis/python/tests/test_update_dataframes.py +++ 
b/apis/python/tests/test_update_dataframes.py @@ -1,85 +1,109 @@ import tempfile -from pathlib import Path +from contextlib import nullcontext +from dataclasses import dataclass, fields -import anndata import numpy as np import pandas as pd import pyarrow as pa import pytest +from anndata import AnnData +from pyarrow import Schema import tiledbsoma import tiledbsoma.io -from tiledbsoma._util import anndata_dataframe_unmodified +from tiledbsoma._util import anndata_dataframe_unmodified, verify_obs_var -HERE = Path(__file__).parent - -@pytest.fixture -def h5ad_file(request): - # pbmc-small is faster for automated unit-test / CI runs. - input_path = HERE.parent / "testdata/pbmc-small.h5ad" - # input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad" - return input_path +@dataclass +class TestCase: + exp_path: str + original: AnnData + new: AnnData + new_obs: pd.DataFrame + new_var: pd.DataFrame + o1: Schema + v1: Schema @pytest.fixture -def adata(h5ad_file): - return anndata.read_h5ad(h5ad_file) - - -@pytest.mark.parametrize("readback", [False, True]) -def test_no_change(adata, readback): - tempdir = tempfile.TemporaryDirectory() - output_path = tempdir.name - original = adata.copy() - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) - - with tiledbsoma.Experiment.open(output_path) as exp: - o1 = exp.obs.schema - v1 = exp.ms["RNA"].var.schema - - if readback: - new_obs = exp.obs.read().concat().to_pandas() - new_var = exp.ms["RNA"].var.read().concat().to_pandas() - else: - new_obs = adata.obs - new_var = adata.var - - with tiledbsoma.Experiment.open(output_path, "w") as exp: - tiledbsoma.io.update_obs(exp, new_obs) - tiledbsoma.io.update_var(exp, new_var, "RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) - - with 
tiledbsoma.Experiment.open(output_path) as exp: +def case(request, adata) -> TestCase: + """Ingest an ``AnnData``, yield a ``TestCase`` with the original and new AnnData objects.""" + with tempfile.TemporaryDirectory() as exp_path: + original = adata.copy() + tiledbsoma.io.from_anndata(exp_path, adata, measurement_name="RNA") + verify_obs_var(original, adata) + readback = request.param + with tiledbsoma.Experiment.open(exp_path) as exp: + o1 = exp.obs.schema + v1 = exp.ms["RNA"].var.schema + if readback: + new_obs = exp.obs.read().concat().to_pandas() + new_var = exp.ms["RNA"].var.read().concat().to_pandas() + else: + new_obs = adata.obs + new_var = adata.var + + yield TestCase( + exp_path=exp_path, + original=original, + new=adata, + new_obs=new_obs, + new_var=new_var, + o1=o1, + v1=v1, + ) + + +def create_member_fixture(name): + """Create a ``pytest.fixture`` for a ``TestCase`` field.""" + + @pytest.fixture + def member_fixture(case): + return getattr(case, name) + + return member_fixture + + +for field in fields(TestCase): + """Create ``pytest.fixture``s for each ``TestCase`` field.""" + globals()[field.name] = create_member_fixture(field.name) + + +def verify_schemas(exp_path, o1, v1): + """Read {obs,var} schemas, verify they match initial versions.""" + with tiledbsoma.Experiment.open(exp_path) as exp: o2 = exp.obs.schema v2 = exp.ms["RNA"].var.schema - assert o1 == o2 assert v1 == v2 -@pytest.mark.parametrize("readback", [False, True]) -def test_add(adata, readback): - tempdir = tempfile.TemporaryDirectory() - output_path = tempdir.name - original = adata.copy() - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) +def verify_updates(exp_path, obs, var, exc=False): + obs0 = obs.copy() + var0 = var.copy() + + def ctx(): + return pytest.raises(ValueError) if exc else nullcontext() + + with 
tiledbsoma.Experiment.open(exp_path, "w") as exp: + with ctx(): + tiledbsoma.io.update_obs(exp, obs) + with ctx(): + tiledbsoma.io.update_var(exp, var, "RNA") + + assert anndata_dataframe_unmodified(obs0, obs) + assert anndata_dataframe_unmodified(var0, var) - with tiledbsoma.Experiment.open(output_path) as exp: - exp.ms["RNA"].var.schema - if readback: - new_obs = exp.obs.read().concat().to_pandas() - new_var = exp.ms["RNA"].var.read().concat().to_pandas() - else: - new_obs = adata.obs - new_var = adata.var +@pytest.mark.parametrize("case", [False, True], indirect=True) +def test_no_change(exp_path, original, new, new_obs, new_var, o1, v1): + verify_updates(exp_path, new_obs, new_var) + verify_schemas(exp_path, o1, v1) + verify_obs_var(original, new) + +@pytest.mark.parametrize("case", [False, True], indirect=True) +def test_add(exp_path, new_obs, new_var): # boolean new_obs["is_g1"] = new_obs["groups"] == "g1" # int @@ -91,15 +115,9 @@ def test_add(adata, readback): new_var["vst.mean.sq"] = new_var["vst.mean"] ** 2 - new_obs_save = new_obs.copy() - new_var_save = new_var.copy() - with tiledbsoma.Experiment.open(output_path, "w") as exp: - tiledbsoma.io.update_obs(exp, new_obs) - tiledbsoma.io.update_var(exp, new_var, "RNA") - assert anndata_dataframe_unmodified(new_obs, new_obs_save) - assert anndata_dataframe_unmodified(new_var, new_var_save) + verify_updates(exp_path, new_obs, new_var) - with tiledbsoma.Experiment.open(output_path) as exp: + with tiledbsoma.Experiment.open(exp_path) as exp: o2 = exp.obs.schema v2 = exp.ms["RNA"].var.schema obs = exp.obs.read().concat().to_pandas() @@ -114,37 +132,14 @@ def test_add(adata, readback): assert v2.field("vst.mean.sq").type == pa.float64() -@pytest.mark.parametrize("readback", [False, True]) -def test_drop(adata, readback): - tempdir = tempfile.TemporaryDirectory() - output_path = tempdir.name - original = adata.copy() - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert 
anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) - - with tiledbsoma.Experiment.open(output_path) as exp: - exp.ms["RNA"].var.schema - - if readback: - new_obs = exp.obs.read().concat().to_pandas() - new_var = exp.ms["RNA"].var.read().concat().to_pandas() - else: - new_obs = adata.obs - new_var = adata.var - +@pytest.mark.parametrize("case", [False, True], indirect=True) +def test_drop(exp_path, new_obs, new_var): del new_obs["groups"] del new_var["vst.mean"] - new_obs_save = new_obs.copy() - new_var_save = new_var.copy() - with tiledbsoma.Experiment.open(output_path, "w") as exp: - tiledbsoma.io.update_obs(exp, new_obs) - tiledbsoma.io.update_var(exp, new_var, "RNA") - assert anndata_dataframe_unmodified(new_obs, new_obs_save) - assert anndata_dataframe_unmodified(new_var, new_var_save) + verify_updates(exp_path, new_obs, new_var) - with tiledbsoma.Experiment.open(output_path) as exp: + with tiledbsoma.Experiment.open(exp_path) as exp: o2 = exp.obs.schema v2 = exp.ms["RNA"].var.schema @@ -154,110 +149,43 @@ def test_drop(adata, readback): v2.field("vst.mean") -@pytest.mark.parametrize("readback", [False, True]) -def test_change(adata, readback): - tempdir = tempfile.TemporaryDirectory() - output_path = tempdir.name - original = adata.copy() - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) - - with tiledbsoma.Experiment.open(output_path) as exp: - o1 = exp.obs.schema - v1 = exp.ms["RNA"].var.schema - - if readback: - new_obs = exp.obs.read().concat().to_pandas() - new_var = exp.ms["RNA"].var.read().concat().to_pandas() - else: - new_obs = adata.obs - new_var = adata.var - +@pytest.mark.parametrize("case", [False, True], indirect=True) +def test_change(exp_path, new_obs, new_var, o1, v1): new_obs["groups"] = np.arange(new_obs.shape[0], dtype=np.int16) 
new_var["vst.mean"] = np.arange(new_var.shape[0], dtype=np.int32) - - new_obs_save = new_obs.copy() - new_var_save = new_var.copy() - with tiledbsoma.Experiment.open(output_path, "w") as exp: - with pytest.raises(ValueError): - tiledbsoma.io.update_obs(exp, new_obs) - with pytest.raises(ValueError): - tiledbsoma.io.update_var(exp, new_var, "RNA") - assert anndata_dataframe_unmodified(new_obs, new_obs_save) - assert anndata_dataframe_unmodified(new_var, new_var_save) - - with tiledbsoma.Experiment.open(output_path) as exp: - o2 = exp.obs.schema - v2 = exp.ms["RNA"].var.schema - - assert o1 == o2 - assert v1 == v2 + verify_updates(exp_path, new_obs, new_var, exc=True) + verify_schemas(exp_path, o1, v1) -@pytest.mark.parametrize("readback", [False, True]) +@pytest.mark.parametrize("case", [False, True], indirect=True) @pytest.mark.parametrize("shift_and_exc", [[0, None], [1, ValueError]]) -def test_change_counts(adata, readback, shift_and_exc): +def test_change_counts( + exp_path, original, new, new_obs, new_var, shift_and_exc, o1, v1 +): shift, exc = shift_and_exc - tempdir = tempfile.TemporaryDirectory() - output_path = tempdir.name - original = adata.copy() - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) - - with tiledbsoma.Experiment.open(output_path) as exp: - o1 = exp.obs.schema - v1 = exp.ms["RNA"].var.schema - - if readback: - old_obs = exp.obs.read().concat().to_pandas() - old_var = exp.ms["RNA"].var.read().concat().to_pandas() - else: - old_obs = adata.obs - old_var = adata.var - - old_nobs = len(old_obs) - old_nvar = len(old_var) - - new_nobs = old_nobs + shift - new_nvar = old_nvar + shift - - new_obs = pd.DataFrame( + + new_nobs = len(new_obs) + new_nvar = len(new_var) + + new_nobs2 = new_nobs + shift + new_nvar2 = new_nvar + shift + + new_obs2 = pd.DataFrame( data={ - "somebool": np.asarray([True] * 
new_nobs), + "somebool": np.asarray([True] * new_nobs2), }, - index=np.arange(new_nobs).astype(str), + index=np.arange(new_nobs2).astype(str), ) - new_var = pd.DataFrame( + new_var2 = pd.DataFrame( data={ - "somebool": np.asarray([True] * new_nvar), + "somebool": np.asarray([True] * new_nvar2), }, - index=np.arange(new_nvar).astype(str), + index=np.arange(new_nvar2).astype(str), ) if exc is None: - new_obs_save = new_obs.copy() - new_var_save = new_var.copy() - with tiledbsoma.Experiment.open(output_path, "w") as exp: - tiledbsoma.io.update_obs(exp, new_obs) - tiledbsoma.io.update_var(exp, new_var, measurement_name="RNA") - - assert anndata_dataframe_unmodified(new_obs, new_obs_save) - assert anndata_dataframe_unmodified(new_var, new_var_save) - + verify_updates(exp_path, new_obs2, new_var2) else: - with tiledbsoma.Experiment.open(output_path, "w") as exp: - with pytest.raises(exc): - tiledbsoma.io.update_obs(exp, new_obs) - with pytest.raises(exc): - tiledbsoma.io.update_var(exp, new_var, measurement_name="RNA") - - assert anndata_dataframe_unmodified(original.obs, adata.obs) - assert anndata_dataframe_unmodified(original.var, adata.var) - - with tiledbsoma.Experiment.open(output_path) as exp: - o2 = exp.obs.schema - v2 = exp.ms["RNA"].var.schema - assert o1 == o2 - assert v1 == v2 + verify_updates(exp_path, new_obs2, new_var2, exc=True) + verify_obs_var(original, new) + verify_schemas(exp_path, o1, v1) diff --git a/apis/python/tests/test_update_matrix.py b/apis/python/tests/test_update_matrix.py index 513f96050b..fd9d7af88d 100644 --- a/apis/python/tests/test_update_matrix.py +++ b/apis/python/tests/test_update_matrix.py @@ -1,31 +1,14 @@ import tempfile -from pathlib import Path - -import anndata -import pytest import tiledbsoma import tiledbsoma.io -HERE = Path(__file__).parent - - -@pytest.fixture -def h5ad_file(request): - input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad" - return input_path - - -@pytest.fixture -def adata(h5ad_file): - return 
anndata.read_h5ad(h5ad_file) - -def test_update_matrix_X(adata): +def test_update_matrix_X(adata_extended): tempdir = tempfile.TemporaryDirectory() output_path = tempdir.name - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") + tiledbsoma.io.from_anndata(output_path, adata_extended, measurement_name="RNA") with tiledbsoma.Experiment.open(output_path) as exp: old = exp.ms["RNA"].X["data"].read().tables().concat() @@ -37,7 +20,7 @@ def test_update_matrix_X(adata): with tiledbsoma.Experiment.open(output_path, "w") as exp: tiledbsoma.io.update_matrix( exp.ms["RNA"].X["data"], - adata.X + 1, + adata_extended.X + 1, ) with tiledbsoma.Experiment.open(output_path) as exp: @@ -52,11 +35,11 @@ def test_update_matrix_X(adata): assert old["soma_data"] != new["soma_data"] -def test_update_matrix_obsm(adata): +def test_update_matrix_obsm(adata_extended): tempdir = tempfile.TemporaryDirectory() output_path = tempdir.name - tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA") + tiledbsoma.io.from_anndata(output_path, adata_extended, measurement_name="RNA") with tiledbsoma.Experiment.open(output_path) as exp: old = exp.ms["RNA"].obsm["X_pca"].read().tables().concat() @@ -68,7 +51,7 @@ def test_update_matrix_obsm(adata): with tiledbsoma.Experiment.open(output_path, "w") as exp: tiledbsoma.io.update_matrix( exp.ms["RNA"].obsm["X_pca"], - adata.obsm["X_pca"] + 1, + adata_extended.obsm["X_pca"] + 1, ) with tiledbsoma.Experiment.open(output_path) as exp: