Skip to content

Commit

Permalink
Factor pytest.fixtures, verify #1988 (#2299)
Browse files Browse the repository at this point in the history
* typeguard nit

missed in #1960

* factor common fixtures into conftest.py

* factor test_update_dataframes fixture

* `verify_obs_var` helper, more `test_update_dataframes` factoring

* test_experiment_query.py: verify #1988

* `s/h5ad_file/h5ad_path/g`, factor `HERE`s
  • Loading branch information
ryan-williams authored and github-actions[bot] committed Mar 22, 2024
1 parent 801b101 commit 6eff473
Show file tree
Hide file tree
Showing 13 changed files with 249 additions and 354 deletions.
3 changes: 1 addition & 2 deletions apis/python/src/tiledbsoma/_read_iters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from typing import (
TYPE_CHECKING,
Dict,
Generator,
Iterator,
List,
Optional,
Expand Down Expand Up @@ -249,7 +248,7 @@ def _table_reader(self) -> Iterator[BlockwiseTableReadIterResult]:
def _reindexed_table_reader(
self,
_pool: Optional[ThreadPoolExecutor] = None,
) -> Generator[BlockwiseTableReadIterResult, None, None]:
) -> Iterator[BlockwiseTableReadIterResult]:
"""Private. Blockwise table reader w/ reindexing. Helper function for sub-class use"""
for tbl, coords in self._maybe_eager_iterator(self._table_reader(), _pool):
pytbl = {}
Expand Down
11 changes: 11 additions & 0 deletions apis/python/src/tiledbsoma/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pandas as pd
import pyarrow as pa
import somacore
from anndata import AnnData
from somacore import options

from . import pytiledbsoma as clib
Expand Down Expand Up @@ -311,3 +312,13 @@ def anndata_dataframe_unmodified_nan_safe(old: pd.DataFrame, new: pd.DataFrame)
if any(old.keys() != new.keys()):
return False
return True


def verify_obs_var(ad0: AnnData, ad1: AnnData, nan_safe: bool = False) -> None:
    """Verify that two ``AnnData``'s ``obs`` and ``var`` dataframes are equivalent.

    Args:
        ad0: First ``AnnData`` to compare.
        ad1: Second ``AnnData`` to compare.
        nan_safe: If true, compare with ``anndata_dataframe_unmodified_nan_safe``;
            otherwise compare with ``anndata_dataframe_unmodified``.

    Raises:
        AssertionError: If the selected comparison reports a difference in
            ``obs`` or in ``var`` (``obs`` is checked first).
    """
    if nan_safe:
        unmodified = anndata_dataframe_unmodified_nan_safe
    else:
        unmodified = anndata_dataframe_unmodified

    # Raise explicitly instead of using ``assert``: this helper lives in the
    # library package, and ``assert`` statements are removed under ``python -O``,
    # which would turn the verification into a silent no-op.
    for axis in ("obs", "var"):
        if not unmodified(getattr(ad0, axis), getattr(ad1, axis)):
            raise AssertionError(f"AnnData `{axis}` dataframes are not equivalent")
5 changes: 5 additions & 0 deletions apis/python/tests/_util.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Type

import pytest
from typeguard import suppress_type_checks

HERE = Path(__file__).parent
PY_ROOT = HERE.parent
TESTDATA = PY_ROOT / "testdata"


@contextmanager
def raises_no_typeguard(exc: Type[Exception], *args: Any, **kwargs: Any):
Expand Down
40 changes: 40 additions & 0 deletions apis/python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from tempfile import TemporaryDirectory

import anndata
import pytest

import tiledbsoma
import tiledbsoma.io

from ._util import TESTDATA


@pytest.fixture
def h5ad_path(request):
    """Return the path of the ``pbmc-small.h5ad`` file under ``TESTDATA``."""
    # The small dataset is preferred because it is faster for automated
    # unit-test / CI runs.
    small_h5ad = TESTDATA / "pbmc-small.h5ad"
    return small_h5ad


@pytest.fixture
def adata(h5ad_path):
    """Read the file provided by the ``h5ad_path`` fixture with ``anndata.read_h5ad``."""
    loaded = anndata.read_h5ad(h5ad_path)
    return loaded


@pytest.fixture
def h5ad_file_extended(request):
    """Return the path of the ``pbmc3k_processed.h5ad`` file under ``TESTDATA``."""
    # This file has more component arrays in it than the small one.
    extended_h5ad = TESTDATA / "pbmc3k_processed.h5ad"
    return extended_h5ad


@pytest.fixture
def adata_extended(h5ad_file_extended):
    """Read the file provided by ``h5ad_file_extended`` with ``anndata.read_h5ad``."""
    loaded = anndata.read_h5ad(h5ad_file_extended)
    return loaded


@pytest.fixture
def pbmc_small(h5ad_path):
    """Ingest the H5AD file at ``h5ad_path`` and yield the resulting open ``Experiment``.

    The file is ingested with ``tiledbsoma.io.from_h5ad`` under measurement
    name ``"RNA"`` into a temporary directory, and the experiment stored there
    is opened and yielded to the test. Both the experiment handle and the
    temporary directory are managed by ``with`` blocks, so they are released
    when the fixture is torn down.
    """
    with TemporaryDirectory() as exp_path:
        tiledbsoma.io.from_h5ad(exp_path, h5ad_path, measurement_name="RNA")
        # Yield from inside both context managers so the experiment stays open
        # (and its directory stays on disk) for the duration of the test.
        with tiledbsoma.Experiment.open(exp_path) as exp:
            yield exp
100 changes: 32 additions & 68 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,42 +16,22 @@
import tiledbsoma.io
from tiledbsoma import Experiment, _constants, _factory
from tiledbsoma._tiledb_object import TileDBObject
from tiledbsoma._util import (
anndata_dataframe_unmodified,
anndata_dataframe_unmodified_nan_safe,
)

HERE = Path(__file__).parent
from tiledbsoma._util import verify_obs_var


@pytest.fixture
def h5ad_file(request):
# pbmc-small is faster for automated unit-test / CI runs.
input_path = HERE.parent / "testdata/pbmc-small.h5ad"
# input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad"
return input_path


@pytest.fixture
def h5ad_file_extended(request):
# This has more component arrays in it
input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad"
return input_path
from ._util import TESTDATA


@pytest.fixture
def h5ad_file_with_obsm_holes(request):
# This has zeroes in an obsm matrix so nnz is not num_rows * num_cols
input_path = HERE.parent / "testdata/pbmc3k-with-obsm-zero.h5ad"
return input_path
return TESTDATA / "pbmc3k-with-obsm-zero.h5ad"


@pytest.fixture
def h5ad_file_uns_string_arrays(request):
# This has uns["louvain_colors"] with dtype.char == "U".
# It also has uns["more_colors"] in the form '[[...]]', as often occurs in the wild.
input_path = HERE.parent / "testdata/pbmc3k.h5ad"
return input_path
return TESTDATA / "pbmc3k.h5ad"


@pytest.fixture
Expand All @@ -68,15 +48,13 @@ def h5ad_file_categorical_int_nan(request):
# s[0] = math.nan
# adata.obs["categ_int_nan"] = s
# adata.write_h5ad("categorical_int_nan.h5ad")
input_path = HERE.parent / "testdata/categorical_int_nan.h5ad"
return input_path
return TESTDATA / "categorical_int_nan.h5ad"


@pytest.fixture
def h5ad_file_X_empty(request):
"""adata.X is a zero-cell sparse matrix"""
input_path = HERE.parent / "testdata/x-empty.h5ad"
return input_path
return TESTDATA / "x-empty.h5ad"


@pytest.fixture
Expand All @@ -85,13 +63,7 @@ def h5ad_file_X_none(request):
adata.X has Python value None if read in non-backed mode; if read in backed
mode, adata.X is not present as an attribute of adata.
"""
input_path = HERE.parent / "testdata/x-none.h5ad"
return input_path


@pytest.fixture
def adata(h5ad_file):
return anndata.read_h5ad(h5ad_file)
return TESTDATA / "x-none.h5ad"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -139,8 +111,7 @@ def test_import_anndata(adata, ingest_modes, X_kind):
if ingest_mode != "schema_only":
have_ingested = True

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(uri)

Expand Down Expand Up @@ -268,21 +239,21 @@ def test_import_anndata(adata, ingest_modes, X_kind):
"othername",
],
)
def test_named_X_layers(h5ad_file, X_layer_name):
def test_named_X_layers(h5ad_path, X_layer_name):
tempdir = tempfile.TemporaryDirectory()
soma_path = tempdir.name

if X_layer_name is None:
tiledbsoma.io.from_h5ad(
soma_path,
h5ad_file.as_posix(),
h5ad_path.as_posix(),
"RNA",
ingest_mode="write",
)
else:
tiledbsoma.io.from_h5ad(
soma_path,
h5ad_file.as_posix(),
h5ad_path.as_posix(),
"RNA",
ingest_mode="write",
X_layer_name=X_layer_name,
Expand All @@ -305,9 +276,9 @@ def _get_fragment_count(array_uri):
@pytest.mark.parametrize(
"resume_mode_h5ad_file",
[
HERE.parent / "testdata/pbmc-small-x-dense.h5ad",
HERE.parent / "testdata/pbmc-small-x-csr.h5ad",
HERE.parent / "testdata/pbmc-small-x-csc.h5ad",
TESTDATA / "pbmc-small-x-dense.h5ad",
TESTDATA / "pbmc-small-x-csr.h5ad",
TESTDATA / "pbmc-small-x-csc.h5ad",
],
)
def test_resume_mode(adata, resume_mode_h5ad_file):
Expand Down Expand Up @@ -417,19 +388,19 @@ def test_ingest_relative(h5ad_file_extended, use_relative_uri):


@pytest.mark.parametrize("ingest_uns_keys", [["louvain_colors"], None])
def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys):
def test_ingest_uns(
tmp_path: pathlib.Path, h5ad_file_extended, adata_extended, ingest_uns_keys
):
tmp_uri = tmp_path.as_uri()
original = anndata.read(h5ad_file_extended)
adata = anndata.read(h5ad_file_extended)
adata_extended2 = anndata.read(h5ad_file_extended)
uri = tiledbsoma.io.from_anndata(
tmp_uri,
adata,
adata_extended2,
measurement_name="hello",
uns_keys=ingest_uns_keys,
)

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(adata_extended, adata_extended2)

with tiledbsoma.Experiment.open(uri) as exp:
uns = exp.ms["hello"]["uns"]
Expand Down Expand Up @@ -459,7 +430,9 @@ def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys)
assert isinstance(random_state, tiledbsoma.DenseNDArray)
assert np.array_equal(random_state.read().to_numpy(), np.array([0]))
got_pca_variance = uns["pca"]["variance"].read().to_numpy()
assert np.array_equal(got_pca_variance, adata.uns["pca"]["variance"])
assert np.array_equal(
got_pca_variance, adata_extended2.uns["pca"]["variance"]
)
else:
assert set(uns) == set(ingest_uns_keys)

Expand Down Expand Up @@ -498,8 +471,7 @@ def test_add_matrix_to_collection(adata):

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
Expand Down Expand Up @@ -625,8 +597,7 @@ def add_matrix_to_collection(

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
Expand Down Expand Up @@ -684,8 +655,7 @@ def test_export_anndata(adata):

tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with _factory.open(output_path) as exp:
with pytest.raises(ValueError):
Expand Down Expand Up @@ -776,8 +746,7 @@ def test_null_obs(adata, tmp_path: Path):
uri = tiledbsoma.io.from_anndata(
output_path, adata, "RNA", ingest_mode="write", X_kind=tiledbsoma.SparseNDArray
)
assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert anndata_dataframe_unmodified_nan_safe(original.var, adata.var)
verify_obs_var(original, adata, nan_safe=True)

exp = tiledbsoma.Experiment.open(uri)
with tiledb.open(exp.obs.uri, "r") as obs:
Expand Down Expand Up @@ -807,8 +776,7 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):
output_path = tmp_path.as_posix()
tiledbsoma.io.from_anndata(output_path, adata, "RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(output_path)

Expand Down Expand Up @@ -954,8 +922,7 @@ def test_id_names(tmp_path, obs_id_name, var_id_name, indexify_obs, indexify_var
obs_id_name=obs_id_name,
var_id_name=var_id_name,
)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with tiledbsoma.Experiment.open(uri) as exp:
assert obs_id_name in exp.obs.keys()
Expand Down Expand Up @@ -1040,8 +1007,7 @@ def test_uns_io(tmp_path, outgest_uns_keys):
soma_uri = tmp_path.as_posix()

tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name="RNA")
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(
Expand Down Expand Up @@ -1092,8 +1058,7 @@ def test_string_nan_columns(tmp_path, adata, write_index):
uri = tmp_path.as_posix()
original = adata.copy()
tiledbsoma.io.from_anndata(uri, adata, measurement_name="RNA")
assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert anndata_dataframe_unmodified_nan_safe(original.var, adata.var)
verify_obs_var(original, adata, nan_safe=True)

# Step 3
with tiledbsoma.open(uri, "r") as exp:
Expand Down Expand Up @@ -1151,8 +1116,7 @@ def test_index_names_io(tmp_path, obs_index_name, var_index_name):

original = adata.copy()
tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(exp, measurement_name)
Expand Down
16 changes: 15 additions & 1 deletion apis/python/tests/test_experiment_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pyarrow as pa
import pytest
from scipy import sparse
from somacore import options
from somacore import AxisQuery, options

import tiledbsoma as soma
from tests._util import raises_no_typeguard
Expand Down Expand Up @@ -905,3 +905,17 @@ def test_experiment_query_uses_threadpool_from_context(soma_experiment):
assert adata is not None

pool.submit.assert_called()


def test_empty_categorical_query(pbmc_small):
    """Check ``obs`` row counts for a matching and a non-matching ``groups`` filter.

    Filtering on ``groups == "g1"`` is expected to return 44 rows, while
    filtering on the value ``"foo"`` is expected to return an empty result.
    """

    def obs_row_count(value_filter):
        # Run an axis query on the "RNA" measurement and count the obs rows
        # after concatenating the query's read results.
        query = pbmc_small.axis_query(
            measurement_name="RNA",
            obs_query=AxisQuery(value_filter=value_filter),
        )
        return len(query.obs().concat())

    assert obs_row_count('groups == "g1"') == 44
    assert obs_row_count('groups == "foo"') == 0
Loading

0 comments on commit 6eff473

Please sign in to comment.