Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python/ci] Factor pytest.fixtures, verify #1988 #2299

Merged
merged 6 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions apis/python/src/tiledbsoma/_read_iters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from typing import (
TYPE_CHECKING,
Dict,
Generator,
Iterator,
List,
Optional,
Expand Down Expand Up @@ -249,7 +248,7 @@ def _table_reader(self) -> Iterator[BlockwiseTableReadIterResult]:
def _reindexed_table_reader(
self,
_pool: Optional[ThreadPoolExecutor] = None,
) -> Generator[BlockwiseTableReadIterResult, None, None]:
) -> Iterator[BlockwiseTableReadIterResult]:
"""Private. Blockwise table reader w/ reindexing. Helper function for sub-class use"""
for tbl, coords in self._maybe_eager_iterator(self._table_reader(), _pool):
pytbl = {}
Expand Down
11 changes: 11 additions & 0 deletions apis/python/src/tiledbsoma/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pandas as pd
import pyarrow as pa
import somacore
from anndata import AnnData
from somacore import options

from . import pytiledbsoma as clib
Expand Down Expand Up @@ -311,3 +312,13 @@ def anndata_dataframe_unmodified_nan_safe(old: pd.DataFrame, new: pd.DataFrame)
if any(old.keys() != new.keys()):
return False
return True


def verify_obs_var(ad0: AnnData, ad1: AnnData, nan_safe: bool = False) -> None:
    """Verify that two ``AnnData``'s ``obs`` and ``var`` dataframes are equivalent.

    Args:
        ad0: First ``AnnData`` to compare.
        ad1: Second ``AnnData`` to compare.
        nan_safe: When true, compare with
            ``anndata_dataframe_unmodified_nan_safe``; otherwise compare with
            ``anndata_dataframe_unmodified``.

    Raises:
        AssertionError: If the selected comparison returns a falsy result for
            ``obs`` or for ``var`` (``obs`` is checked first).
    """
    compare = (
        anndata_dataframe_unmodified_nan_safe
        if nan_safe
        else anndata_dataframe_unmodified
    )
    # Raise explicitly instead of using ``assert`` statements: this helper
    # lives in library code, and ``assert`` is compiled away under
    # ``python -O``, which would turn the verification into a silent no-op.
    for attr in ("obs", "var"):
        if not compare(getattr(ad0, attr), getattr(ad1, attr)):
            raise AssertionError(f"AnnData `{attr}` dataframes are not equivalent")
5 changes: 5 additions & 0 deletions apis/python/tests/_util.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Type

import pytest
from typeguard import suppress_type_checks

HERE = Path(__file__).parent
PY_ROOT = HERE.parent
TESTDATA = PY_ROOT / "testdata"


@contextmanager
def raises_no_typeguard(exc: Type[Exception], *args: Any, **kwargs: Any):
Expand Down
40 changes: 40 additions & 0 deletions apis/python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from tempfile import TemporaryDirectory

import anndata
import pytest

import tiledbsoma
import tiledbsoma.io

from ._util import TESTDATA


@pytest.fixture
def h5ad_path(request):
    """Return the path of the ``pbmc-small.h5ad`` file under ``TESTDATA``."""
    # The small dataset is used because it is faster for automated
    # unit-test / CI runs.
    small_h5ad = TESTDATA / "pbmc-small.h5ad"
    return small_h5ad


@pytest.fixture
def adata(h5ad_path):
    """Read the file provided by the ``h5ad_path`` fixture via ``anndata.read_h5ad``."""
    loaded = anndata.read_h5ad(h5ad_path)
    return loaded


@pytest.fixture
def h5ad_file_extended(request):
    """Return the path of the ``pbmc3k_processed.h5ad`` file under ``TESTDATA``."""
    # This file has more component arrays in it than the small one.
    extended_h5ad = TESTDATA / "pbmc3k_processed.h5ad"
    return extended_h5ad


@pytest.fixture
def adata_extended(h5ad_file_extended):
    """Read the file provided by ``h5ad_file_extended`` via ``anndata.read_h5ad``."""
    loaded = anndata.read_h5ad(h5ad_file_extended)
    return loaded


@pytest.fixture
def pbmc_small(h5ad_path):
    """Ingest the ``h5ad_path`` file into a temporary directory and yield the opened ``Experiment``.

    The file is ingested with ``tiledbsoma.io.from_h5ad`` under measurement
    name ``"RNA"``. The yielded object is the value bound by
    ``tiledbsoma.Experiment.open(exp_path)``; both that context and the
    temporary directory are exited once the consuming test is done with
    the fixture.
    """
    with TemporaryDirectory() as exp_path:
        tiledbsoma.io.from_h5ad(exp_path, h5ad_path, measurement_name="RNA")
        # Yield from inside both ``with`` blocks so the experiment stays open
        # and its backing directory still exists while the test runs.
        with tiledbsoma.Experiment.open(exp_path) as exp:
            yield exp
100 changes: 32 additions & 68 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,42 +16,22 @@
import tiledbsoma.io
from tiledbsoma import Experiment, _constants, _factory
from tiledbsoma._tiledb_object import TileDBObject
from tiledbsoma._util import (
anndata_dataframe_unmodified,
anndata_dataframe_unmodified_nan_safe,
)

HERE = Path(__file__).parent
from tiledbsoma._util import verify_obs_var


@pytest.fixture
def h5ad_file(request):
# pbmc-small is faster for automated unit-test / CI runs.
input_path = HERE.parent / "testdata/pbmc-small.h5ad"
# input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad"
return input_path


@pytest.fixture
def h5ad_file_extended(request):
# This has more component arrays in it
input_path = HERE.parent / "testdata/pbmc3k_processed.h5ad"
return input_path
from ._util import TESTDATA


@pytest.fixture
def h5ad_file_with_obsm_holes(request):
# This has zeroes in an obsm matrix so nnz is not num_rows * num_cols
input_path = HERE.parent / "testdata/pbmc3k-with-obsm-zero.h5ad"
return input_path
return TESTDATA / "pbmc3k-with-obsm-zero.h5ad"


@pytest.fixture
def h5ad_file_uns_string_arrays(request):
# This has uns["louvain_colors"] with dtype.char == "U".
# It also has uns["more_colors"] in the form '[[...]]', as often occurs in the wild.
input_path = HERE.parent / "testdata/pbmc3k.h5ad"
return input_path
return TESTDATA / "pbmc3k.h5ad"


@pytest.fixture
Expand All @@ -68,15 +48,13 @@ def h5ad_file_categorical_int_nan(request):
# s[0] = math.nan
# adata.obs["categ_int_nan"] = s
# adata.write_h5ad("categorical_int_nan.h5ad")
input_path = HERE.parent / "testdata/categorical_int_nan.h5ad"
return input_path
return TESTDATA / "categorical_int_nan.h5ad"


@pytest.fixture
def h5ad_file_X_empty(request):
"""adata.X is a zero-cell sparse matrix"""
input_path = HERE.parent / "testdata/x-empty.h5ad"
return input_path
return TESTDATA / "x-empty.h5ad"


@pytest.fixture
Expand All @@ -85,13 +63,7 @@ def h5ad_file_X_none(request):
adata.X has Python value None if read in non-backed mode; if read in backed
mode, adata.X is not present as an attribute of adata.
"""
input_path = HERE.parent / "testdata/x-none.h5ad"
return input_path


@pytest.fixture
def adata(h5ad_file):
return anndata.read_h5ad(h5ad_file)
return TESTDATA / "x-none.h5ad"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -139,8 +111,7 @@ def test_import_anndata(adata, ingest_modes, X_kind):
if ingest_mode != "schema_only":
have_ingested = True

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(uri)

Expand Down Expand Up @@ -268,21 +239,21 @@ def test_import_anndata(adata, ingest_modes, X_kind):
"othername",
],
)
def test_named_X_layers(h5ad_file, X_layer_name):
def test_named_X_layers(h5ad_path, X_layer_name):
tempdir = tempfile.TemporaryDirectory()
soma_path = tempdir.name

if X_layer_name is None:
tiledbsoma.io.from_h5ad(
soma_path,
h5ad_file.as_posix(),
h5ad_path.as_posix(),
"RNA",
ingest_mode="write",
)
else:
tiledbsoma.io.from_h5ad(
soma_path,
h5ad_file.as_posix(),
h5ad_path.as_posix(),
"RNA",
ingest_mode="write",
X_layer_name=X_layer_name,
Expand All @@ -305,9 +276,9 @@ def _get_fragment_count(array_uri):
@pytest.mark.parametrize(
"resume_mode_h5ad_file",
[
HERE.parent / "testdata/pbmc-small-x-dense.h5ad",
HERE.parent / "testdata/pbmc-small-x-csr.h5ad",
HERE.parent / "testdata/pbmc-small-x-csc.h5ad",
TESTDATA / "pbmc-small-x-dense.h5ad",
TESTDATA / "pbmc-small-x-csr.h5ad",
TESTDATA / "pbmc-small-x-csc.h5ad",
],
)
def test_resume_mode(adata, resume_mode_h5ad_file):
Expand Down Expand Up @@ -417,19 +388,19 @@ def test_ingest_relative(h5ad_file_extended, use_relative_uri):


@pytest.mark.parametrize("ingest_uns_keys", [["louvain_colors"], None])
def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys):
def test_ingest_uns(
tmp_path: pathlib.Path, h5ad_file_extended, adata_extended, ingest_uns_keys
):
tmp_uri = tmp_path.as_uri()
original = anndata.read(h5ad_file_extended)
adata = anndata.read(h5ad_file_extended)
adata_extended2 = anndata.read(h5ad_file_extended)
uri = tiledbsoma.io.from_anndata(
tmp_uri,
adata,
adata_extended2,
measurement_name="hello",
uns_keys=ingest_uns_keys,
)

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(adata_extended, adata_extended2)

with tiledbsoma.Experiment.open(uri) as exp:
uns = exp.ms["hello"]["uns"]
Expand Down Expand Up @@ -459,7 +430,9 @@ def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys)
assert isinstance(random_state, tiledbsoma.DenseNDArray)
assert np.array_equal(random_state.read().to_numpy(), np.array([0]))
got_pca_variance = uns["pca"]["variance"].read().to_numpy()
assert np.array_equal(got_pca_variance, adata.uns["pca"]["variance"])
assert np.array_equal(
got_pca_variance, adata_extended2.uns["pca"]["variance"]
)
else:
assert set(uns) == set(ingest_uns_keys)

Expand Down Expand Up @@ -498,8 +471,7 @@ def test_add_matrix_to_collection(adata):

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
Expand Down Expand Up @@ -625,8 +597,7 @@ def add_matrix_to_collection(

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
Expand Down Expand Up @@ -684,8 +655,7 @@ def test_export_anndata(adata):

tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with _factory.open(output_path) as exp:
with pytest.raises(ValueError):
Expand Down Expand Up @@ -776,8 +746,7 @@ def test_null_obs(adata, tmp_path: Path):
uri = tiledbsoma.io.from_anndata(
output_path, adata, "RNA", ingest_mode="write", X_kind=tiledbsoma.SparseNDArray
)
assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert anndata_dataframe_unmodified_nan_safe(original.var, adata.var)
verify_obs_var(original, adata, nan_safe=True)

exp = tiledbsoma.Experiment.open(uri)
with tiledb.open(exp.obs.uri, "r") as obs:
Expand Down Expand Up @@ -807,8 +776,7 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):
output_path = tmp_path.as_posix()
tiledbsoma.io.from_anndata(output_path, adata, "RNA")

assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

exp = tiledbsoma.Experiment.open(output_path)

Expand Down Expand Up @@ -954,8 +922,7 @@ def test_id_names(tmp_path, obs_id_name, var_id_name, indexify_obs, indexify_var
obs_id_name=obs_id_name,
var_id_name=var_id_name,
)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with tiledbsoma.Experiment.open(uri) as exp:
assert obs_id_name in exp.obs.keys()
Expand Down Expand Up @@ -1040,8 +1007,7 @@ def test_uns_io(tmp_path, outgest_uns_keys):
soma_uri = tmp_path.as_posix()

tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name="RNA")
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(
Expand Down Expand Up @@ -1092,8 +1058,7 @@ def test_string_nan_columns(tmp_path, adata, write_index):
uri = tmp_path.as_posix()
original = adata.copy()
tiledbsoma.io.from_anndata(uri, adata, measurement_name="RNA")
assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert anndata_dataframe_unmodified_nan_safe(original.var, adata.var)
verify_obs_var(original, adata, nan_safe=True)

# Step 3
with tiledbsoma.open(uri, "r") as exp:
Expand Down Expand Up @@ -1151,8 +1116,7 @@ def test_index_names_io(tmp_path, obs_index_name, var_index_name):

original = adata.copy()
tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)
verify_obs_var(original, adata)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(exp, measurement_name)
Expand Down
16 changes: 15 additions & 1 deletion apis/python/tests/test_experiment_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pyarrow as pa
import pytest
from scipy import sparse
from somacore import options
from somacore import AxisQuery, options

import tiledbsoma as soma
from tests._util import raises_no_typeguard
Expand Down Expand Up @@ -905,3 +905,17 @@ def test_experiment_query_uses_threadpool_from_context(soma_experiment):
assert adata is not None

pool.submit.assert_called()


def test_empty_categorical_query(pbmc_small):
    """Check ``obs`` row counts for a ``groups`` filter that matches and one that does not."""
    # Each case pairs an obs value filter with the number of rows expected back.
    cases = [
        ('groups == "g1"', 44),
        ('groups == "foo"', 0),
    ]
    for value_filter, expected_rows in cases:
        query = pbmc_small.axis_query(
            measurement_name="RNA",
            obs_query=AxisQuery(value_filter=value_filter),
        )
        obs_table = query.obs().concat()
        assert len(obs_table) == expected_rows
Loading
Loading