From a3d1d7f5fdb5ff0b37eb35cf031727bc28c8b6e0 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Thu, 2 May 2024 09:14:10 -0400 Subject: [PATCH] [python][spatial] Implement Scene class in Python (#2485) * Update somacore version for Python API and pre-commit to use GitHub commit/branch * Add Scene class to Python implementation * Use Scene in visium ingestion Co-authored-by: Paul Fisher --------- Co-authored-by: Paul Fisher --- .github/workflows/python-so-copying.yml | 4 +- .pre-commit-config.yaml | 2 +- apis/python/setup.py | 3 +- apis/python/src/tiledbsoma/__init__.py | 2 + apis/python/src/tiledbsoma/_experiment.py | 7 +- apis/python/src/tiledbsoma/_factory.py | 6 +- apis/python/src/tiledbsoma/_scene.py | 39 +++++++++ .../src/tiledbsoma/experimental/ingest.py | 85 +++++++++++-------- apis/python/src/tiledbsoma/io/ingest.py | 13 +++ apis/python/tests/test_experiment_basic.py | 6 +- libtiledbsoma/src/soma/soma_experiment.cc | 2 + libtiledbsoma/src/soma/soma_experiment.h | 5 +- 12 files changed, 130 insertions(+), 44 deletions(-) create mode 100644 apis/python/src/tiledbsoma/_scene.py diff --git a/.github/workflows/python-so-copying.yml b/.github/workflows/python-so-copying.yml index cfe9638995..cdbf668723 100644 --- a/.github/workflows/python-so-copying.yml +++ b/.github/workflows/python-so-copying.yml @@ -309,6 +309,8 @@ jobs: otool -L ./venv-soma/lib/python*/site-packages/tiledbsoma/pytiledbsoma.*.so otool -l ./venv-soma/lib/python*/site-packages/tiledbsoma/pytiledbsoma.*.so - name: Install runtime dependencies - run: ./venv-soma/bin/python -m pip install --prefer-binary `grep -v '^\[' apis/python/src/tiledbsoma.egg-info/requires.txt` + run: | + grep -v '^\[' apis/python/src/tiledbsoma.egg-info/requires.txt >/tmp/filtered-requirements.txt + ./venv-soma/bin/pip install --prefer-binary -r /tmp/filtered-requirements.txt - name: Runtime test run: ./venv-soma/bin/python -c "import tiledbsoma; print(tiledbsoma.pytiledbsoma.version())" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f06b1b7884..521e736c14 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: mypy additional_dependencies: - "pandas-stubs==1.5.3.230214" - - "somacore==1.0.11" + - "somacore @ git+https://github.com/single-cell-data/SOMA.git@c403caf9cc48c" # DO NOT MERGE TO MAIN - "types-setuptools==67.4.0.3" args: ["--config-file=apis/python/pyproject.toml", "apis/python/src", "apis/python/devtools"] pass_filenames: false diff --git a/apis/python/setup.py b/apis/python/setup.py index 9ef669ff96..ac0c25b40f 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -329,8 +329,7 @@ def run(self): "pyarrow>=9.0.0; platform_system!='Darwin'", "scanpy>=1.9.2", "scipy", - # Note: the somacore version is in .pre-commit-config.yaml too - "somacore==1.0.11", + "somacore @ git+https://github.com/single-cell-data/SOMA.git@spatial", # DO NOT MERGE TO MAIN "tiledb~=0.28.0", "typing-extensions", # Note "-" even though `import typing_extensions` ], diff --git a/apis/python/src/tiledbsoma/__init__.py b/apis/python/src/tiledbsoma/__init__.py index 946f9cba40..1410232e7f 100644 --- a/apis/python/src/tiledbsoma/__init__.py +++ b/apis/python/src/tiledbsoma/__init__.py @@ -165,6 +165,7 @@ ) from ._indexer import IntIndexer, tiledbsoma_build_index from ._measurement import Measurement +from ._scene import Scene from ._sparse_nd_array import SparseNDArray from .options import SOMATileDBContext, TileDBCreateOptions from .pytiledbsoma import ( @@ -204,6 +205,7 @@ "SOMA_JOINID", "SOMAError", "SOMATileDBContext", + "Scene", "SparseNDArray", "TileDBCreateOptions", "tiledbsoma_build_index", diff --git a/apis/python/src/tiledbsoma/_experiment.py b/apis/python/src/tiledbsoma/_experiment.py index 07f9ce0346..31a70c8a18 100644 --- a/apis/python/src/tiledbsoma/_experiment.py +++ b/apis/python/src/tiledbsoma/_experiment.py @@ -6,7 +6,7 @@ """Implementation of a SOMA Experiment. """ import functools -from typing import Any, Optional +from typing import Any, Optional, Union from somacore import experiment, query from typing_extensions import Self @@ -15,6 +15,7 @@ from ._dataframe import DataFrame from ._indexer import IntIndexer from ._measurement import Measurement +from ._scene import Scene from ._tdb_handles import Wrapper from ._tiledb_object import AnyTileDBObject @@ -24,6 +25,7 @@ class Experiment( # type: ignore[misc] # __eq__ false positive experiment.Experiment[ # type: ignore[type-var] DataFrame, Collection[Measurement], + Collection[Union[DataFrame, Scene]], AnyTileDBObject, ], ): @@ -43,6 +45,8 @@ class Experiment( # type: ignore[misc] # __eq__ false positive defined in this dataframe. ms (Collection): A collection of named measurements. + spatial (Collection): + A collection of spatial scenes. Example: >>> import tiledbsoma @@ -68,6 +72,7 @@ class Experiment( # type: ignore[misc] # __eq__ false positive _subclass_constrained_soma_types = { "obs": ("SOMADataFrame",), "ms": ("SOMACollection",), + "spatial": ("SOMACollection",), } @classmethod diff --git a/apis/python/src/tiledbsoma/_factory.py b/apis/python/src/tiledbsoma/_factory.py index 4369145f0e..8ea66e5c46 100644 --- a/apis/python/src/tiledbsoma/_factory.py +++ b/apis/python/src/tiledbsoma/_factory.py @@ -28,6 +28,7 @@ _dense_nd_array, _experiment, _measurement, + _scene, _sparse_nd_array, _tdb_handles, _tiledb_object, @@ -219,12 +220,13 @@ def _type_name_to_cls(type_name: str) -> Type[AnyTileDBObject]: _experiment.Experiment, _measurement.Measurement, _sparse_nd_array.SparseNDArray, + _scene.Scene, ) } try: return type_map[type_name.lower()] except KeyError as ke: - options = sorted(type_map) + _options = sorted(type_map) raise SOMAError( - f"{type_name!r} is not a recognized SOMA type; expected one of {options}" + f"{type_name!r} is not a recognized SOMA type; expected one of {_options}" ) from ke diff --git a/apis/python/src/tiledbsoma/_scene.py b/apis/python/src/tiledbsoma/_scene.py new file mode 100644 index 0000000000..82a07fa217 --- /dev/null +++ b/apis/python/src/tiledbsoma/_scene.py @@ -0,0 +1,39 @@ +# Copyright (c) 2024 TileDB, Inc. +# +# Licensed under the MIT License. + +"""Implementation of a SOMA Scene.""" + + +from typing import Union + +from somacore import scene + +from ._collection import Collection, CollectionBase +from ._dataframe import DataFrame +from ._dense_nd_array import DenseNDArray +from ._sparse_nd_array import SparseNDArray +from ._tiledb_object import AnyTileDBObject + + +class Scene( # type: ignore[misc] # __eq__ false positive + CollectionBase[AnyTileDBObject], + scene.Scene[ # type: ignore[type-var] + Collection[ + Union[DataFrame, DenseNDArray, SparseNDArray] + ], # not just DataFrame and NDArray since NDArray does not have a common `read` + AnyTileDBObject, + ], +): + """TODO: Add documentation for a Scene + + Lifecycle: + Experimental. + """ + + __slots__ = () + + _subclass_constrained_soma_types = { + "exp": ("SOMACollection",), + "ms": ("SOMACollection",), + } diff --git a/apis/python/src/tiledbsoma/experimental/ingest.py b/apis/python/src/tiledbsoma/experimental/ingest.py index 77075f9ceb..1069bab619 100644 --- a/apis/python/src/tiledbsoma/experimental/ingest.py +++ b/apis/python/src/tiledbsoma/experimental/ingest.py @@ -30,7 +30,7 @@ import scanpy from PIL import Image -from .. import Collection, DataFrame, DenseNDArray, Experiment, SparseNDArray +from .. import Collection, DataFrame, DenseNDArray, Experiment, Scene, SparseNDArray from .._constants import SOMA_JOINID from .._tiledb_object import AnyTileDBObject from .._types import IngestMode @@ -147,52 +147,67 @@ def from_visium( with Experiment.open(uri, mode="w", context=context) as experiment: spatial_uri = f"{uri}/spatial" with _create_or_open_collection( - Collection[Collection[AnyTileDBObject]], spatial_uri, **ingest_ctx + Collection[Union[DataFrame, Scene]], spatial_uri, **ingest_ctx ) as spatial: _maybe_set( experiment, "spatial", spatial, use_relative_uri=use_relative_uri ) scene_uri = f"{spatial_uri}/{scene_name}" - with _create_or_open_collection( - Collection[AnyTileDBObject], scene_uri, **ingest_ctx - ) as scene: + with _create_or_open_collection(Scene, scene_uri, **ingest_ctx) as scene: _maybe_set( spatial, scene_name, scene, use_relative_uri=use_relative_uri ) - obs_locations_uri = f"{scene_uri}/obs_locations" - - # Write spot data and add to the scene. - with _write_visium_spot_dataframe( - obs_locations_uri, - input_tissue_positions, - scale_factors, - obs_df, - obs_id_name, - **ingest_ctx, - ) as obs_locations: + scene_exp_uri = f"{scene_uri}/exp" + with _create_or_open_collection( + Collection[AnyTileDBObject], scene_exp_uri, **ingest_ctx + ) as scene_exp: _maybe_set( - scene, - "obs_locations", - obs_locations, - use_relative_uri=use_relative_uri, + scene, "exp", scene_exp, use_relative_uri=use_relative_uri ) - # Write image data and add to the scene. - images_uri = f"{scene_uri}/images" - with _write_visium_images( - images_uri, - scale_factors, - input_hires=input_hires, - input_lowres=input_lowres, - input_fullres=input_fullres, - use_relative_uri=use_relative_uri, - **ingest_ctx, - ) as images: - _maybe_set( - scene, "images", images, use_relative_uri=use_relative_uri - ) - return uri + obs_locations_uri = f"{scene_exp_uri}/obs_locations" + + # Write spot data and add to the scene. + with _write_visium_spot_dataframe( + obs_locations_uri, + input_tissue_positions, + scale_factors, + obs_df, + obs_id_name, + **ingest_ctx, + ) as obs_locations: + _maybe_set( + scene_exp, + "obs_locations", + obs_locations, + use_relative_uri=use_relative_uri, + ) + + # Write image data and add to the scene. + images_uri = f"{scene_exp_uri}/images" + with _write_visium_images( + images_uri, + scale_factors, + input_hires=input_hires, + input_lowres=input_lowres, + input_fullres=input_fullres, + use_relative_uri=use_relative_uri, + **ingest_ctx, + ) as images: + _maybe_set( + scene_exp, + "images", + images, + use_relative_uri=use_relative_uri, + ) + + scene_ms_uri = f"{scene_uri}/ms" + with _create_or_open_collection( + Collection[Collection[AnyTileDBObject]], scene_ms_uri, **ingest_ctx + ) as scene_ms: + _maybe_set(scene, "ms", scene_ms, use_relative_uri=use_relative_uri) + return uri def _write_visium_spot_dataframe( diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 4e6203f1bd..814b41d42c 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -48,6 +48,7 @@ DenseNDArray, Experiment, Measurement, + Scene, SparseNDArray, _factory, _util, @@ -979,6 +980,18 @@ def _create_or_open_collection( ... +@overload +def _create_or_open_collection( + cls: Type[Scene], + uri: str, + *, + ingestion_params: IngestionParams, + context: Optional["SOMATileDBContext"], + additional_metadata: "AdditionalMetadata" = None, +) -> Scene: + ... + + @no_type_check def _create_or_open_collection( cls: Type[CollectionBase[_TDBO]], diff --git a/apis/python/tests/test_experiment_basic.py b/apis/python/tests/test_experiment_basic.py index 513217afcd..0313fcbbec 100644 --- a/apis/python/tests/test_experiment_basic.py +++ b/apis/python/tests/test_experiment_basic.py @@ -90,6 +90,8 @@ def test_experiment_basic(tmp_path): measurement = ms.add_new_collection("RNA", soma.Measurement) assert soma.Measurement.exists(measurement.uri) assert not soma.Collection.exists(measurement.uri) + spatial = experiment.add_new_collection("spatial", soma.Collection) + assert soma.Collection.exists(spatial.uri) measurement["var"] = create_and_populate_var(urljoin(measurement.uri, "var")) @@ -99,11 +101,13 @@ def test_experiment_basic(tmp_path): x.set("data", nda, use_relative_uri=False) # ---------------------------------------------------------------- - assert len(experiment) == 2 + assert len(experiment) == 3 assert isinstance(experiment.obs, soma.DataFrame) assert isinstance(experiment.ms, soma.Collection) + assert isinstance(experiment.spatial, soma.Collection) assert "obs" in experiment assert "ms" in experiment + assert "spatial" in experiment assert "nonesuch" not in experiment assert experiment.obs == experiment["obs"] diff --git a/libtiledbsoma/src/soma/soma_experiment.cc b/libtiledbsoma/src/soma/soma_experiment.cc index fe19b05ecb..b647638a11 100644 --- a/libtiledbsoma/src/soma/soma_experiment.cc +++ b/libtiledbsoma/src/soma/soma_experiment.cc @@ -60,12 +60,14 @@ void SOMAExperiment::create( platform_config, timestamp); SOMACollection::create(exp_uri + "/ms", ctx, timestamp); + SOMACollection::create(exp_uri + "/spatial", ctx, timestamp); auto name = std::string(std::filesystem::path(uri).filename()); auto group = SOMAGroup::open( OpenMode::write, exp_uri, ctx, name, timestamp); group->set(exp_uri + "/obs", URIType::absolute, "obs"); group->set(exp_uri + "/ms", URIType::absolute, "ms"); + group->set(exp_uri + "/spatial", URIType::absolute, "spatial"); group->close(); } diff --git a/libtiledbsoma/src/soma/soma_experiment.h b/libtiledbsoma/src/soma/soma_experiment.h index 40a744b87e..ded42113b3 100644 --- a/libtiledbsoma/src/soma/soma_experiment.h +++ b/libtiledbsoma/src/soma/soma_experiment.h @@ -109,7 +109,10 @@ class SOMAExperiment : public SOMACollection { // A collection of named measurements std::shared_ptr ms_; + + // A collection of spatial scenes + std::shared_ptr spatial_; }; } // namespace tiledbsoma -#endif // SOMA_EXPERIMENT \ No newline at end of file +#endif // SOMA_EXPERIMENT