diff --git a/.flake8 b/.flake8 index a30ef821..43ded24c 100644 --- a/.flake8 +++ b/.flake8 @@ -38,6 +38,8 @@ ignore = D400 # First line should be in imperative mood; try rephrasing D401 + # Abstract base class without abstractmethod. + B024 exclude = .git,__pycache__,build,docs/_build,dist per-file-ignores = tests/*: D diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1335bdaa..8546b3d7 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.9", "3.10"] + python: ["3.10", "3.12"] os: [ubuntu-latest] env: @@ -52,7 +52,7 @@ jobs: pip install --pre -e ".[dev,test]" - name: Download artifact of test data - if: matrix.python == '3.10' + if: matrix.python == '3.12' uses: dawidd6/action-download-artifact@v2 with: workflow: prepare_test_data.yaml @@ -60,7 +60,7 @@ jobs: path: ./data - name: List the data directory - if: matrix.python == '3.10' + if: matrix.python == '3.12' run: | ls -l ./data pwd diff --git a/.mypy.ini b/.mypy.ini index 4da7a91d..f658d6f6 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -1,5 +1,5 @@ [mypy] -python_version = 3.9 +python_version = 3.10 plugins = numpy.typing.mypy_plugin ignore_errors = False diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 84472fb3..396d67df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,8 +2,8 @@ fail_fast: false default_language_version: python: python3 default_stages: - - commit - - push + - pre-commit + - pre-push minimum_pre_commit_version: 2.16.0 repos: - repo: https://github.com/psf/black @@ -73,7 +73,7 @@ repos: rev: v3.17.0 hooks: - id: pyupgrade - args: [--py3-plus, --py39-plus, --keep-runtime-typing] + args: [--py3-plus, --py310-plus, --keep-runtime-typing] - repo: local hooks: - id: forbid-to-commit diff --git a/.readthedocs.yaml b/.readthedocs.yaml index ec4886d3..f3a78576 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,7 +3,7 @@ version: 2 
build: os: ubuntu-20.04 tools: - python: "3.9" + python: "3.10" sphinx: configuration: docs/conf.py fail_on_warning: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 76f559a6..47f2900a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,13 @@ and this project adheres to [Semantic Versioning][]. [keep a changelog]: https://keepachangelog.com/en/1.0.0/ [semantic versioning]: https://semver.org/spec/v2.0.0.html -## [0.1.4] - xxxx-xx-xx +## [0.1.6] - 2024-11-26 + +- (MERSCOPE) added `feature_key` attribute for points (i.e., the `'gene'` column) #210 +- (Visium HD) get transformation matrices even when only images are parsed #215 +- Support for `xarray.DataTree` (which was moved from `datatree.DataTree`) #232 + +## [0.1.5] - 2024-09-25 ### Added @@ -18,6 +24,8 @@ and this project adheres to [Semantic Versioning][]. - Passing `rgb=None` to image model parser for both visium and visiumhd, leading to 3-4 channel images being interpreted as RGB(A) +- Fix header bug Visium data #200 +- (Visium HD) Fix path parsing when images are missing #204 #206 ## [0.1.4] - 2024-08-07 diff --git a/docs/conf.py b/docs/conf.py index e32e9f58..15bf1458 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -90,6 +90,7 @@ } intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), "anndata": ("https://anndata.readthedocs.io/en/stable/", None), "spatialdata": ("https://scverse-spatialdata.readthedocs.io/en/latest/", None), "numpy": ("https://numpy.org/doc/stable/", None), diff --git a/pyproject.toml b/pyproject.toml index 199868c4..2bdebfe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dynamic= [ ] description = "SpatialData IO for common techs" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = {file = "LICENSE"} authors = [ {name = "scverse"}, @@ -25,7 +25,7 @@ dependencies = [ "anndata", "numpy", "scanpy", - "spatialdata>=0.2.2", + "spatialdata>=0.2.6", "scikit-image", "h5py", "joblib", @@ -83,7 +83,7 @@ skip_glob = 
["docs/*"] [tool.black] line-length = 120 -target-version = ['py39'] +target-version = ['py310'] include = '\.pyi?$' exclude = ''' ( diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index 9ec43fa9..34848137 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -322,6 +322,7 @@ class MerscopeKeys(ModeEnum): GLOBAL_Z = "global_z" Z_INDEX = "ZIndex" REGION_KEY = "cells_region" + GENE_KEY = "gene" @unique diff --git a/src/spatialdata_io/_constants/_enum.py b/src/spatialdata_io/_constants/_enum.py index 63ba74fb..55decf37 100644 --- a/src/spatialdata_io/_constants/_enum.py +++ b/src/spatialdata_io/_constants/_enum.py @@ -1,7 +1,8 @@ from abc import ABC, ABCMeta +from collections.abc import Callable from enum import Enum, EnumMeta from functools import wraps -from typing import Any, Callable +from typing import Any class PrettyEnum(Enum): diff --git a/src/spatialdata_io/_docs.py b/src/spatialdata_io/_docs.py index becd920d..34efbd5e 100644 --- a/src/spatialdata_io/_docs.py +++ b/src/spatialdata_io/_docs.py @@ -1,7 +1,8 @@ from __future__ import annotations +from collections.abc import Callable from textwrap import dedent -from typing import Any, Callable +from typing import Any def inject_docs(**kwargs: Any) -> Callable[..., Any]: # noqa: D103 diff --git a/src/spatialdata_io/_utils.py b/src/spatialdata_io/_utils.py index 65e5e930..90dee2ac 100644 --- a/src/spatialdata_io/_utils.py +++ b/src/spatialdata_io/_utils.py @@ -2,7 +2,8 @@ import functools import warnings -from typing import Any, Callable, TypeVar +from collections.abc import Callable +from typing import Any, TypeVar RT = TypeVar("RT") diff --git a/src/spatialdata_io/readers/_utils/_read_10x_h5.py b/src/spatialdata_io/readers/_utils/_read_10x_h5.py index 55aa344b..4c5b33cc 100644 --- a/src/spatialdata_io/readers/_utils/_read_10x_h5.py +++ b/src/spatialdata_io/readers/_utils/_read_10x_h5.py @@ -31,7 +31,7 
@@ # code below taken from https://github.com/scverse/scanpy/blob/master/scanpy/readwrite.py from pathlib import Path -from typing import Any, Optional, Union +from typing import Any import h5py import numpy as np @@ -40,8 +40,8 @@ def _read_10x_h5( - filename: Union[str, Path], - genome: Optional[str] = None, + filename: str | Path, + genome: str | None = None, gex_only: bool = True, ) -> AnnData: """ @@ -96,7 +96,7 @@ def _read_10x_h5( return adata -def _read_v3_10x_h5(filename: Union[str, Path], *, start: Optional[Any] = None) -> AnnData: +def _read_v3_10x_h5(filename: str | Path, *, start: Any | None = None) -> AnnData: """Read hdf5 file from Cell Ranger v3 or later versions.""" with h5py.File(str(filename), "r") as f: try: diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py index 4623fced..da0ac9e3 100644 --- a/src/spatialdata_io/readers/_utils/_utils.py +++ b/src/spatialdata_io/readers/_utils/_utils.py @@ -3,9 +3,8 @@ import os from collections.abc import Mapping from pathlib import Path -from typing import Any, Optional, Union +from typing import Any, Union -import numpy as np from anndata import AnnData, read_text from h5py import File @@ -13,19 +12,11 @@ PathLike = Union[os.PathLike, str] # type:ignore[type-arg] -try: - from numpy.typing import NDArray - - NDArrayA = NDArray[Any] -except (ImportError, TypeError): - NDArray = np.ndarray - NDArrayA = np.ndarray - def _read_counts( path: str | Path, counts_file: str, - library_id: Optional[str] = None, + library_id: str | None = None, **kwargs: Any, ) -> tuple[AnnData, str]: path = Path(path) diff --git a/src/spatialdata_io/readers/cosmx.py b/src/spatialdata_io/readers/cosmx.py index 9b8df25b..8f4ec38f 100644 --- a/src/spatialdata_io/readers/cosmx.py +++ b/src/spatialdata_io/readers/cosmx.py @@ -5,7 +5,7 @@ from collections.abc import Mapping from pathlib import Path from types import MappingProxyType -from typing import Any, Optional +from typing import Any 
import dask.array as da import numpy as np @@ -38,7 +38,7 @@ @inject_docs(cx=CosmxKeys) def cosmx( path: str | Path, - dataset_id: Optional[str] = None, + dataset_id: str | None = None, transcripts: bool = True, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), diff --git a/src/spatialdata_io/readers/dbit.py b/src/spatialdata_io/readers/dbit.py index 26d3790d..4fc1afdb 100644 --- a/src/spatialdata_io/readers/dbit.py +++ b/src/spatialdata_io/readers/dbit.py @@ -4,7 +4,6 @@ import re from pathlib import Path from re import Pattern -from typing import Optional, Union import anndata as ad import numpy as np @@ -27,9 +26,9 @@ def _check_path( path: Path, pattern: Pattern[str], key: DbitKeys, - path_specific: Optional[str | Path] = None, + path_specific: str | Path | None = None, optional_arg: bool = False, -) -> tuple[Union[Path, None], bool]: +) -> tuple[Path | None, bool]: """ Check that the path is valid and match a regex pattern. 
@@ -218,11 +217,11 @@ def _xy2edges(xy: list[int], scale: float = 1.0, border: bool = True, border_sca @inject_docs(vx=DbitKeys) def dbit( - path: Optional[str | Path] = None, - anndata_path: Optional[str] = None, - barcode_position: Optional[str] = None, - image_path: Optional[str] = None, - dataset_id: Optional[str] = None, + path: str | Path | None = None, + anndata_path: str | None = None, + barcode_position: str | None = None, + image_path: str | None = None, + dataset_id: str | None = None, border: bool = True, border_scale: float = 1, ) -> SpatialData: diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py index c5c5cf03..af258857 100644 --- a/src/spatialdata_io/readers/merscope.py +++ b/src/spatialdata_io/readers/merscope.py @@ -2,10 +2,10 @@ import re import warnings -from collections.abc import Mapping +from collections.abc import Callable, Mapping from pathlib import Path from types import MappingProxyType -from typing import Any, Callable, Literal +from typing import Any, Literal import anndata import dask.dataframe as dd @@ -303,6 +303,7 @@ def _get_points(transcript_path: Path, transformations: dict[str, BaseTransforma transcript_df, coordinates={"x": MerscopeKeys.GLOBAL_X, "y": MerscopeKeys.GLOBAL_Y}, transformations=transformations, + feature_key=MerscopeKeys.GENE_KEY, ) transcripts["gene"] = transcripts["gene"].astype("category") return transcripts diff --git a/src/spatialdata_io/readers/steinbock.py b/src/spatialdata_io/readers/steinbock.py index 2f0283a5..763311a9 100644 --- a/src/spatialdata_io/readers/steinbock.py +++ b/src/spatialdata_io/readers/steinbock.py @@ -4,7 +4,7 @@ from collections.abc import Mapping from pathlib import Path from types import MappingProxyType -from typing import Any, Literal, Union +from typing import Any, Literal import anndata as ad from dask_image.imread import imread @@ -95,7 +95,7 @@ def _get_images( sample: str, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), 
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: +) -> SpatialImage | MultiscaleSpatialImage: image = imread(path / SteinbockKeys.IMAGES_DIR / f"{sample}{SteinbockKeys.IMAGE_SUFFIX}", **imread_kwargs) return Image2DModel.parse(data=image, transformations={sample: Identity()}, rgb=None, **image_models_kwargs) @@ -106,6 +106,6 @@ def _get_labels( labels_kind: str, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), -) -> Union[SpatialImage, MultiscaleSpatialImage]: +) -> SpatialImage | MultiscaleSpatialImage: image = imread(path / labels_kind / f"{sample}{SteinbockKeys.LABEL_SUFFIX}", **imread_kwargs).squeeze() return Labels2DModel.parse(data=image, transformations={sample: Identity()}, **image_models_kwargs) diff --git a/src/spatialdata_io/readers/stereoseq.py b/src/spatialdata_io/readers/stereoseq.py index 0405192b..c2af7951 100644 --- a/src/spatialdata_io/readers/stereoseq.py +++ b/src/spatialdata_io/readers/stereoseq.py @@ -5,7 +5,7 @@ from collections.abc import Mapping from pathlib import Path from types import MappingProxyType -from typing import Any, Union +from typing import Any import anndata as ad import h5py @@ -29,7 +29,7 @@ @inject_docs(xx=SK) def stereoseq( path: str | Path, - dataset_id: Union[str, None] = None, + dataset_id: str | None = None, read_square_bin: bool = True, optional_tif: bool = False, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py index 98a69259..43e21ae2 100644 --- a/src/spatialdata_io/readers/visium_hd.py +++ b/src/spatialdata_io/readers/visium_hd.py @@ -84,7 +84,7 @@ def visium_hd( image_models_kwargs Keyword arguments for :class:`spatialdata.models.Image2DModel`. anndata_kwargs - Keyword arguments for :func:`anndata.read_h5ad`. + Keyword arguments for :func:`anndata.io.read_h5ad`. 
Returns ------- @@ -113,7 +113,6 @@ def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None) ) metadata, hd_layout = _parse_metadata(path, filename_prefix) - transform_matrices = _get_transform_matrices(metadata, hd_layout) file_format = hd_layout[VisiumHDKeys.FILE_FORMAT] if file_format != "1.0": warnings.warn( @@ -338,6 +337,8 @@ def _get_bins(path: Path) -> list[str]: fullres_image_paths = [path_fullres / image_filename for image_filename in fullres_image_filenames] elif list((path_fullres := (path / f"{filename_prefix}tissue_image")).parent.glob(f"{path_fullres.name}.*")): fullres_image_paths = list(path_fullres.parent.glob(f"{path_fullres.name}.*")) + else: + fullres_image_paths = [] if len(fullres_image_paths) > 1: warnings.warn( f"Multiple files found in {path_fullres}, using the first one: {fullres_image_paths[0].stem}. Please" @@ -411,6 +412,7 @@ def _get_bins(path: Path) -> list[str]: suffix="_cytassist_image", ) image = images[dataset_id + "_cytassist_image"] + transform_matrices = _get_transform_matrices(metadata, hd_layout) affine0 = transform_matrices["cytassist_colrow_to_spot_colrow"] affine1 = transform_matrices["spot_colrow_to_microscope_colrow"] set_transformation(image, Sequence([affine0, affine1]), "global") diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py index 74034f37..54f911d2 100644 --- a/src/spatialdata_io/readers/xenium.py +++ b/src/spatialdata_io/readers/xenium.py @@ -10,7 +10,7 @@ from collections.abc import Mapping from pathlib import Path from types import MappingProxyType -from typing import Any, Optional +from typing import Any import dask.array as da import numpy as np @@ -22,7 +22,6 @@ from anndata import AnnData from dask.dataframe import read_parquet from dask_image.imread import imread -from datatree.datatree import DataTree from geopandas import GeoDataFrame from joblib import Parallel, delayed from pyarrow import Table @@ -38,7 +37,7 @@ TableModel, ) from 
spatialdata.transformations.transformations import Affine, Identity, Scale -from xarray import DataArray +from xarray import DataArray, DataTree from spatialdata_io._constants._constants import XeniumKeys from spatialdata_io._docs import inject_docs @@ -364,7 +363,7 @@ def _decode_cell_id_column(cell_id_column: pd.Series) -> pd.Series: def _get_polygons( - path: Path, file: str, specs: dict[str, Any], n_jobs: int, idx: Optional[ArrayLike] = None + path: Path, file: str, specs: dict[str, Any], n_jobs: int, idx: ArrayLike | None = None ) -> GeoDataFrame: def _poly(arr: ArrayLike) -> Polygon: return Polygon(arr[:-1]) diff --git a/tests/test_xenium.py b/tests/test_xenium.py index f5127a85..1c64a912 100644 --- a/tests/test_xenium.py +++ b/tests/test_xenium.py @@ -46,7 +46,7 @@ def test_roundtrip_with_data_limits() -> None: # pointing to "data". # The GitHub workflow "prepare_test_data.yaml" takes care of downloading the datasets and uploading an artifact for the # tests to use -@pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") +@pytest.mark.skipif(sys.version_info < (3, 12), reason="Test requires Python 3.12 or higher") @pytest.mark.parametrize( "dataset,expected", [