diff --git a/.github/workflows/prepare_test_data.yaml b/.github/workflows/prepare_test_data.yaml index 10d58c44..77f984fc 100644 --- a/.github/workflows/prepare_test_data.yaml +++ b/.github/workflows/prepare_test_data.yaml @@ -18,15 +18,30 @@ jobs: run: | mkdir -p ./data cd ./data + + # 10x Genomics Xenium 2.0.0 curl -O https://cf.10xgenomics.com/samples/xenium/2.0.0/Xenium_V1_human_Breast_2fov/Xenium_V1_human_Breast_2fov_outs.zip curl -O https://cf.10xgenomics.com/samples/xenium/2.0.0/Xenium_V1_human_Lung_2fov/Xenium_V1_human_Lung_2fov_outs.zip + # 10x Genomics Xenium 3.0.0 (5K) Mouse ileum, multimodal cell segmentation + # this file seems to be corrupted; skipping it for now + # curl -O https://cf.10xgenomics.com/samples/xenium/3.0.0/Xenium_Prime_MultiCellSeg_Mouse_Ileum_tiny/Xenium_Prime_MultiCellSeg_Mouse_Ileum_tiny.zip + + # 10x Genomics Xenium 3.0.0 (5K) Mouse ileum, nuclear expansion + curl -O https://cf.10xgenomics.com/samples/xenium/3.0.0/Xenium_Prime_Mouse_Ileum_tiny/Xenium_Prime_Mouse_Ileum_tiny_outs.zip + + # Spatial Genomics seqFISH v2 + curl -O https://s3.embl.de/spatialdata/raw_data/seqfish-2-test-dataset.zip + - name: Unzip files run: | - unzip ./data/Xenium_V1_human_Breast_2fov_outs.zip -d ./data/Xenium_V1_human_Breast_2fov_outs - unzip ./data/Xenium_V1_human_Lung_2fov_outs.zip -d ./data/Xenium_V1_human_Lung_2fov_outs - rm ./data/Xenium_V1_human_Breast_2fov_outs.zip - rm ./data/Xenium_V1_human_Lung_2fov_outs.zip + cd ./data + for file in *.zip; do + dir="${file%.zip}" + mkdir -p "$dir" + unzip "$file" -d "$dir" + rm "$file" + done - name: Upload artifacts uses: actions/upload-artifact@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index 47f2900a..b9f29809 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,135 +8,141 @@ and this project adheres to [Semantic Versioning][]. 
[keep a changelog]: https://keepachangelog.com/en/1.0.0/ [semantic versioning]: https://semver.org/spec/v2.0.0.html +## incoming release + +- (Visium/Visium HD) lowres and hires images now mapped also to the 'global' coordinate system #230 +- (Macsima) added support @berombau #224 +- (seqFISH) support for v2 instrument #227 + ## [0.1.6] - 2024-11-26 -- (MERSCOPE) added `feature_key` attribute for points (i.e., the `'gene'` column) #210 -- (Visium HD) get transformation matrices even when only images are parsed #215 -- Support for `xarray.DataTree` (which was moved from `datatree.DataTree`) #232 +- (MERSCOPE) added `feature_key` attribute for points (i.e., the `'gene'` column) #210 +- (Visium HD) get transformation matrices even when only images are parsed #215 +- Support for `xarray.DataTree` (which was moved from `datatree.DataTree`) #232 ## [0.1.5] - 2024-09-25 ### Added -- (Xenium) added `dims` parameter for more control in `xenium_aligned_image()` +- (Xenium) added `dims` parameter for more control in `xenium_aligned_image()` ### Fixed -- Passing `rgb=None` to image model parser for both visium and visiumhd, leading to 3-4 channel images being - interpreted as RGB(A) -- Fix header bug Visium data #200 -- (Visium HD) Fix path parsing when images are missing #204 #206 +- Passing `rgb=None` to image model parser for both visium and visiumhd, leading to 3-4 channel images being + interpreted as RGB(A) +- Fix header bug Visium data #200 +- (Visium HD) Fix path parsing when images are missing #204 #206 ## [0.1.4] - 2024-08-07 ### Changed -- (Xenium) changed default target of table to labels; radii of circles computed from cells, not nuclei #179 -- (Visium HD) changed default geometry to squares from circles for the bins; added parameter to choose #183 -- (CosMx) dropping points element with zero-length from the cosmx reader #191 +- (Xenium) changed default target of table to labels; radii of circles computed from cells, not nuclei #179 +- (Visium HD) changed default geometry to squares from circles for the bins; added parameter to choose #183 +- (CosMx) dropping points element with zero-length from the cosmx reader #191 ## [0.1.3] - 2024-07-03 ### Added -- (Xenium) support reading multi-polygon selection files from the Xenium Explorer -- (ISS) An experimental loader to load elemental ISS data objects, e.g. raw.tif, label.tif and anndata.h5ad -- (Stereo-seq) Added reader @LLehner @timtreis @florianingelfinger #70 -- (MERSCOPE) Optional rioxarray backend for MERSCOPE data (reads chunks) -- (MERSCOPE) Can choose which elements should be loaded +- (Xenium) support reading multi-polygon selection files from the Xenium Explorer +- (ISS) An experimental loader to load elemental ISS data objects, e.g. raw.tif, label.tif and anndata.h5ad +- (Stereo-seq) Added reader @LLehner @timtreis @florianingelfinger #70 +- (MERSCOPE) Optional rioxarray backend for MERSCOPE data (reads chunks) +- (MERSCOPE) Can choose which elements should be loaded ### Fixed -- (Visium) Fixed issue with joining a SpatialElement with a table due to index values not being unique. - obs_names_make_unique is now called internally to enforce unique index values allowing for join operations. +- (Visium) Fixed issue with joining a SpatialElement with a table due to index values not being unique. + obs_names_make_unique is now called internally to enforce unique index values allowing for join operations. 
### Changed -- (MERSCOPE) "global" coordinate system is used as a default instead of "microns" +- (MERSCOPE) "global" coordinate system is used as a default instead of "microns" ## [0.1.2] - 2024-03-30 ### Added -- (Visium HD) added reader, coauthored by @LLehner +- (Visium HD) added reader, coauthored by @LLehner ### Fixed -- (Xenium) reader for 1.0.1 (paper data) and unknown versions -- (Xenium) fix in reading "minimalistic" Xenium datasets #132 +- (Xenium) reader for 1.0.1 (paper data) and unknown versions +- (Xenium) fix in reading "minimalistic" Xenium datasets #132 ## [0.1.1] - 2024-03-24 ### Added -- (Xenium) support for post-xenium aligned images (IF, HE) -- (Xenium) reader for the selection coordinates file from the Xenium Explorer -- (Xenium) support for the new Xenium 2.0.0 (multimodal segmentation) -- (Xenium) reading multiscale labels from cells.zarr.zip -- (MCMICRO) support for TMAs (such as the data of exemplar-002) -- (DBiT-seq) reader -- converter functions `experimental.to_legacy_anndata()` and `experimental.from_legacy_anndata()` -- (Visium) support for raw reads (capture locations not under tissue) +- (Xenium) support for post-xenium aligned images (IF, HE) +- (Xenium) reader for the selection coordinates file from the Xenium Explorer +- (Xenium) support for the new Xenium 2.0.0 (multimodal segmentation) +- (Xenium) reading multiscale labels from cells.zarr.zip +- (MCMICRO) support for TMAs (such as the data of exemplar-002) +- (DBiT-seq) reader +- converter functions `experimental.to_legacy_anndata()` and `experimental.from_legacy_anndata()` +- (Visium) support for raw reads (capture locations not under tissue) ### Fixed -- (Xenium) fixed index (fail on write) -- (Xenium) renamed cells_as_shapes to cells_as_circles; set default to True -- (MERSCOPE) don't try to load unexisting elements #87 -- (Visium) fixed axes ordering +- (Xenium) fixed index (fail on write) +- (Xenium) renamed cells_as_shapes to cells_as_circles; set default to True +- (MERSCOPE) don't try to load unexisting elements #87 +- (Visium) fixed axes ordering ## [0.0.9] - 2023-11-06 ### Fixed -- (Xenium) bug when converting feature_name #81, from @fbnrst -- (Visium) visium() supports file counts without dataset_id #91 +- (Xenium) bug when converting feature_name #81, from @fbnrst +- (Visium) visium() supports file counts without dataset_id #91 ## [0.0.8] - 2023-10-02 ### Fixed -- (Xenium) coerce cell id to str #64 -- (MERSCOPE) fix coordinate transformation #68 -- (MERSCOPE) Improvements/fixes: merscope reader #73 +- (Xenium) coerce cell id to str #64 +- (MERSCOPE) fix coordinate transformation #68 +- (MERSCOPE) Improvements/fixes: merscope reader #73 ## [0.0.7] - 2023-07-23 ### Fixed -- Bugs in Xenium and MERSCOPE +- Bugs in Xenium and MERSCOPE ## [0.0.5] - 2023-06-21 ### Added -- MERFISH reader (from @quentinblampey) -- CODEX reader (from @LLehner) +- MERFISH reader (from @quentinblampey) +- CODEX reader (from @LLehner) ### Fixed -- Issues on Visium reader (thanks @ilia-kats) and Xenium reader +- Issues on Visium reader (thanks @ilia-kats) and Xenium reader ## [0.0.4] - 2023-05-23 ### Added -- Curio reader +- Curio reader ## [0.0.3] - 2023-05-22 ### Merged -- Merge pull request #40 from scverse/fix/categories +- Merge pull request #40 from scverse/fix/categories ## [0.0.2] - 2023-05-04 ### Changed -- Revert version regex (#37) +- Revert version regex (#37) ## [0.0.1] - 2023-05-04 ### Tested -- Test installation from pypi +- Test installation from pypi diff --git a/README.md b/README.md index 
f4032a1b..2eaa490c 100644 --- a/README.md +++ b/README.md @@ -12,26 +12,41 @@ This package contains reader functions to load common spatial omics formats into SpatialData. Currently, we provide support for: -- 10x Genomics Visium® -- 10x Genomics Visium HD® -- 10x Genomics Xenium® -- Akoya PhenoCycler® (formerly CODEX®) -- Curio Seeker® -- DBiT-seq -- MCMICRO (output data) -- NanoString CosMx® -- Spatial Genomics GenePS® (seqFISH) -- Steinbock (output data) -- STOmics Stereo-seq® -- Vizgen MERSCOPE® (MERFISH) +- 10x Genomics Visium® +- 10x Genomics Visium HD® +- 10x Genomics Xenium® +- Akoya PhenoCycler® (formerly CODEX®) +- Curio Seeker® +- DBiT-seq +- MCMICRO (output data) +- NanoString CosMx® +- Spatial Genomics GenePS® (seqFISH) +- Steinbock (output data) +- STOmics Stereo-seq® +- Vizgen MERSCOPE® (MERFISH) +- MACSima® (MACS® iQ View output) Note: all mentioned technologies are registered trademarks of their respective companies. +## Known limitations + +Contributions for addressing the below limitations are very welcomed. + +- Only Stereo-seq 7.x is supported, 8.x is not currently supported. https://github.com/scverse/spatialdata-io/issues/161 + +### How to Contribute + +1. **Open a GitHub Issue**: Start by opening a new issue or commenting on an existing one in the repository. Clearly describe the problem and your proposed changes to avoid overlapping efforts with others. + +2. **Submit a Pull Request (PR)**: Once the issue is discussed, submit a PR to the `spatialdata-io` repository. Ensure your PR includes information about a suitable dataset for testing the reader, ideally no larger than 10 GB. Include clear instructions for accessing the data, preferably with a `curl` or `wget` command for easy downloading. + +3. **Optional Enhancements**: To facilitate reproducibility and ease of data access, consider adding a folder in the [spatialdata-sandbox](https://github.com/giovp/spatialdata-sandbox) repository. Include a `download.py` and `to_zarr.py` script (refer to examples in the repository) to enable others to reproduce your reader by simply running these scripts sequentially. + ## Getting started Please refer to the [documentation][link-docs]. In particular, the -- [API documentation][link-api]. +- [API documentation][link-api]. ## Installation @@ -61,10 +76,10 @@ If you found a bug, please use the [issue tracker][issue-tracker]. Technologies that can be read into `SpatialData` objects using third-party libraries: -- METASPACE (MALDI, ...): [metaspace-converter](https://github.com/metaspace2020/metaspace-converter) -- PhenoCycler®: [SOPA](https://github.com/gustaveroussy/sopa) -- MACSima®: [SOPA](https://github.com/gustaveroussy/sopa) -- Hyperion® (Imaging Mass Cytometry): [SOPA](https://github.com/gustaveroussy/sopa) +- METASPACE (MALDI, ...): [metaspace-converter](https://github.com/metaspace2020/metaspace-converter) +- PhenoCycler®: [SOPA](https://github.com/gustaveroussy/sopa) +- MACSima®: [SOPA](https://github.com/gustaveroussy/sopa) +- Hyperion® (Imaging Mass Cytometry): [SOPA](https://github.com/gustaveroussy/sopa) ## Disclaimer diff --git a/docs/contributing.md b/docs/contributing.md index b678c149..7df41462 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -138,10 +138,10 @@ in the cookiecutter-scverse template. Please write documentation for new or changed features and use-cases. 
This project uses [sphinx][] with the following features: -- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text -- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension). -- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) -- [Sphinx autodoc typehints][], to automatically reference annotated input and output types +- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text +- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension). +- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) +- [Sphinx autodoc typehints][], to automatically reference annotated input and output types See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information on how to write documentation. @@ -158,10 +158,10 @@ repository. #### Hints -- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only - if you do so can sphinx automatically create a link to the external documentation. -- If building the documentation fails because of a missing link that is outside your control, you can add an entry to - the `nitpick_ignore` list in `docs/conf.py` +- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only + if you do so can sphinx automatically create a link to the external documentation. +- If building the documentation fails because of a missing link that is outside your control, you can add an entry to + the `nitpick_ignore` list in `docs/conf.py` #### Building the docs locally diff --git a/docs/template_usage.md b/docs/template_usage.md index bf8152f6..f8efbc16 100644 --- a/docs/template_usage.md +++ b/docs/template_usage.md @@ -115,13 +115,13 @@ We recommend using [readthedocs.org][] (RTD) to build and host the documentation To enable readthedocs, head over to [their website][readthedocs.org] and sign in with your GitHub account. On the RTD dashboard choose "Import a Project" and follow the instructions to add your repository. -- Make sure to choose the correct name of the default branch. On GitHub, the name of the default branch should be `main` (it has - recently changed from `master` to `main`). -- We recommend to enable documentation builds for pull requests (PRs). This ensures that a PR doesn't introduce changes - that break the documentation. To do so, got to `Admin -> Advanced Settings`, check the - `Build pull requests for this projects` option, and click `Save`. For more information, please refer to - the [official RTD documentation](https://docs.readthedocs.io/en/stable/pull-requests.html). -- If you find the RTD builds are failing, you can disable the `fail_on_warning` option in `.readthedocs.yaml`. +- Make sure to choose the correct name of the default branch. On GitHub, the name of the default branch should be `main` (it has + recently changed from `master` to `main`). +- We recommend to enable documentation builds for pull requests (PRs). This ensures that a PR doesn't introduce changes + that break the documentation. To do so, got to `Admin -> Advanced Settings`, check the + `Build pull requests for this projects` option, and click `Save`. 
For more information, please refer to + the [official RTD documentation](https://docs.readthedocs.io/en/stable/pull-requests.html). +- If you find the RTD builds are failing, you can disable the `fail_on_warning` option in `.readthedocs.yaml`. If your project is private, there are ways to enable docs rendering on [readthedocs.org][] but it is more cumbersome and requires a different subscription for read the docs. See a guide [here](https://docs.readthedocs.io/en/stable/guides/importing-private-repositories.html). @@ -144,52 +144,52 @@ Once authorized, pre-commit.ci should automatically be activated. The following pre-commit checks are for code style and format: -- [black](https://black.readthedocs.io/en/stable/): standard code - formatter in Python. -- [isort](https://pycqa.github.io/isort/): sort module imports into - sections and types. -- [prettier](https://prettier.io/docs/en/index.html): standard code - formatter for non-Python files (e.g. YAML). -- [blacken-docs](https://github.com/asottile/blacken-docs): black on - python code in docs. +- [black](https://black.readthedocs.io/en/stable/): standard code + formatter in Python. +- [isort](https://pycqa.github.io/isort/): sort module imports into + sections and types. +- [prettier](https://prettier.io/docs/en/index.html): standard code + formatter for non-Python files (e.g. YAML). +- [blacken-docs](https://github.com/asottile/blacken-docs): black on + python code in docs. The following pre-commit checks are for errors and inconsistencies: -- [flake8](https://flake8.pycqa.org/en/latest/): standard check for errors in Python files. - - [flake8-tidy-imports](https://github.com/adamchainz/flake8-tidy-imports): - tidy module imports. - - [flake8-docstrings](https://github.com/PyCQA/flake8-docstrings): - pydocstyle extension of flake8. - - [flake8-rst-docstrings](https://github.com/peterjc/e8-rst-docstrings): - extension of `flake8-docstrings` for `rst` docs. - - [flake8-comprehensions](https://github.com/adamchainz/e8-comprehensions): - write better list/set/dict comprehensions. - - [flake8-bugbear](https://github.com/PyCQA/flake8-bugbear): - find possible bugs and design issues in program. - - [flake8-blind-except](https://github.com/elijahandrews/flake8-blind-except): - checks for blind, catch-all `except` statements. -- [yesqa](https://github.com/asottile/yesqa): - remove unneccesary `# noqa` comments, follows additional dependencies listed above. -- [autoflake](https://github.com/PyCQA/autoflake): - remove unused imports and variables. -- [pre-commit-hooks](https://github.com/pre-commit/pre-commit-hooks): generic pre-commit hooks. - - **detect-private-key**: checks for the existence of private keys. - - **check-ast**: check whether files parse as valid python. - - **end-of-file-fixer**:check files end in a newline and only a newline. - - **mixed-line-ending**: checks mixed line ending. - - **trailing-whitespace**: trims trailing whitespace. - - **check-case-conflict**: check files that would conflict with case-insensitive file systems. -- [pyupgrade](https://github.com/asottile/pyupgrade): - upgrade syntax for newer versions of the language. -- **forbid-to-commit**: Make sure that `*.rej` files cannot be commited. These files are created by the - [automated template sync](#automated-template-sync) if there's a merge conflict and need to be addressed manually. +- [flake8](https://flake8.pycqa.org/en/latest/): standard check for errors in Python files. 
+ - [flake8-tidy-imports](https://github.com/adamchainz/flake8-tidy-imports): + tidy module imports. + - [flake8-docstrings](https://github.com/PyCQA/flake8-docstrings): + pydocstyle extension of flake8. + - [flake8-rst-docstrings](https://github.com/peterjc/e8-rst-docstrings): + extension of `flake8-docstrings` for `rst` docs. + - [flake8-comprehensions](https://github.com/adamchainz/e8-comprehensions): + write better list/set/dict comprehensions. + - [flake8-bugbear](https://github.com/PyCQA/flake8-bugbear): + find possible bugs and design issues in program. + - [flake8-blind-except](https://github.com/elijahandrews/flake8-blind-except): + checks for blind, catch-all `except` statements. +- [yesqa](https://github.com/asottile/yesqa): + remove unneccesary `# noqa` comments, follows additional dependencies listed above. +- [autoflake](https://github.com/PyCQA/autoflake): + remove unused imports and variables. +- [pre-commit-hooks](https://github.com/pre-commit/pre-commit-hooks): generic pre-commit hooks. + - **detect-private-key**: checks for the existence of private keys. + - **check-ast**: check whether files parse as valid python. + - **end-of-file-fixer**:check files end in a newline and only a newline. + - **mixed-line-ending**: checks mixed line ending. + - **trailing-whitespace**: trims trailing whitespace. + - **check-case-conflict**: check files that would conflict with case-insensitive file systems. +- [pyupgrade](https://github.com/asottile/pyupgrade): + upgrade syntax for newer versions of the language. +- **forbid-to-commit**: Make sure that `*.rej` files cannot be commited. These files are created by the + [automated template sync](#automated-template-sync) if there's a merge conflict and need to be addressed manually. ### How to disable or add pre-commit checks -- To ignore lint warnigs from **flake8**, see [Ignore certain lint warnings](#how-to-ignore-certain-lint-warnings). -- You can add or remove pre-commit checks by simply deleting relevant lines in the `.pre-commit-config.yaml` file. - Some pre-commit checks have additional options that can be specified either in the `pyproject.toml` or tool-specific - config files, such as `.prettierrc.yml` for **prettier** and `.flake8` for **flake8**. +- To ignore lint warnigs from **flake8**, see [Ignore certain lint warnings](#how-to-ignore-certain-lint-warnings). +- You can add or remove pre-commit checks by simply deleting relevant lines in the `.pre-commit-config.yaml` file. + Some pre-commit checks have additional options that can be specified either in the `pyproject.toml` or tool-specific + config files, such as `.prettierrc.yml` for **prettier** and `.flake8` for **flake8**. ### How to ignore certain lint warnings @@ -220,10 +220,10 @@ W504 Scverse ecosystem packages should operate on [AnnData][] and/or [MuData][] data structures and typically use an API as originally [introduced by scanpy][scanpy-api] with the following submodules: -- `pp` for preprocessing -- `tl` for tools (that, compared to `pp` generate interpretable output, often associated with a corresponding plotting - function) -- `pl` for plotting functions +- `pp` for preprocessing +- `tl` for tools (that, compared to `pp` generate interpretable output, often associated with a corresponding plotting + function) +- `pl` for plotting functions You may add additional submodules as appropriate. While we encourage to follow a scanpy-like API for ecosystem packages, there may also be good reasons to choose a different approach, e.g. using an object-oriented API. 
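As a concrete illustration of the `pp`/`tl`/`pl` convention described above, the short sketch below exercises the three submodules using scanpy itself (which introduced the convention); the bundled example dataset and the `bulk_labels` column are just scanpy's demo data, not part of this template.

```python
import scanpy as sc  # scanpy introduced the pp/tl/pl convention referenced above

adata = sc.datasets.pbmc68k_reduced()   # small AnnData object bundled with scanpy
sc.pp.neighbors(adata, n_neighbors=15)  # `pp`: preprocessing step (k-nearest-neighbour graph)
sc.tl.umap(adata)                       # `tl`: tool producing interpretable output (adata.obsm["X_umap"])
sc.pl.umap(adata, color="bulk_labels")  # `pl`: plotting function paired with the tool's output
```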
@@ -280,12 +280,12 @@ The pull request can only be merged after all `*.rej` files have been removed. :::{tip} The following hints may be useful to work with the template sync: -- GitHub automatically disables scheduled actions if there has been not activity to the repository for 60 days. - You can re-enable or manually trigger the sync by navigating to `Actions` -> `Sync Template` in your GitHub repository. -- If you want to ignore certain files from the template update, you can add them to the `[tool.cruft]` section in the - `pyproject.toml` file in the root of your repository. More details are described in the - [cruft documentation][cruft-update-project]. -- To disable the sync entirely, simply remove the file `.github/workflows/sync.yaml`. +- GitHub automatically disables scheduled actions if there has been not activity to the repository for 60 days. + You can re-enable or manually trigger the sync by navigating to `Actions` -> `Sync Template` in your GitHub repository. +- If you want to ignore certain files from the template update, you can add them to the `[tool.cruft]` section in the + `pyproject.toml` file in the root of your repository. More details are described in the + [cruft documentation][cruft-update-project]. +- To disable the sync entirely, simply remove the file `.github/workflows/sync.yaml`. ::: diff --git a/pyproject.toml b/pyproject.toml index 2bdebfe0..a922782c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "pyarrow", "readfcs", "tifffile>=2023.8.12", + "ome-types", ] [project.optional-dependencies] diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index 48f784bd..618855a5 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -4,6 +4,7 @@ from spatialdata_io.readers.cosmx import cosmx from spatialdata_io.readers.curio import curio from spatialdata_io.readers.dbit import dbit +from spatialdata_io.readers.macsima import macsima from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.merscope import merscope from spatialdata_io.readers.seqfish import seqfish @@ -32,6 +33,7 @@ "xenium_explorer_selection", "dbit", "visium_hd", + "macsima", ] __version__ = version("spatialdata-io") diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index 34848137..92d26db3 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -66,14 +66,15 @@ class SeqfishKeys(ModeEnum): # file extensions CSV_FILE = ".csv" TIFF_FILE = ".tiff" - OME_TIFF_FILE = ".ome.tiff" + GEOJSON_FILE = ".geojson" # file identifiers - SECTION = "section" - TRANSCRIPT_COORDINATES = "TranscriptCoordinates" + ROI = "Roi" + TRANSCRIPT_COORDINATES = "TranscriptList" DAPI = "DAPI" - COUNTS_FILE = "CxG" - CELL_MASK_FILE = "CellMask" + COUNTS_FILE = "CellxGene" + SEGMENTATION = "Segmentation" CELL_COORDINATES = "CellCoordinates" + BOUNDARIES = "Boundaries" # transcripts TRANSCRIPTS_X = "x" TRANSCRIPTS_Y = "y" @@ -87,6 +88,8 @@ class SeqfishKeys(ModeEnum): SPATIAL_KEY = "spatial" REGION_KEY = "region" INSTANCE_KEY_TABLE = "instance_id" + SCALEFEFACTOR_X = "PhysicalSizeX" + SCALEFEFACTOR_Y = "PhysicalSizeY" @unique diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py index da0ac9e3..82676098 100644 --- a/src/spatialdata_io/readers/_utils/_utils.py +++ b/src/spatialdata_io/readers/_utils/_utils.py @@ -7,6 +7,9 @@ from anndata import AnnData, read_text from h5py 
import File +from ome_types import from_tiff +from ome_types.model import Pixels, UnitsLength +from spatialdata._logging import logger from spatialdata_io.readers._utils._read_10x_h5 import _read_10x_h5 @@ -75,3 +78,48 @@ def _initialize_raster_models_kwargs( if "scale_factors" not in labels_models_kwargs: labels_models_kwargs["scale_factors"] = [2, 2, 2, 2] return image_models_kwargs, labels_models_kwargs + + +def calc_scale_factors(lower_scale_limit: float, min_size: int = 1000, default_scale_factor: int = 2) -> list[int]: + """Calculate scale factors based on image size to get lowest resolution under min_size pixels.""" + # get lowest dimension, ignoring channels + scale_factor: int = default_scale_factor + scale_factors = [scale_factor] + lower_scale_limit /= scale_factor + while lower_scale_limit >= min_size: + # scale_factors are cumulative, so we don't need to do e.g. scale_factor *= 2 + scale_factors.append(scale_factor) + lower_scale_limit /= scale_factor + return scale_factors + + +def parse_channels(path: Path) -> list[str]: + """Parse channel names from an OME-TIFF file.""" + images = from_tiff(path).images + if len(images) > 1: + logger.warning("Found multiple images in OME-TIFF file. Only the first one will be used.") + channels = images[0].pixels.channels + logger.debug(channels) + names = [c.name for c in channels if c.name is not None] + return names + + +def parse_physical_size(path: Path | None = None, ome_pixels: Pixels | None = None) -> float: + """Parse physical size from OME-TIFF to micrometer.""" + pixels = ome_pixels or from_tiff(path).images[0].pixels + logger.debug(pixels) + if pixels.physical_size_x_unit != pixels.physical_size_y_unit: + logger.error("Physical units for x and y dimensions are not the same.") + raise NotImplementedError + if pixels.physical_size_x != pixels.physical_size_y: + logger.error("Physical sizes for x and y dimensions are not the same.") + raise NotImplementedError + # convert to micrometer if needed + if pixels.physical_size_x_unit == UnitsLength.NANOMETER: + physical_size = pixels.physical_size_x / 1000 + elif pixels.physical_size_x_unit == UnitsLength.MICROMETER: + physical_size = pixels.physical_size_x + else: + logger.error(f"Physical unit not recognized: '{pixels.physical_size_x_unit}'.") + raise NotImplementedError + return float(physical_size) diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py new file mode 100644 index 00000000..58e962eb --- /dev/null +++ b/src/spatialdata_io/readers/macsima.py @@ -0,0 +1,488 @@ +from __future__ import annotations + +import warnings +from collections import defaultdict +from collections.abc import Mapping +from copy import deepcopy +from dataclasses import dataclass +from pathlib import Path +from types import MappingProxyType +from typing import Any + +import anndata as ad +import dask.array as da +import pandas as pd +import spatialdata as sd +from dask_image.imread import imread +from spatialdata import SpatialData +from spatialdata._logging import logger + +from spatialdata_io._constants._enum import ModeEnum +from spatialdata_io.readers._utils._utils import ( + calc_scale_factors, + parse_channels, + parse_physical_size, +) + +__all__ = ["macsima"] + + +class MACSimaParsingStyle(ModeEnum): + """Different parsing styles for MACSima data.""" + + PROCESSED_SINGLE_FOLDER = "processed_single_folder" + PROCESSED_MULTIPLE_FOLDERS = "processed_multiple_folders" + RAW = "raw" + AUTO = "auto" + + +@dataclass +class ChannelMetadata: + """Metadata for a channel 
in a multichannel dataset.""" + + name: str + cycle: int + + +@dataclass +class MultiChannelImage: + """Multichannel image with metadata.""" + + data: list[da.Array] + metadata: list[ChannelMetadata] + include_cycle_in_channel_name: bool = False + + @classmethod + def from_paths( + cls, + path_files: list[Path], + imread_kwargs: Mapping[str, Any], + skip_rounds: list[int] | None = None, + ) -> MultiChannelImage: + cycles = [] + channels = [] + for p in path_files: + cycle = parse_name_to_cycle(p.stem) + cycles.append(cycle) + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + channel_names = parse_channels(p) + if len(channel_names) > 1: + warnings.warn( + f"Found multiple channels in OME-TIFF file {p}. Only the first one will be used.", + UserWarning, + stacklevel=2, + ) + channels.append(channel_names[0]) + except ValueError as e: + warnings.warn( + f"Cannot parse OME metadata from {p}. Error: {e}. Skipping this file.", UserWarning, stacklevel=2 + ) + + if len(path_files) != len(cycles) or len(path_files) != len(channels): + raise ValueError("Length of path_files, cycles and channels must be the same.") + # if any of round_channels is in skip_rounds, remove that round from the list and from path_files + if skip_rounds: + logger.info(f"Skipping cycles: {skip_rounds}") + path_files, cycles, channels = map( + list, + zip( + *[ + (p, c, ch) + for p, c, ch in zip(path_files, cycles, channels, strict=True) + if c not in skip_rounds + ], + strict=True, + ), + ) + imgs = [imread(img, **imread_kwargs) for img in path_files] + for img, path in zip(imgs, path_files, strict=True): + if img.shape[1:] != imgs[0].shape[1:]: + raise ValueError( + f"Images are not all the same size. Image {path} has shape {img.shape[1:]} while the first image " + f"{path_files[0]} has shape {imgs[0].shape[1:]}" + ) + # create MultiChannelImage object with imgs and metadata + output = cls( + data=imgs, + metadata=[ChannelMetadata(name=ch, cycle=c) for c, ch in zip(cycles, channels, strict=True)], + ) + return output + + @classmethod + def subset_by_channel(cls, mci: MultiChannelImage, c_name: str) -> MultiChannelImage: + """Create new MultiChannelImage with only the channels that contain the string c_name.""" + indices = [i for i, c in enumerate(mci.metadata) if c_name in c.name] + return MultiChannelImage.subset_by_index(mci, indices) + + @classmethod + def subset_by_index(cls, mci: MultiChannelImage, indices: list[int]) -> MultiChannelImage: + """Create new MultiChannelImage with only the channels selected by the indices. 
The underlying data will still be the same reference, use copy.deepcopy to make a new copy.""" + metadata = [c for i, c in enumerate(mci.metadata) if i in indices] + data = [d for i, d in enumerate(mci.data) if i in indices] + return cls( + data=data, + metadata=metadata, + include_cycle_in_channel_name=mci.include_cycle_in_channel_name, + ) + + def get_channel_names(self) -> list[str]: + """Get the channel names.""" + if self.include_cycle_in_channel_name: + return [f"R{c.cycle} {c.name}" for c in self.metadata] + else: + # if name is duplicated, add (i) to the name + names = [c.name for c in self.metadata] + name_dict: dict[str, int] = defaultdict(int) + name_counter: dict[str, int] = defaultdict(int) + for name in names: + name_dict[name] += 1 + output = [] + for name in names: + name_counter[name] += 1 + output.append(f"{name} ({name_counter[name]})" if name_dict[name] > 1 else name) + return output + + def get_cycles(self) -> list[int]: + """Get the cycle numbers.""" + return [c.cycle for c in self.metadata] + + def sort_by_channel(self) -> None: + """Sort the channels by cycle number.""" + self.data = [d for _, d in sorted(zip(self.metadata, self.data, strict=True), key=lambda x: x[0].cycle)] + self.metadata = sorted(self.metadata, key=lambda x: x.cycle) + + def subset(self, subset: int | None = None) -> MultiChannelImage: + """Subsets the images to keep only the first `subset` x `subset` pixels.""" + if subset: + self.data = [d[:, :subset, :subset] for d in self.data] + return self + + def calc_scale_factors(self, default_scale_factor: int = 2) -> list[int]: + lower_scale_limit = min(self.data[0].shape[1:]) + return calc_scale_factors(lower_scale_limit, default_scale_factor=default_scale_factor) + + def get_stack(self) -> da.Array: + return da.stack(self.data, axis=0).squeeze(axis=1) + + +def macsima( + path: str | Path, + parsing_style: MACSimaParsingStyle | str = MACSimaParsingStyle.AUTO, + filter_folder_names: list[str] | None = None, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + subset: int | None = None, + c_subset: int | None = None, + max_chunk_size: int = 1024, + c_chunks_size: int = 1, + multiscale: bool = True, + transformations: bool = True, + scale_factors: list[int] | None = None, + default_scale_factor: int = 2, + nuclei_channel_name: str = "DAPI", + split_threshold_nuclei_channel: int | None = 2, + skip_rounds: list[int] | None = None, + include_cycle_in_channel_name: bool = False, +) -> SpatialData: + """ + Read *MACSima* formatted dataset. + + This function reads images from a MACSima cyclic imaging experiment. Metadata of the cycle rounds is parsed from + the image names. The channel names are parsed from the OME metadata. + + .. seealso:: + + - `MACSima output `_. + + Parameters + ---------- + path + Path to the directory containing the data. + parsing_style + Parsing style to use. If ``auto``, the parsing style is determined based on the contents of the path. + filter_folder_names + List of folder names to filter out when parsing multiple folders. + imread_kwargs + Keyword arguments passed to :func:`dask_image.imread.imread`. + subset + Subset the image to the first ``subset`` pixels in x and y dimensions. + c_subset + Subset the image to the first ``c_subset`` channels. + max_chunk_size + Maximum chunk size for x and y dimensions. + c_chunks_size + Chunk size for c dimension. + multiscale + Whether to create a multiscale image. + transformations + Whether to add a transformation from pixels to microns to the image. 
+ scale_factors + Scale factors to use for downsampling. If None, scale factors are calculated based on image size. + default_scale_factor + Default scale factor to use for downsampling. + nuclei_channel_name + Common string of the nuclei channel to separate nuclei from other channels. + split_threshold_nuclei_channel + Threshold for splitting nuclei channels. If the number of channels that include nuclei_channel_name is + greater than this threshold, the nuclei channels are split into a separate stack. + skip_rounds + List of round numbers to skip when parsing the data. Rounds or cycles are counted from 0 e.g. skip_rounds=[1, 2] + will parse only the first round 0 when there are only 3 cycles. + include_cycle_in_channel_name + Whether to include the cycle number in the channel name. + + Returns + ------- + :class:`spatialdata.SpatialData` + """ + path = Path(path) + if not isinstance(parsing_style, MACSimaParsingStyle): + parsing_style = MACSimaParsingStyle(parsing_style) + + if parsing_style == MACSimaParsingStyle.AUTO: + assert path.is_dir(), f"Path {path} is not a directory." + + if any(p.suffix in [".tif", ".tiff"] for p in path.iterdir()): + # if path contains tifs, do parse_processed_folder on path + parsing_style = MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER + elif all(p.is_dir() for p in path.iterdir() if not p.name.startswith(".")): + # if path contains only folders or hidden files, do parse_processed_folder on each folder + parsing_style = MACSimaParsingStyle.PROCESSED_MULTIPLE_FOLDERS + else: + raise ValueError(f"Cannot determine parsing style for path {path}. Please specify the parsing style.") + + if parsing_style == MACSimaParsingStyle.PROCESSED_SINGLE_FOLDER: + return parse_processed_folder( + path=path, + imread_kwargs=imread_kwargs, + subset=subset, + c_subset=c_subset, + max_chunk_size=max_chunk_size, + c_chunks_size=c_chunks_size, + multiscale=multiscale, + transformations=transformations, + scale_factors=scale_factors, + default_scale_factor=default_scale_factor, + nuclei_channel_name=nuclei_channel_name, + split_threshold_nuclei_channel=split_threshold_nuclei_channel, + skip_rounds=skip_rounds, + include_cycle_in_channel_name=include_cycle_in_channel_name, + ) + if parsing_style == MACSimaParsingStyle.PROCESSED_MULTIPLE_FOLDERS: + sdatas = {} + # iterate over all non-filtered folders in path and parse each folder + for p in [ + p + for p in path.iterdir() + if p.is_dir() and (not filter_folder_names or not any(f in p.name for f in filter_folder_names)) + ]: + sdatas[p.stem] = parse_processed_folder( + path=p, + imread_kwargs=imread_kwargs, + subset=subset, + c_subset=c_subset, + max_chunk_size=max_chunk_size, + c_chunks_size=c_chunks_size, + multiscale=multiscale, + transformations=transformations, + scale_factors=scale_factors, + default_scale_factor=default_scale_factor, + nuclei_channel_name=nuclei_channel_name, + split_threshold_nuclei_channel=split_threshold_nuclei_channel, + skip_rounds=skip_rounds, + include_cycle_in_channel_name=include_cycle_in_channel_name, + ) + return sd.concatenate(list(sdatas.values())) + if parsing_style == MACSimaParsingStyle.RAW: + # TODO: see https://github.com/scverse/spatialdata-io/issues/155 + raise NotImplementedError("Parsing raw MACSima data is not yet implemented.") + + +def parse_name_to_cycle(name: str) -> int: + """Parse the cycle number from the name of the image.""" + cycle = name.split("_")[0] + if "-" in cycle: + cycle = cycle.split("-")[1] + return int(cycle) + + +def parse_processed_folder( + path: Path, + 
imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + subset: int | None = None, + c_subset: int | None = None, + max_chunk_size: int = 1024, + c_chunks_size: int = 1, + multiscale: bool = True, + transformations: bool = True, + scale_factors: list[int] | None = None, + default_scale_factor: int = 2, + nuclei_channel_name: str = "DAPI", + split_threshold_nuclei_channel: int | None = 2, + skip_rounds: list[int] | None = None, + file_pattern: str = "*.tif*", + include_cycle_in_channel_name: bool = False, +) -> SpatialData: + """Parse a single folder containing images from a cyclical imaging platform.""" + # get list of image paths, get channel name from OME data and cycle round number from filename + # look for OME-TIFF files + # TODO: replace this pattern and the p.suffix in [".tif", ".tiff"] with a single function based on a regexp, like + # this one re.compile(r".*\.tif{1,2}$", re.IGNORECASE) + path_files = list(path.glob(file_pattern)) + logger.debug(path_files[0]) + + mci = MultiChannelImage.from_paths( + path_files, + imread_kwargs, + skip_rounds, + ) + mci.include_cycle_in_channel_name = include_cycle_in_channel_name + + mci.sort_by_channel() + + # do subsetting if needed + if subset: + mci = mci.subset(subset) + if c_subset: + mci = MultiChannelImage.subset_by_index(mci, indices=list(range(0, c_subset))) + if multiscale and not scale_factors: + scale_factors = mci.calc_scale_factors(default_scale_factor=default_scale_factor) + if not multiscale: + scale_factors = None + logger.debug(f"Scale factors: {scale_factors}") + + filtered_name = path.stem.replace(" ", "_") + + return create_sdata( + mci=mci, + path_files=path_files, + max_chunk_size=max_chunk_size, + c_chunks_size=c_chunks_size, + transformations=transformations, + scale_factors=scale_factors, + nuclei_channel_name=nuclei_channel_name, + split_threshold_nuclei_channel=split_threshold_nuclei_channel, + filtered_name=filtered_name, + ) + + +def create_sdata( + mci: MultiChannelImage, + path_files: list[Path], + max_chunk_size: int, + c_chunks_size: int, + transformations: bool, + scale_factors: list[int] | None, + nuclei_channel_name: str, + split_threshold_nuclei_channel: int | None, + filtered_name: str, +) -> SpatialData: + nuclei_idx = [i for i, c in enumerate(mci.get_channel_names()) if nuclei_channel_name in c] + n_nuclei_channels = len(nuclei_idx) + if not split_threshold_nuclei_channel: + # if split_threshold_nuclei_channel is None, do not split nuclei channels + split_nuclei = False + else: + split_nuclei = n_nuclei_channels > split_threshold_nuclei_channel + if split_nuclei: + # if channel name is nuclei_channel_name, add to separate nuclei stack + nuclei_mci = deepcopy(MultiChannelImage.subset_by_index(mci, indices=nuclei_idx)) + # keep the first nuclei channel in both the stack and the nuclei stack + nuclei_idx_without_first_and_last = nuclei_idx[1:-1] + mci = MultiChannelImage.subset_by_index( + mci, + [i for i in range(len(mci.metadata)) if i not in nuclei_idx_without_first_and_last], + ) + + pixels_to_microns = parse_physical_size(path_files[0]) + + image_element = create_image_element( + mci, + max_chunk_size, + c_chunks_size, + transformations, + pixels_to_microns, + scale_factors, + coordinate_system=filtered_name, + ) + table_channels = create_table(mci) + + if split_nuclei: + nuclei_image_element = create_image_element( + nuclei_mci, + max_chunk_size, + c_chunks_size, + transformations, + pixels_to_microns, + scale_factors, + coordinate_system=filtered_name, + ) + table_nuclei = create_table(nuclei_mci) 
+ + sdata = sd.SpatialData( + images={ + f"{filtered_name}_image": image_element, + }, + tables={ + f"{filtered_name}_table": table_channels, + }, + ) + + if split_nuclei: + sdata.images[f"{filtered_name}_nuclei_image"] = nuclei_image_element + sdata.tables[f"{filtered_name}_nuclei_table"] = table_nuclei + + return sdata + + +def create_table(mci: MultiChannelImage) -> ad.AnnData: + cycles = mci.get_cycles() + names = mci.get_channel_names() + df = pd.DataFrame( + { + "name": names, + "cycle": cycles, + } + ) + table = ad.AnnData(var=df) + table.var_names = names + return sd.models.TableModel.parse(table) + + +def create_image_element( + mci: MultiChannelImage, + max_chunk_size: int, + c_chunks_size: int, + transformations: bool, + pixels_to_microns: float, + scale_factors: list[int] | None, + coordinate_system: str | None = None, +) -> sd.models.Image2DModel: + t_dict = None + if transformations: + t_pixels_to_microns = sd.transformations.Scale([pixels_to_microns, pixels_to_microns], axes=("x", "y")) + # 'microns' is also used in merscope example + # no inverse needed as the transformation is already from pixels to microns + t_dict = {coordinate_system: t_pixels_to_microns} + # # chunk_size can be 1 for channels + chunks = { + "y": max_chunk_size, + "x": max_chunk_size, + "c": c_chunks_size, + } + if t_dict: + logger.debug("Adding transformation: %s", t_dict) + el = sd.models.Image2DModel.parse( + mci.get_stack(), + # the data on disk is not always CYX, but imread takes care of parsing things correctly, so that we can assume + # mci to be CYX. Still, to make the code more robust, we could consider using a different backend, for instance + # bioio-ome-tiff, read both the data and its dimensions from disk, and let Image2DModel.parse() rearrange the + # dimensions into CYX. + dims=["c", "y", "x"], + scale_factors=scale_factors, + chunks=chunks, + c_coords=mci.get_channel_names(), + transformations=t_dict, + ) + return el diff --git a/src/spatialdata_io/readers/seqfish.py b/src/spatialdata_io/readers/seqfish.py index b82b2eef..27431c09 100644 --- a/src/spatialdata_io/readers/seqfish.py +++ b/src/spatialdata_io/readers/seqfish.py @@ -2,6 +2,7 @@ import os import re +import xml.etree.ElementTree as ET from collections.abc import Mapping from pathlib import Path from types import MappingProxyType @@ -10,6 +11,7 @@ import anndata as ad import numpy as np import pandas as pd +import tifffile from dask_image.imread import imread from spatialdata import SpatialData from spatialdata.models import ( @@ -19,7 +21,7 @@ ShapesModel, TableModel, ) -from spatialdata.transformations import Identity +from spatialdata.transformations.transformations import Identity, Scale from spatialdata_io._constants._constants import SeqfishKeys as SK from spatialdata_io._docs import inject_docs @@ -33,19 +35,22 @@ def seqfish( load_images: bool = True, load_labels: bool = True, load_points: bool = True, - sections: list[int] | None = None, + load_shapes: bool = True, + cells_as_circles: bool = False, + rois: list[int] | None = None, imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + raster_models_scale_factors: list[float] | None = None, ) -> SpatialData: """ Read *seqfish* formatted dataset. This function reads the following files: - - ```{vx.COUNTS_FILE!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Counts and metadata file. - - ```{vx.CELL_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Cell coordinates file. - - ```{vx.DAPI!r}{vx.SECTION!r}{vx.OME_TIFF_FILE!r}```: High resolution tiff image. 
- - ```{vx.CELL_MASK_FILE!r}{vx.SECTION!r}{vx.TIFF_FILE!r}```: Cell mask file. - - ```{vx.TRANSCRIPT_COORDINATES!r}{vx.SECTION!r}{vx.CSV_FILE!r}```: Transcript coordinates file. + - ```{vx.ROI!r}{vx.COUNTS_FILE!r}{vx.CSV_FILE!r}```: Counts and metadata file. + - ```{vx.ROI!r}{vx.CELL_COORDINATES!r}{vx.CSV_FILE!r}```: Cell coordinates file. + - ```{vx.ROI!r}{vx.DAPI!r}{vx.TIFF_FILE!r}```: High resolution tiff image. + - ```{vx.ROI!r}{vx.SEGMENTATION!r}{vx.TIFF_FILE!r}```: Cell mask file. + - ```{vx.ROI!r}{vx.TRANSCRIPT_COORDINATES!r}{vx.CSV_FILE!r}```: Transcript coordinates file. .. seealso:: @@ -58,133 +63,189 @@ def seqfish( load_images Whether to load the images. load_labels - Whether to load the labels. + Whether to load cell segmentation. load_points - Whether to load the points. - sections - Which sections (specified as integers) to load. By default, all sections are loaded. + Whether to load the transcript locations. + load_shapes + Whether to load cells as shape. + cells_as_circles + Whether to read cells also as circles instead of labels. + rois + Which ROIs (specified as integers) to load. Only necessary if multiple ROIs present. imread_kwargs Keyword arguments to pass to :func:`dask_image.imread.imread`. Returns ------- :class:`spatialdata.SpatialData` + + Examples + -------- + This code shows how to change the annotation target of the table from the cell labels to the cell boundaries. + Please check that the string Roi1 is used in the naming of your dataset, otherwise adjust the code below. + >>> from spatialdata_io import seqfish + >>> sdata = seqfish("path/to/raw/data") + >>> sdata["table_Roi1"].obs["region"] = "Roi1_Boundaries" + >>> sdata.set_table_annotates_spatialelement( + ... table_name="table_Roi1", region="Roi1_Boundaries", region_key="region", instance_key="instance_id" + ... ) + >>> sdata.write("path/to/data.zarr") """ path = Path(path) - count_file_pattern = re.compile(rf"(.*?)_{SK.CELL_COORDINATES}_{SK.SECTION}[0-9]+" + re.escape(SK.CSV_FILE)) - count_files = [i for i in os.listdir(path) if count_file_pattern.match(i)] + count_file_pattern = re.compile(rf"(.*?){re.escape(SK.CELL_COORDINATES)}{re.escape(SK.CSV_FILE)}$") + count_files = [f for f in os.listdir(path) if count_file_pattern.match(f)] if not count_files: - # no file matching tbe pattern found raise ValueError( f"No files matching the pattern {count_file_pattern} were found. Cannot infer the naming scheme." ) - matched = count_file_pattern.match(count_files[0]) - if matched is None: - raise ValueError(f"File {count_files[0]} does not match the pattern {count_file_pattern}") - prefix = matched.group(1) - - n = len(count_files) - all_sections = list(range(1, n + 1)) - if sections is None: - sections = all_sections + + roi_pattern = re.compile(f"^{SK.ROI}(\\d+)") + found_rois = {m.group(1) for i in os.listdir(path) if (m := roi_pattern.match(i))} + if rois is None: + rois_str = [f"{SK.ROI}{roi}" for roi in found_rois] + elif isinstance(rois, list): + for roi in rois: + if str(roi) not in found_rois: + raise ValueError(f"ROI{roi} not found.") + rois_str = [f"{SK.ROI}{roi}" for roi in rois] else: - for section in sections: - if section not in all_sections: - raise ValueError(f"Section {section} not found in the data.") - sections_str = [f"{SK.SECTION}{x}" for x in sections] + raise ValueError("Invalid type for 'roi'. 
Must be list[int] or None.") + + def get_cell_file(roi: str) -> str: + return f"{roi}_{SK.CELL_COORDINATES}{SK.CSV_FILE}" - def get_cell_file(section: str) -> str: - return f"{prefix}_{SK.CELL_COORDINATES}_{section}{SK.CSV_FILE}" + def get_count_file(roi: str) -> str: + return f"{roi}_{SK.COUNTS_FILE}{SK.CSV_FILE}" - def get_count_file(section: str) -> str: - return f"{prefix}_{SK.COUNTS_FILE}_{section}{SK.CSV_FILE}" + def get_dapi_file(roi: str) -> str: + return f"{roi}_{SK.DAPI}{SK.TIFF_FILE}" - def get_dapi_file(section: str) -> str: - return f"{prefix}_{SK.DAPI}_{section}{SK.OME_TIFF_FILE}" + def get_cell_segmentation_labels_file(roi: str) -> str: + return f"{roi}_{SK.SEGMENTATION}{SK.TIFF_FILE}" - def get_cell_mask_file(section: str) -> str: - return f"{prefix}_{SK.CELL_MASK_FILE}_{section}{SK.TIFF_FILE}" + def get_cell_segmentation_shapes_file(roi: str) -> str: + return f"{roi}_{SK.BOUNDARIES}{SK.GEOJSON_FILE}" - def get_transcript_file(section: str) -> str: - return f"{prefix}_{SK.TRANSCRIPT_COORDINATES}_{section}{SK.CSV_FILE}" + def get_transcript_file(roi: str) -> str: + return f"{roi}_{SK.TRANSCRIPT_COORDINATES}{SK.CSV_FILE}" - adatas: dict[str, ad.AnnData] = {} - for section in sections_str: # type: ignore[assignment] - assert isinstance(section, str) - cell_file = get_cell_file(section) - count_matrix = get_count_file(section) - adata = ad.read_csv(path / count_matrix, delimiter=",") + # parse table information + tables: dict[str, ad.AnnData] = {} + for roi_str in rois_str: + # parse cell gene expression data + count_matrix = get_count_file(roi_str) + df = pd.read_csv(path / count_matrix, delimiter=",") + instance_id = df.iloc[:, 0].astype(str) + expression = df.drop(columns=["Unnamed: 0"]) + expression.set_index(instance_id, inplace=True) + adata = ad.AnnData(expression) + + # parse cell spatial information + cell_file = get_cell_file(roi_str) cell_info = pd.read_csv(path / cell_file, delimiter=",") + cell_info["label"] = cell_info["label"].astype("str") + # below, the obsm are assigned by position, not by index. 
Here we check that we can do it
+        assert cell_info["label"].to_numpy().tolist() == adata.obs.index.to_numpy().tolist()
+        cell_info.set_index("label", inplace=True)
+        adata.obs[SK.AREA] = cell_info[SK.AREA]
         adata.obsm[SK.SPATIAL_KEY] = cell_info[[SK.CELL_X, SK.CELL_Y]].to_numpy()
-        adata.obs[SK.AREA] = np.reshape(cell_info[SK.AREA].to_numpy(), (-1, 1))
-        region = f"cells_{section}"
+
+        # map tables to cell labels (defined later)
+        region = os.path.splitext(get_cell_segmentation_labels_file(roi_str))[0]
         adata.obs[SK.REGION_KEY] = region
-        adata.obs[SK.INSTANCE_KEY_TABLE] = adata.obs.index.astype(int)
-        adatas[section] = adata
+        adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
+        adata.obs[SK.INSTANCE_KEY_TABLE] = instance_id.to_numpy().astype(np.uint16)
+        adata.obs = adata.obs.reset_index(drop=True)
+        tables[f"table_{roi_str}"] = TableModel.parse(
+            adata,
+            region=region,
+            region_key=SK.REGION_KEY.value,
+            instance_key=SK.INSTANCE_KEY_TABLE.value,
+        )
 
-    scale_factors = [2, 2, 2, 2]
+    # parse scale factors to scale images and labels
+    scaled = {}
+    for roi_str in rois_str:
+        scaled[roi_str] = Scale(
+            np.array(_get_scale_factors(path / get_dapi_file(roi_str), SK.SCALEFEFACTOR_X, SK.SCALEFEFACTOR_Y)),
+            axes=("y", "x"),
+        )
 
     if load_images:
         images = {
-            f"image_{x}": Image2DModel.parse(
+            f"{os.path.splitext(get_dapi_file(x))[0]}": Image2DModel.parse(
                 imread(path / get_dapi_file(x), **imread_kwargs),
                 dims=("c", "y", "x"),
-                scale_factors=scale_factors,
-                transformations={x: Identity()},
+                scale_factors=raster_models_scale_factors,
+                transformations={"global": scaled[x]},
             )
-            for x in sections_str
+            for x in rois_str
         }
     else:
         images = {}
 
     if load_labels:
         labels = {
-            f"labels_{x}": Labels2DModel.parse(
-                imread(path / get_cell_mask_file(x), **imread_kwargs).squeeze(),
+            f"{os.path.splitext(get_cell_segmentation_labels_file(x))[0]}": Labels2DModel.parse(
+                imread(path / get_cell_segmentation_labels_file(x), **imread_kwargs).squeeze(),
                 dims=("y", "x"),
-                scale_factors=scale_factors,
-                transformations={x: Identity()},
+                scale_factors=raster_models_scale_factors,
+                transformations={"global": scaled[x]},
             )
-            for x in sections_str
+            for x in rois_str
         }
     else:
         labels = {}
 
+    points = {}
     if load_points:
-        points = {
-            f"transcripts_{x}": PointsModel.parse(
-                pd.read_csv(path / get_transcript_file(x), delimiter=","),
+        for x in rois_str:
+
+            # prepare data
+            name = f"{os.path.splitext(get_transcript_file(x))[0]}"
+            p = pd.read_csv(path / get_transcript_file(x), delimiter=",")
+            instance_key_points = SK.INSTANCE_KEY_POINTS.value if SK.INSTANCE_KEY_POINTS.value in p.columns else None
+
+            # call parser
+            points[name] = PointsModel.parse(
+                p,
                 coordinates={"x": SK.TRANSCRIPTS_X, "y": SK.TRANSCRIPTS_Y},
                 feature_key=SK.FEATURE_KEY.value,
-                instance_key=SK.INSTANCE_KEY_POINTS.value,
-                transformations={x: Identity()},
+                instance_key=instance_key_points,
+                transformations={"global": Identity()},
+            )
+
+    shapes = {}
+    if cells_as_circles:
+        for x, adata in zip(rois_str, tables.values()):
+            shapes[f"{os.path.splitext(get_cell_file(x))[0]}"] = ShapesModel.parse(
+                adata.obsm[SK.SPATIAL_KEY],
+                geometry=0,
+                radius=np.sqrt(adata.obs[SK.AREA].to_numpy() / np.pi),
+                index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
+                transformations={"global": Identity()},
+            )
+    if load_shapes:
+        for x in rois_str:
+            # this assumes that the index matches the instance key of the table. A more robust approach could be
+            # implemented, as described here https://github.com/scverse/spatialdata-io/issues/249
+            shapes[f"{os.path.splitext(get_cell_segmentation_shapes_file(x))[0]}"] = ShapesModel.parse(
+                path / get_cell_segmentation_shapes_file(x),
+                transformations={"global": scaled[x]},
+                index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
             )
-            for x in sections_str
-        }
-    else:
-        points = {}
-
-    adata = ad.concat(adatas.values())
-    adata.obs[SK.REGION_KEY] = adata.obs[SK.REGION_KEY].astype("category")
-    adata.obs = adata.obs.reset_index(drop=True)
-    table = TableModel.parse(
-        adata,
-        region=[f"cells_{x}" for x in sections_str],
-        region_key=SK.REGION_KEY.value,
-        instance_key=SK.INSTANCE_KEY_TABLE.value,
-    )
-
-    shapes = {
-        f"cells_{x}": ShapesModel.parse(
-            adata.obsm[SK.SPATIAL_KEY],
-            geometry=0,
-            radius=np.sqrt(adata.obs[SK.AREA].to_numpy() / np.pi),
-            index=adata.obs[SK.INSTANCE_KEY_TABLE].copy(),
-            transformations={x: Identity()},
-        )
-        for x, adata in adatas.items()
-    }
 
-    sdata = SpatialData(images=images, labels=labels, points=points, table=table, shapes=shapes)
+    sdata = SpatialData(images=images, labels=labels, points=points, tables=tables, shapes=shapes)
 
     return sdata
+
+
+def _get_scale_factors(DAPI_path: Path, scalefactor_x_key: str, scalefactor_y_key: str) -> list[float]:
+    with tifffile.TiffFile(DAPI_path) as tif:
+        ome_metadata = tif.ome_metadata
+        root = ET.fromstring(ome_metadata)
+        for element in root.iter():
+            if scalefactor_x_key in element.attrib.keys():
+                scalefactor_x = element.attrib[scalefactor_x_key]
+                scalefactor_y = element.attrib[scalefactor_y_key]
+    return [float(scalefactor_x), float(scalefactor_y)]
diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index 22a75855..19bc740a 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -231,14 +231,14 @@ def visium(
         image_hires = imread(path / VisiumKeys.IMAGE_HIRES_FILE, **imread_kwargs).squeeze().transpose(2, 0, 1)
         image_hires = DataArray(image_hires, dims=("c", "y", "x"))
         images[dataset_id + "_hires_image"] = Image2DModel.parse(
-            image_hires, transformations={"downscaled_hires": Identity()}, rgb=None
+            image_hires, transformations={"downscaled_hires": Identity(), "global": transform_hires.inverse()}, rgb=None
         )
     if (path / VisiumKeys.IMAGE_LOWRES_FILE).exists():
         image_lowres = imread(path / VisiumKeys.IMAGE_LOWRES_FILE, **imread_kwargs).squeeze().transpose(2, 0, 1)
         image_lowres = DataArray(image_lowres, dims=("c", "y", "x"))
         images[dataset_id + "_lowres_image"] = Image2DModel.parse(
             image_lowres,
-            transformations={"downscaled_lowres": Identity()},
+            transformations={"downscaled_lowres": Identity(), "global": transform_lowres.inverse()},
             rgb=None,
         )
diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py
index 43e21ae2..64215086 100644
--- a/src/spatialdata_io/readers/visium_hd.py
+++ b/src/spatialdata_io/readers/visium_hd.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import json
-import os
 import re
 import warnings
 from collections.abc import Mapping
@@ -122,12 +121,12 @@ def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None)
             stacklevel=2,
         )
 
-    def _get_bins(path: Path) -> list[str]:
+    def _get_bins(path_bins: Path) -> list[str]:
         return sorted(
             [
-                bin_size
-                for bin_size in os.listdir(path)
-                if os.path.isdir(os.path.join(path, bin_size)) and bin_size.startswith(VisiumHDKeys.BIN_PREFIX)
+                bin_size.name
+                for bin_size in path_bins.iterdir()
+                if bin_size.is_dir() and bin_size.name.startswith(VisiumHDKeys.BIN_PREFIX)
             ]
         )
@@ -331,10 +330,7 @@ def _get_bins(path: Path) -> list[str]:
     else:
         path_fullres = path / VisiumHDKeys.MICROSCOPE_IMAGE
         if path_fullres.exists():
-            fullres_image_filenames = [
-                f for f in os.listdir(path_fullres) if os.path.isfile(os.path.join(path_fullres, f))
-            ]
-            fullres_image_paths = [path_fullres / image_filename for image_filename in fullres_image_filenames]
+            fullres_image_paths = [file for file in path_fullres.iterdir() if file.is_file()]
         elif list((path_fullres := (path / f"{filename_prefix}tissue_image")).parent.glob(f"{path_fullres.name}.*")):
             fullres_image_paths = list(path_fullres.parent.glob(f"{path_fullres.name}.*"))
         else:
@@ -357,7 +353,7 @@ def _get_bins(path: Path) -> list[str]:
 
     if fullres_image_file is not None:
         load_image(
-            path=path / fullres_image_file,
+            path=fullres_image_file,
             suffix="_full_image",
             scale_factors=[2, 2, 2, 2],
         )
@@ -376,7 +372,7 @@ def _get_bins(path: Path) -> list[str]:
         )
         set_transformation(
             images[dataset_id + "_hires_image"],
-            {"downscaled_hires": Identity()},
+            {"downscaled_hires": Identity(), "global": transform_hires.inverse()},
             set_all=True,
         )
@@ -394,7 +390,7 @@ def _get_bins(path: Path) -> list[str]:
         )
         set_transformation(
             images[dataset_id + "_lowres_image"],
-            {"downscaled_lowres": Identity()},
+            {"downscaled_lowres": Identity(), "global": transform_lowres.inverse()},
             set_all=True,
         )
@@ -422,7 +418,7 @@ def _get_bins(path: Path) -> list[str]:
 
 def _infer_dataset_id(path: Path) -> str:
     suffix = f"_{VisiumHDKeys.FEATURE_SLICE_FILE.value}"
-    files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)) and f.endswith(suffix)]
+    files = [file.name for file in path.iterdir() if file.is_file() and file.name.endswith(suffix)]
     if len(files) == 0 or len(files) > 1:
         raise ValueError(
             f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an argument."
diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py
index 54f911d2..2a5d6165 100644
--- a/src/spatialdata_io/readers/xenium.py
+++ b/src/spatialdata_io/readers/xenium.py
@@ -84,7 +84,7 @@ def xenium(
 
     .. seealso::
 
-        - `10X Genomics Xenium file format `_.
+        - `10X Genomics Xenium file format `_.
 
     Parameters
     ----------
diff --git a/tests/_utils.py b/tests/_utils.py
new file mode 100644
index 00000000..9787d1bb
--- /dev/null
+++ b/tests/_utils.py
@@ -0,0 +1,31 @@
+import sys
+
+import pytest
+
+
+def skip_if_below_python_version() -> pytest.mark.skipif:
+    """
+    Decorator to skip tests if the Python version is below a specified version.
+
+    This decorator prevents running tests on unsupported Python versions. Update the `MIN_VERSION`
+    constant to change the minimum Python version required for the tests.
+
+    Returns
+    -------
+    pytest.mark.skipif
+        A pytest marker that skips the test if the current Python version is below the specified `MIN_VERSION`.
+
+    Notes
+    -----
+    The current minimum version is set to Python 3.12. Adjust the `MIN_VERSION` constant as needed
+    to accommodate newer Python versions.
+
+    Examples
+    --------
+    >>> @skip_if_below_python_version()
+    >>> def test_some_feature():
+    >>>     assert True
+    """
+    MIN_VERSION = (3, 12)
+    reason = f"Test requires Python {'.'.join(map(str, MIN_VERSION))} or higher"
+    return pytest.mark.skipif(sys.version_info < MIN_VERSION, reason=reason)
diff --git a/tests/test_macsima.py b/tests/test_macsima.py
new file mode 100644
index 00000000..784e4dff
--- /dev/null
+++ b/tests/test_macsima.py
@@ -0,0 +1,201 @@
+import math
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+import dask.array as da
+import pytest
+from spatialdata.models import get_channel_names
+
+from spatialdata_io.readers.macsima import (
+    ChannelMetadata,
+    MultiChannelImage,
+    macsima,
+    parse_name_to_cycle,
+)
+from tests._utils import skip_if_below_python_version
+
+RNG = da.random.default_rng(seed=0)
+
+if not (Path("./data/Lung_adc_demo").exists() or Path("./data/MACSimaData_HCA").exists()):
+    pytest.skip(
+        "Requires the Lung_adc_demo or MACSimaData_HCA datasets, please check "
+        "https://github.com/giovp/spatialdata-sandbox/macsima/Readme.md for instructions on how to get the data.",
+        allow_module_level=True,
+    )
+
+
+@skip_if_below_python_version()
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [
+        ("Lung_adc_demo", {"y": (0, 15460), "x": (0, 13864)}),
+        ("MACSimaData_HCA/HumanLiverH35", {"y": (0, 1154), "x": (0, 1396)}),
+    ],
+)
+def test_image_size(dataset: str, expected: dict[str, Any]) -> None:
+    from spatialdata import get_extent
+
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f)
+    el = sdata[list(sdata.images.keys())[0]]
+    cs = sdata.coordinate_systems[0]
+
+    extent: dict[str, tuple[float, float]] = get_extent(el, coordinate_system=cs)
+    extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent}
+    assert extent == expected
+
+
+@skip_if_below_python_version()
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [("Lung_adc_demo", 116), ("MACSimaData_HCA/HumanLiverH35", 102)],
+)
+def test_total_channels(dataset: str, expected: int) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f)
+    el = sdata[list(sdata.images.keys())[0]]
+
+    # get the number of channels
+    channels: int = len(get_channel_names(el))
+    assert channels == expected
+
+
+@skip_if_below_python_version()
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [
+        ("Lung_adc_demo", ["R0 DAPI", "R1 CD68", "R1 CD163"]),
+        ("MACSimaData_HCA/HumanLiverH35", ["R0 DAPI", "R1 PE", "R1 DAPI"]),
+    ],
+)
+def test_channel_names(dataset: str, expected: list[str]) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f, c_subset=3, include_cycle_in_channel_name=True)
+    el = sdata[list(sdata.images.keys())[0]]
+
+    # get the channel names
+    channels = get_channel_names(el)
+    assert list(channels) == expected
+
+
+@skip_if_below_python_version()
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [
+        ("Lung_adc_demo", 68),
+        ("MACSimaData_HCA/HumanLiverH35", 51),
+    ],
+)
+def test_total_rounds(dataset: str, expected: list[int]) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f)
+    table = sdata[list(sdata.tables)[0]]
+    max_cycle = table.var["cycle"].max()
+    assert max_cycle == expected
+
+
+@skip_if_below_python_version()
+@pytest.mark.parametrize(
+    "dataset,skip_rounds,expected",
+    [
+        ("Lung_adc_demo", list(range(2, 68)), ["DAPI (1)", "CD68", "CD163", "DAPI (2)", "Control"]),
+        (
+            "MACSimaData_HCA/HumanLiverH35",
+            list(range(2, 51)),
+            ["DAPI (1)", "PE", "CD14", "Vimentin", "DAPI (2)", "WT1"],
+        ),
+    ],
+)
+def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str]) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f, skip_rounds=skip_rounds)
+    el = sdata[list(sdata.images.keys())[0]]
+
+    # get the channel names
+    channels = get_channel_names(el)
+    assert list(channels) == expected, f"Expected {expected}, got {list(channels)}"
+
+
+@skip_if_below_python_version()
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [
+        ("Lung_adc_demo", [0, 1, 1]),
+        ("MACSimaData_HCA/HumanLiverH35", [0, 1, 1]),
+    ],
+)
+def test_cycle_metadata(dataset: str, expected: list[str]) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f, c_subset=3)
+    table = sdata[list(sdata.tables.keys())[0]]
+
+    # get the channel names
+    cycles = table.var["cycle"]
+    assert list(cycles) == expected
+
+
+def test_parsing_style() -> None:
+    with pytest.raises(ValueError):
+        macsima(Path("."), parsing_style="not_a_parsing_style")
+
+
+@pytest.mark.parametrize(
+    "name,expected",
+    [
+        ("C-002_S-000_S_FITC_R-01_W-C-1_ROI-01_A-CD147_C-REA282.tif", 2),
+        ("001_S_R-01_W-B-1_ROI-01_A-CD14REA599ROI1_C-REA599.ome.tif", 1),
+    ],
+)
+def test_parsing_of_name_to_cycle(name: str, expected: int) -> None:
+    result = parse_name_to_cycle(name)
+    assert result == expected
+
+
+def test_mci_sort_by_channel() -> None:
+    sizes = [100, 200, 300]
+    c_names = ["test11", "test3", "test2"]
+    cycles = [2, 0, 1]
+    mci = MultiChannelImage(
+        data=[RNG.random((size, size), chunks=(10, 10)) for size in sizes],
+        metadata=[ChannelMetadata(name=c_name, cycle=cycle) for c_name, cycle in zip(c_names, cycles)],
+    )
+    assert mci.get_channel_names() == c_names
+    assert [x.shape[0] for x in mci.data] == sizes
+    mci.sort_by_channel()
+    assert mci.get_channel_names() == ["test3", "test2", "test11"]
+    assert [x.shape[0] for x in mci.data] == [200, 300, 100]
+
+
+def test_mci_array_reference() -> None:
+    arr1 = RNG.random((100, 100), chunks=(10, 10))
+    arr2 = RNG.random((200, 200), chunks=(10, 10))
+    mci = MultiChannelImage(
+        data=[arr1, arr2],
+        metadata=[ChannelMetadata(name="test1", cycle=0), ChannelMetadata(name="test2", cycle=1)],
+    )
+    orig_arr1 = arr1.copy()
+
+    # test we can subset by index and by name
+    subset_mci = MultiChannelImage.subset_by_index(mci, [0])
+    assert subset_mci.get_channel_names() == ["test1"]
+
+    subset_mci_name = MultiChannelImage.subset_by_channel(mci, "test")
+    assert subset_mci_name.get_channel_names() == ["test1", "test2"]
+
+    # test that the subset is a view
+    assert subset_mci.data[0] is arr1
+    assert da.all(subset_mci.data[0] == orig_arr1)
+    # test that a deepcopy is not a view
+    deepcopy_mci: MultiChannelImage = deepcopy(mci)
+    deepcopy_mci.data[0][0, 0] = deepcopy_mci.data[0][0, 0] + 1
+    assert deepcopy_mci.data[0] is not arr1
+    assert not da.all(deepcopy_mci.data[0] == orig_arr1)
+    # test that the original mci is not changed
+    assert da.all(mci.data[0] == orig_arr1)
diff --git a/tests/test_seqfish.py b/tests/test_seqfish.py
new file mode 100644
index 00000000..bdbf2a60
--- /dev/null
+++ b/tests/test_seqfish.py
@@ -0,0 +1,29 @@
+import math
+from pathlib import Path
+
+import pytest
+
+from spatialdata_io.readers.seqfish import seqfish
+from tests._utils import skip_if_below_python_version
+
+
+# See https://github.com/scverse/spatialdata-io/blob/main/.github/workflows/prepare_test_data.yaml for instructions on
+# how to download and place the data on disk
+@skip_if_below_python_version()
+@pytest.mark.parametrize(
+    "dataset,expected", [("seqfish-2-test-dataset/instrument 2 official", "{'y': (0, 108), 'x': (0, 108)}")]
+)
+@pytest.mark.parametrize("rois", [[1], None])
+@pytest.mark.parametrize("cells_as_circles", [False, True])
+def test_example_data(dataset: str, expected: str, rois: list[int] | None, cells_as_circles: bool) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = seqfish(f, cells_as_circles=cells_as_circles, rois=rois)
+    from spatialdata import get_extent
+
+    extent = get_extent(sdata, exact=False)
+    extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent}
+    if cells_as_circles:
+        # manual correction required to account for the circle radii
+        expected = "{'y': (-2, 109), 'x': (-2, 109)}"
+    assert str(extent) == expected
diff --git a/tests/test_xenium.py b/tests/test_xenium.py
index 1c64a912..32408323 100644
--- a/tests/test_xenium.py
+++ b/tests/test_xenium.py
@@ -1,5 +1,4 @@
 import math
-import sys
 from pathlib import Path
 
 import numpy as np
@@ -10,6 +9,7 @@
     prefix_suffix_uint32_from_cell_id_str,
     xenium,
 )
+from tests._utils import skip_if_below_python_version
 
 
 def test_cell_id_str_from_prefix_suffix_uint32() -> None:
@@ -40,13 +40,9 @@ def test_roundtrip_with_data_limits() -> None:
     assert np.array_equal(cell_id_str, f0(*f1(cell_id_str)))
 
 
-# The datasets should be downloaded from
-# https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/resources/xenium-example-data#test-data
-# and placed in the "data" directory; if you run the tests locally you may need to create a symlink in "tests/data"
-# pointing to "data".
-# The GitHub workflow "prepare_test_data.yaml" takes care of downloading the datasets and uploading an artifact for the
-# tests to use
-@pytest.mark.skipif(sys.version_info < (3, 12), reason="Test requires Python 3.10 or higher")
+# See https://github.com/scverse/spatialdata-io/blob/main/.github/workflows/prepare_test_data.yaml for instructions on
+# how to download and place the data on disk
+@skip_if_below_python_version()
 @pytest.mark.parametrize(
     "dataset,expected",
     [
@@ -63,3 +59,6 @@ def test_example_data(dataset: str, expected: str) -> None:
     extent = get_extent(sdata, exact=False)
     extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent}
     assert str(extent) == expected
+
+
+# TODO: add tests for Xenium 3.0.0
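The new tests above exercise the seqFISH v2 support through pytest. For a quick manual check outside the test suite, the snippet below is a minimal usage sketch, not part of the patch: it assumes the seqfish-2-test-dataset archive has been downloaded and unzipped under ./data as done in prepare_test_data.yaml, and it reuses the ROI folder name and reader arguments from tests/test_seqfish.py.

# minimal sketch, assuming the on-disk layout produced by prepare_test_data.yaml
from pathlib import Path

from spatialdata import get_extent
from spatialdata_io.readers.seqfish import seqfish

path = Path("./data/seqfish-2-test-dataset/instrument 2 official")

# load one ROI, representing cells as circles derived from the cell areas
sdata = seqfish(path, cells_as_circles=True, rois=[1])

# images and labels are now registered to the "global" coordinate system via the
# OME scale factors parsed by _get_scale_factors
print(sdata.coordinate_systems)
print(get_extent(sdata, exact=False))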