Skip to content

Commit

Permalink
[python] Add support for Visium v1 (#3510)
Browse files Browse the repository at this point in the history
1. Explicitly set header columns for the Visium v1 tissue positions file.
2. Throw error if Visium version number is unknown.
3. Make the check for the Visium tissue positions filename version-specific (this can be overridden by the user).
  • Loading branch information
jp-dark authored Jan 3, 2025
1 parent c5ed9da commit 7addb3e
Showing 1 changed file with 41 additions and 42 deletions.
83 changes: 41 additions & 42 deletions apis/python/src/tiledbsoma/io/spatial/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,7 @@
import json
import warnings
from pathlib import Path
from typing import (
TYPE_CHECKING,
List,
Sequence,
Tuple,
Type,
Union,
)
from typing import TYPE_CHECKING, List, Sequence, Tuple, Type

import attrs
import numpy as np
Expand Down Expand Up @@ -104,7 +97,7 @@ class VisiumPaths:
@classmethod
def from_base_folder(
cls,
base_path: Union[str, Path],
base_path: str | Path,
*,
gene_expression: str | Path | None = None,
scale_factors: str | Path | None = None,
Expand Down Expand Up @@ -162,9 +155,9 @@ def from_base_folder(
@classmethod
def from_spatial_folder(
cls,
spatial_dir: Union[str, Path],
spatial_dir: str | Path,
gene_expression: str | Path,
*,
gene_expression: Union[str, Path],
scale_factors: str | Path | None = None,
tissue_positions: str | Path | None = None,
fullres_image: str | Path | None = None,
Expand All @@ -189,30 +182,22 @@ def from_spatial_folder(
try:
version = _read_visium_software_version(gene_expression)
except (KeyError, ValueError):
warnings.warn(
raise ValueError(
"Unable to determine Space Ranger vesion from gene expression file."
)
major_version = version[0] if isinstance(version, tuple) else version

# Find the tissue positions file path if it wasn't supplied.
if tissue_positions is None:
major_version = version[0] if isinstance(version, tuple) else version
if major_version == 1:
possible_file_names = [
"tissue_positions_list.csv",
"tissue_positions.csv",
]
else:
possible_file_names = [
"tissue_positions.csv",
"tissue_positions_list.csv",
]
for possible in possible_file_names:
tissue_positions = spatial_dir / possible
if tissue_positions.exists():
break
possible_file_name = "tissue_positions_list.csv"
else:
possible_file_name = "tissue_positions.csv"
tissue_positions = spatial_dir / possible_file_name
if not tissue_positions.exists():
raise OSError(
f"No tissue position file found in {spatial_dir}. Tried files: "
f"{possible_file_names}. If the file has been renamed it can be "
f"No tissue position file found in {spatial_dir}. Tried file: "
f"{possible_file_name}. If the file has been renamed it can be "
f"directly specified using argument `tissue_positions`."
)

Expand Down Expand Up @@ -247,17 +232,7 @@ def from_spatial_folder(
lowres_image: Path | None = attrs.field(
converter=optional_path_converter, validator=optional_path_validator
)
version: int | Tuple[int, int, int] | None = attrs.field(default=None)

@version.validator
def _validate_version( # type: ignore[no-untyped-def]
self, attribute, value: int | Tuple[int, int, int] | None
) -> None:
major_version = value[0] if isinstance(value, tuple) else value
if major_version is not None and major_version != 2:
warnings.warn(
f"Support for Space Ranger version {value} has not been tests."
)
version: int | Tuple[int, int, int]

@property
def has_image(self) -> bool:
Expand All @@ -267,10 +242,14 @@ def has_image(self) -> bool:
or self.lowres_image is not None
)

# Major version number: the first element of ``self.version`` when it is a
# tuple, otherwise ``self.version`` itself (already a bare integer).
@property
def major_version(self) -> int:
return self.version[0] if isinstance(self.version, tuple) else self.version


def from_visium(
experiment_uri: str,
input_path: Union[Path, VisiumPaths],
input_path: Path | VisiumPaths,
measurement_name: str,
scene_name: str,
*,
Expand All @@ -284,7 +263,7 @@ def from_visium(
image_channel_first: bool = True,
ingest_mode: IngestMode = "write",
use_relative_uri: bool | None = None,
X_kind: Union[Type[SparseNDArray], Type[DenseNDArray]] = SparseNDArray,
X_kind: Type[SparseNDArray] | Type[DenseNDArray] = SparseNDArray,
registration_mapping: "ExperimentAmbientLabelMapping | None" = None,
uns_keys: Sequence[str] | None = None,
additional_metadata: "AdditionalMetadata" = None,
Expand Down Expand Up @@ -423,6 +402,20 @@ def from_visium(
else VisiumPaths.from_base_folder(input_path, use_raw_counts=use_raw_counts)
)

# Check the version.
major_version = (
input_paths.version[0]
if isinstance(input_paths.version, tuple)
else input_paths.version
)
if major_version is None:
raise ValueError("Unable to determine version number of Visium input")
if major_version not in {1, 2, 3}:
raise ValueError(
f"Visium version {input_paths.version} is not supported. Expected major "
f"version 1, 2, or 3."
)

# Get JSON scale factors.
with open(
input_paths.scale_factors, mode="r", encoding="utf-8"
Expand Down Expand Up @@ -572,6 +565,7 @@ def from_visium(
with _write_visium_spots(
loc_uri,
input_paths.tissue_positions,
input_paths.major_version,
pixels_per_spot_diameter,
obs_df,
obs_id_name,
Expand Down Expand Up @@ -686,6 +680,7 @@ def _write_scene_presence_dataframe(
def _write_visium_spots(
df_uri: str,
input_tissue_positions: Path,
major_version: int,
spot_diameter: float,
obs_df: pd.DataFrame,
id_column_name: str,
Expand All @@ -698,8 +693,12 @@ def _write_visium_spots(
"""Creates, opens, and writes data to a ``PointCloudDataFrame`` with the spot
locations and metadata. Returns the open dataframe for writing.
"""
if major_version == 1:
names = [id_column_name, "in_tissue", "array_row", "array_col", "y", "x"]
else:
names = None
df = (
pd.read_csv(input_tissue_positions)
pd.read_csv(input_tissue_positions, names=names)
.rename(
columns={
"barcode": id_column_name,
Expand Down

0 comments on commit 7addb3e

Please sign in to comment.