Skip to content

Commit

Permalink
WIP: Ingest tissue positions from Visium dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
jp-dark committed Apr 5, 2024
1 parent d873690 commit 4714231
Show file tree
Hide file tree
Showing 3 changed files with 225 additions and 0 deletions.
6 changes: 6 additions & 0 deletions apis/python/src/tiledbsoma/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@
Do NOT merge this into main.
"""

from .ingest import from_visium

__all__ = [
"from_visium",
]
217 changes: 217 additions & 0 deletions apis/python/src/tiledbsoma/experimental/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
# Copyright (c) 2024 TileDB, Inc,
#
# Licensed under the MIT License.

"""Experimental ingestion methods.
This module contains experimental methods to generate Spatial SOMA artifacts
start from other formats.
Do NOT merge into main.
"""

import json
import pathlib
from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Type, Union

import numpy as np
import pandas as pd
import scanpy

from .. import Collection, DataFrame, DenseNDArray, Experiment, SparseNDArray
from .._constants import SOMA_JOINID
from .._tiledb_object import AnyTileDBObject
from .._types import IngestMode
from ..io import from_anndata
from ..io.ingest import (
IngestCtx,
IngestionParams,
_create_or_open_collection,
_maybe_set,
_write_dataframe_impl,
)

if TYPE_CHECKING:
from somacore.options import PlatformConfig

from .._types import Path
from ..io._registration import ExperimentAmbientLabelMapping
from ..io.ingeset import AdditionalMetadata
from ..options import SOMATileDBContext


def from_visium(
experiment_uri: str,
input_path: "Path",
measurement_name: str,
scene_name: str,
*,
context: Optional["SOMATileDBContext"] = None,
platform_config: Optional["PlatformConfig"] = None,
obs_id_name: str = "obs_id",
var_id_name: str = "var_id",
X_layer_name: str = "data",
raw_X_layer_name: str = "data",
ingest_mode: IngestMode = "write",
use_relative_uri: Optional[bool] = None,
X_kind: Union[Type[SparseNDArray], Type[DenseNDArray]] = SparseNDArray,
registration_mapping: Optional["ExperimentAmbientLabelMapping"] = None,
uns_keys: Optional[Sequence[str]] = None,
additional_metadata: "AdditionalMetadata" = None,
use_raw_counts: bool = True,
) -> str:
"""Reads a 10x Visium dataset and writes it to an :class:`Experiment`.
This function is for ingesting Visium data for prototyping and testing the
proposed spatial design.
TODO: Args list
WARNING: This was only tested for Space Ranger version 2 output.
Lifecycle:
Experimental
"""

if ingest_mode != "write":
raise NotImplementedError(
f'the only ingest_mode currently supported is "write"; got "{ingest_mode}"'
)

# Get input file locations.
input_path = pathlib.Path(input_path)

input_gene_expression = (
input_path / "raw_feature_bc_matrix.h5"
if use_raw_counts
else input_path / "filtered_feature_bc_matrix.h5"
)

# Note: Hard-coded for Space Range version >= 2
input_tissue_positions = input_path / "spatial/tissue_positions.csv"
input_scale_factors = input_path / "spatial/scalefactors_json.json"

# input_images = {
# "hires": input_path / "spatial/tissue_hires_image.png",
# "lowres": input_path / "spatial/tissue_lowres_image.png",
# }

# Create the
anndata = scanpy.read_10x_h5(input_gene_expression)
uri = from_anndata(
experiment_uri,
anndata,
measurement_name,
context=context,
platform_config=platform_config,
obs_id_name=obs_id_name,
var_id_name=var_id_name,
X_layer_name=X_layer_name,
raw_X_layer_name=raw_X_layer_name,
ingest_mode=ingest_mode,
use_relative_uri=use_relative_uri,
X_kind=X_kind,
registration_mapping=registration_mapping,
uns_keys=uns_keys,
additional_metadata=additional_metadata,
)

ingest_ctx: IngestCtx = {
"context": context,
"ingestion_params": IngestionParams(ingest_mode, registration_mapping),
"additional_metadata": additional_metadata,
}

# Get JSON scale factors.
with open(input_scale_factors, mode="r", encoding="utf-8") as scale_factors_json:
scale_factors = json.load(scale_factors_json)

with Experiment.open(uri, mode="r", context=context) as experiment:
obs_df = experiment.obs.read().concat().to_pandas()
with Experiment.open(uri, mode="w", context=context) as experiment:
spatial_uri = f"{uri}/spatial"
with _create_or_open_collection(
Collection[Collection[AnyTileDBObject]], spatial_uri, **ingest_ctx
) as spatial:
_maybe_set(
experiment, "spatial", spatial, use_relative_uri=use_relative_uri
)
scene_uri = f"{spatial_uri}/{scene_name}"
with _create_or_open_collection(
Collection[AnyTileDBObject], scene_uri, **ingest_ctx
) as scene:
_maybe_set(
spatial, scene_name, scene, use_relative_uri=use_relative_uri
)
obs_locations_uri = f"{scene_uri}/obs_locations"
# TODO: The `obs_df` on the next line should be a dataframe with only
# soma_joinid and obs_id. Not currently bothering to check/enforce this.
with _write_visium_spot_dataframe(
obs_locations_uri,
input_tissue_positions,
scale_factors,
obs_df,
obs_id_name,
**ingest_ctx,
) as obs_locations:
_maybe_set(
scene,
"obs_locations",
obs_locations,
use_relative_uri=use_relative_uri,
)
# images_uri = f"{scene_uri}/images"
return uri


def _write_visium_spot_dataframe(
df_uri: str,
input_tissue_positions: pathlib.Path,
scale_factors: Dict[str, Any],
obs_df: pd.DataFrame,
id_column_name: str,
*,
ingestion_params: IngestionParams,
additional_metadata: "AdditionalMetadata" = None,
platform_config: Optional["PlatformConfig"] = None,
context: Optional["SOMATileDBContext"] = None,
) -> DataFrame:
"""TODO: Add _write_visium_spot_dataframe docs"""
# Create the
spot_radius = 0.5 * scale_factors["spot_diameter_fullres"]
df = (
pd.read_csv(input_tissue_positions)
.rename(
columns={
"barcode": id_column_name,
"pxl_col_in_fullres": "y",
"pxl_row_in_fullres": "x",
}
)
.assign(_soma_geometry=np.double(spot_radius))
)

df = pd.merge(obs_df, df, how="inner", on=id_column_name)
return _write_dataframe_impl(
df,
df_uri,
id_column_name,
ingestion_params=ingestion_params,
additional_metadata=additional_metadata,
index_column_names=("y", "x", SOMA_JOINID),
platform_config=platform_config,
context=context,
)


def _write_visium_images(
image_uri: str,
input_images: Dict[str, pathlib.Path],
input_scale_factors: Dict[str, Any],
*,
ingestion_params: IngestionParams,
additional_metadata: "AdditionalMetadata" = None,
platform_config: Optional["PlatformConfig"] = None,
context: Optional["SOMATileDBContext"] = None,
) -> Collection[DenseNDArray]:
raise NotImplementedError()
2 changes: 2 additions & 0 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,7 @@ def _write_dataframe_impl(
*,
ingestion_params: IngestionParams,
additional_metadata: AdditionalMetadata = None,
index_column_names: Sequence[str] = (SOMA_JOINID,),
original_index_name: Optional[str] = None,
platform_config: Optional[PlatformConfig] = None,
context: Optional[SOMATileDBContext] = None,
Expand Down Expand Up @@ -1199,6 +1200,7 @@ def _write_dataframe_impl(
soma_df = DataFrame.create(
df_uri,
schema=arrow_table.schema,
index_column_names=index_column_names,
platform_config=platform_config,
context=context,
)
Expand Down

0 comments on commit 4714231

Please sign in to comment.