Skip to content

Commit

Permalink
Merge branch 'ivirshup/census-builder-spatial' into ebezzi/fix-consolidation
Browse files Browse the repository at this point in the history
  • Loading branch information
ebezzi committed Jan 11, 2025
2 parents 3fe22e8 + 8b54aea commit e5bb840
Show file tree
Hide file tree
Showing 9 changed files with 22 additions and 25 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/full-unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ env:

jobs:
py_unit_tests:
runs-on: single-cell-1tb-runner
runs-on: sc-dev-64g-runner
timeout-minutes: 1440 # 24 hour timeout
strategy:
fail-fast: false # prevent this job from killing other jobs
Expand Down Expand Up @@ -88,7 +88,7 @@ jobs:
PYTHONPATH=. pytest -v --durations=0 -rP --experimental --expensive ./api/python/cellxgene_census/tests/
r_unit_tests:
runs-on: single-cell-1tb-runner
runs-on: sc-dev-64g-runner
timeout-minutes: 1440 # 24 hour timeout
strategy:
fail-fast: false # prevent this job from killing other jobs
Expand Down
13 changes: 6 additions & 7 deletions .github/workflows/lts-compat-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ jobs:
census-build-version: # Add additional LTS releases as they occur
- "latest"
- "stable"
- "2024-07-01"
- "2023-12-15"
- "2023-07-25"
- "2023-05-15"
py-pkg-version:
- "~=1.10.0"
- "~=1.11.0"
- "~=1.12.0"
- "~=1.13.0"
- "~=1.14.0"
- "~=1.15.0"
- "~=1.16.0"
- "head-of-main"

runs-on: ${{matrix.os}}
Expand All @@ -43,12 +43,11 @@ jobs:
- name: Install dependencies
run: |
python -m pip install -U pip setuptools wheel
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ./api/python/cellxgene_census/scripts/requirements-dev.txt
if [ ${{matrix.py-pkg-version}} == "head-of-main" ]; then
pip install -e ./api/python/cellxgene_census/
pip install -e ./api/python/cellxgene_census/ -r ./api/python/cellxgene_census/scripts/requirements-dev.txt
else
pip install -U cellxgene_census${{ matrix.py-pkg-version }}
pip install -U cellxgene_census${{ matrix.py-pkg-version }} -r ./api/python/cellxgene_census/scripts/requirements-dev.txt
fi
- name: Test with pytest (API, main tests)
Expand Down
4 changes: 2 additions & 2 deletions api/python/cellxgene_census/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dependencies= [
# NOTE: the tiledbsoma version must be >= the version used in the Census builder, to
# ensure that the assets are readable (tiledbsoma supports backward compatible reading).
# Make sure this version does not fall behind the builder's tiledbsoma version.
"tiledbsoma>=1.12.3,!=1.14.1",
"tiledbsoma>=1.15.3",
"anndata",
"numpy>=1.23,<2.0",
"requests",
Expand All @@ -41,7 +41,7 @@ dependencies= [
[project.optional-dependencies]
experimental = [
"torch",
"torchdata~=0.7",
"torchdata~=0.7,<0.10",
"scikit-learn>=1.2",
"scikit-misc>=0.2,<0.4", # scikit-misc 0.3 dropped Python 3.8 support, and 0.4 doesn't have MacOS/ARM wheels
"datasets~=2.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from tiledbsoma import Experiment, ExperimentAxisQuery


class CellDatasetBuilder(ExperimentAxisQuery[Experiment], ABC): # type: ignore
class CellDatasetBuilder(ExperimentAxisQuery, ABC): # type: ignore
"""Abstract base class for methods to process CELLxGENE Census ExperimentAxisQuery
results into a Hugging Face Dataset in which each item represents one cell.
Subclasses implement the `cell_item()` method to process each row of an X layer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def __init__(self, n_samples: int, n_variables: int, nnz_only: bool = False):
self.n_samples = n_samples
self.nnz_only = nnz_only
# If we want to exclude zeros, we need to keep track of the denominator
self.n = np.zeros(n_variables)
self.n = np.zeros(n_variables, dtype=np.float64)

def update(self, var_vec: npt.NDArray[np.int64], val_vec: npt.NDArray[np.float32]) -> None:
if self.nnz_only:
Expand All @@ -106,9 +106,9 @@ def update(self, var_vec: npt.NDArray[np.int64], val_vec: npt.NDArray[np.float32

def finalize(self) -> npt.NDArray[np.float64]:
if self.nnz_only:
return self.u / self.n
return np.divide(self.u, self.n, dtype=np.float64)
else:
return self.u / self.n_samples
return np.divide(self.u, self.n_samples, dtype=np.float64)


class CountsAccumulator:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def test_counts(matrix: sparse.coo_matrix, n_batches: int, stride: int) -> None:
assert n_samples.sum() == matrix.shape[0]
assert len(n_samples) == n_batches

clip_val = 50 * np.random.rand(n_batches, matrix.shape[1])
clip_val = (50 * np.random.rand(n_batches, matrix.shape[1])).astype(np.float64)

ca = CountsAccumulator(n_batches, matrix.shape[1], clip_val)
for i in range(0, matrix.nnz, stride):
Expand Down
2 changes: 0 additions & 2 deletions api/python/cellxgene_census/tests/test_get_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,3 @@ def test_get_presence_matrix(organism: str, census: soma.Collection) -> None:
assert pm.shape[1] == len(
census["census_data"][organism].ms["RNA"].var.read(column_names=["soma_joinid"]).concat().to_pandas()
)

census.close()
Original file line number Diff line number Diff line change
Expand Up @@ -784,10 +784,12 @@ def _validate_X_layers_raw_contents(
# the expected_X matrix.
raw_sum = np.zeros((len(obs_joinids_split),), dtype=np.float64) # 64 bit for numerical stability
np.add.at(raw_sum, rows_by_position, X_raw_data)
raw_sum = raw_sum.astype(
CENSUS_OBS_TABLE_SPEC.field("raw_sum").to_pandas_dtype()
) # cast to the storage type
assert np.allclose(raw_sum, obs_df.raw_sum.iloc[idx : idx + STRIDE].to_numpy())
assert np.allclose(
raw_sum.astype(
CENSUS_OBS_TABLE_SPEC.field("raw_sum").to_pandas_dtype()
), # cast to the storage type
obs_df.raw_sum.iloc[idx : idx + STRIDE].to_numpy(),
)
del raw_sum

# Assertion 1 - the contents of the X matrix are EQUAL for all var values present in the AnnData
Expand Down
4 changes: 1 addition & 3 deletions tools/census_contrib/src/census_contrib/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,13 @@ def __next__(self) -> pa.Table:

i = np.empty((n_embeddings, self.n_features), dtype=np.int64)
i.T[:] = next_block
i = i.ravel()

j = np.empty((n_embeddings, self.n_features), dtype=np.int64)
j[:] = np.arange(self.n_features)
j = j.ravel()

d = self._scale * self.rng.random((n_embeddings * self.n_features), dtype=np.float32) + self._offset

return pa.Table.from_pydict({"i": i, "j": j, "d": d})
return pa.Table.from_pydict({"i": i.ravel(), "j": j.ravel(), "d": d})

@property
def type(self) -> pa.DataType:
Expand Down

0 comments on commit e5bb840

Please sign in to comment.