Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add API for writing HCS metadata #153

Merged
merged 7 commits into from
Jan 12, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@ jobs:
- {os: windows-latest, python_Version: '3.7', toxenv: 'py37'}
- {os: windows-latest, python_Version: '3.8', toxenv: 'py38'}
- {os: windows-latest, python_Version: '3.9', toxenv: 'py39'}
# Linux still not working
# {os: linux-latest, python_Version: '3.7', toxenv: 'py37'}
# {os: linux-latest, python_Version: '3.8', toxenv: 'py38'}
# {os: linux-latest, python_Version: '3.9', toxenv: 'py39'}
- {os: ubuntu-latest, python_Version: '3.7', toxenv: 'py37'}
- {os: ubuntu-latest, python_Version: '3.8', toxenv: 'py38'}
- {os: ubuntu-latest, python_Version: '3.9', toxenv: 'py39'}
- {os: macos-latest, python_Version: '3.7', toxenv: 'py37'}
- {os: macos-latest, python_Version: '3.8', toxenv: 'py38'}
# missing numcodecs wheels on 3.9. conda not yet an option. see gh-51
Expand Down
2 changes: 1 addition & 1 deletion ome_zarr/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def init_store(self, path: str, mode: str = "r") -> FSStore:

kwargs = {
"dimension_separator": "/",
"normalize_keys": True,
"normalize_keys": False,
}

mkdir = True
Expand Down
128 changes: 127 additions & 1 deletion ome_zarr/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

"""
import logging
from typing import Any, List, Tuple, Union
from typing import Any, Dict, List, Tuple, Union

import numpy as np
import zarr
Expand Down Expand Up @@ -70,6 +70,53 @@ def _validate_axes(axes: List[str], fmt: Format = CurrentFormat()) -> None:
raise ValueError("5D data must have axes ('t', 'c', 'z', 'y', 'x')")


def _validate_well_images(images: List, fmt: Format = CurrentFormat()) -> None:

VALID_KEYS = [
"acquisition",
"path",
]
for index, image in enumerate(images):
if isinstance(image, str):
images[index] = {"path": str(image)}
elif isinstance(image, dict):
if not all(e in VALID_KEYS for e in image.keys()):
raise ValueError(f"{image} contains invalid keys")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Has the spec been updated to say there "MUST NOT" be other keys? Eventually, that could be problematic. Perhaps the prefix mechanism that @will-moore found in json-schema, which leaves prefixed keys unchecked, could eventually be introduced here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for picking up on this. I have not found a clear statement regarding the validity of additional keys in the ngff spec. This strict implementation is probably derived from my reading of multiscales, where I assumed that metadata is the single key intended to capture extra arguments.

That being said, I totally see that this MUST NOT interpretation is, and will remain, limiting both for the extension of the specification itself and for supporting external metadata not covered by the spec.

The well metadata is a perfect example as ome/ngff#24 defines more keys. Assuming someone wanted to write a version 0.3 with these new keys populated, is it legit and desired to let the writer implementation write this metadata? If so, is it worth a logging statement at WARN, INFO or DEBUG level? Or should this writer only care about the keys defined in the spec and ignore anything extra?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it was salad schema where you could use a prefix: to avoid validation, but I think for json-schema you don't need to do that and it won't fail for unrecognised attributes.

if "path" not in image:
raise ValueError(f"{image} must contain a path key")
if not isinstance(image["path"], str):
raise ValueError(f"{image} path must be of string type")
if "acquisition" in image and not isinstance(image["acquisition"], int):
raise ValueError(f"{image} acquisition must be of int type")
else:
raise ValueError(f"Unrecognized type for {image}")


def _validate_plate_acquisitions(
    acquisitions: List[Dict], fmt: Format = CurrentFormat()
) -> None:
    """
    Check that every plate acquisition is a well-formed dictionary.

    Parameters
    ----------
    acquisitions: list of dict
        The acquisitions to check. ``None`` is accepted and skips all checks.
    fmt: Format
        Not consulted by the checks below.

    Raises
    ------
    ValueError
        If an entry is not a dictionary, uses a key outside the permitted
        set, has no ``id`` key, or has an ``id`` that is not an int.
    """
    if acquisitions is None:
        return

    permitted_keys = (
        "id",
        "name",
        "maximumfieldcount",
        "description",
        "starttime",
        "endtime",
    )
    for entry in acquisitions:
        if not isinstance(entry, dict):
            raise ValueError(f"{entry} must be a dictionary")
        if any(key not in permitted_keys for key in entry):
            raise ValueError(f"{entry} contains invalid keys")
        if "id" not in entry:
            raise ValueError(f"{entry} must contain an id key")
        if not isinstance(entry["id"], int):
            raise ValueError(f"{entry} id must be of int type")


def write_multiscale(
pyramid: List,
group: zarr.Group,
Expand Down Expand Up @@ -147,6 +194,85 @@ def write_multiscales_metadata(
group.attrs["multiscales"] = multiscales


def write_plate_metadata(
    group: zarr.Group,
    rows: List[str],
    columns: List[str],
    wells: List[str],
    fmt: Format = CurrentFormat(),
    acquisitions: List[dict] = None,
    field_count: int = None,
    name: str = None,
) -> None:
    """
    Store the plate metadata under the ``plate`` attribute of the group.

    Parameters
    ----------
    group: zarr.Group
        the group within the zarr store to write the metadata in.
    rows: list of str
        The names of the plate rows
    columns: list of str
        The names of the plate columns
    wells: list of str
        The paths of the well groups
    fmt: Format
        The format of the ome_zarr data which should be used.
        Defaults to the most current.
    acquisitions: list of dict
        The plate acquisitions; validated before being stored and
        omitted from the metadata when None
    field_count: int
        The maximum number of fields per view across wells; omitted
        from the metadata when None
    name: str
        The plate name; omitted from the metadata when None

    Raises
    ------
    ValueError
        If ``acquisitions`` is given and fails validation. Nothing is
        written to the group in that case.
    """
    column_entries = [{"name": str(column)} for column in columns]
    row_entries = [{"name": str(row)} for row in rows]
    well_entries = [{"path": str(well_path)} for well_path in wells]

    plate: Dict[str, Union[str, int, List[Dict]]] = {
        "columns": column_entries,
        "rows": row_entries,
        "wells": well_entries,
        "version": fmt.version,
    }

    # Optional scalar entries are only emitted when the caller supplied them.
    optional_entries: Tuple[Tuple[str, Union[str, int, None]], ...] = (
        ("name", name),
        ("field_count", field_count),
    )
    for key, value in optional_entries:
        if value is not None:
            plate[key] = value

    if acquisitions is not None:
        _validate_plate_acquisitions(acquisitions)
        plate["acquisitions"] = acquisitions

    group.attrs["plate"] = plate


def write_well_metadata(
    group: zarr.Group,
    images: Union[List[str], List[dict]],
    fmt: Format = CurrentFormat(),
) -> None:
    """
    Write the well metadata in the group.

    Parameters
    ----------
    group: zarr.Group
        the group within the zarr store to write the metadata in.
    images: list of str or list of dict
        The images of the well. A string entry is taken as the image path;
        a dictionary entry must hold a string ``path`` and may hold an
        integer ``acquisition``. The list passed in is left unmodified.
    fmt: Format
        The format of the ome_zarr data which should be used.
        Defaults to the most current.

    Raises
    ------
    ValueError
        If an entry of ``images`` fails validation. Nothing is written to
        the group in that case.
    """
    # Validation rewrites string entries as {"path": ...} dictionaries in
    # place, so run it on a copy to keep the caller's list untouched.
    well_images: List = list(images)
    _validate_well_images(well_images, fmt)
    well = {
        "images": well_images,
        "version": fmt.version,
    }
    group.attrs["well"] = well


def write_image(
image: np.ndarray,
group: zarr.Group,
Expand Down
116 changes: 115 additions & 1 deletion tests/test_node.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import pytest
import zarr
from numpy import zeros

from ome_zarr.data import create_zarr
from ome_zarr.format import FormatV01, FormatV02, FormatV03
from ome_zarr.io import parse_url
from ome_zarr.reader import Node
from ome_zarr.reader import Label, Labels, Multiscales, Node, Plate, Well
from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata


class TestNode:
Expand All @@ -15,15 +19,125 @@ def test_image(self):
node = Node(parse_url(str(self.path)), list())
assert node.data
assert node.metadata
assert len(node.specs) == 2
assert isinstance(node.specs[0], Multiscales)

def test_labels(self):
    """The ``labels`` group resolves to a single Labels spec without data."""
    labels_location = parse_url(str(self.path.join("labels")))
    node = Node(labels_location, [])
    assert not node.data
    assert not node.metadata
    specs = node.specs
    assert len(specs) == 1
    assert isinstance(specs[0], Labels)

def test_label(self):
    """The ``labels/coins`` group resolves to Label then Multiscales specs."""
    coins_location = parse_url(str(self.path.join("labels", "coins")))
    node = Node(coins_location, [])
    assert node.data
    assert node.metadata
    specs = node.specs
    assert len(specs) == 2
    assert isinstance(specs[0], Label)
    assert isinstance(specs[1], Multiscales)


class TestHCSNode:
    """Write plate and well metadata, then read it back through ``Node``."""

    @pytest.fixture(autouse=True)
    def initdir(self, tmpdir):
        # Every test starts from a fresh root group opened in write mode.
        self.path = tmpdir.mkdir("data")
        self.store = parse_url(str(self.path), mode="w").store
        self.root = zarr.group(store=self.store)

    def test_minimal_plate(self):
        """A plate with one well and one image yields Plate and Well specs."""
        write_plate_metadata(self.root, ["A"], ["1"], ["A/1"])
        well_group = self.root.require_group("A").require_group("1")
        write_well_metadata(well_group, ["0"])
        image_group = well_group.require_group("0")
        write_image(zeros((1, 1, 1, 256, 256)), image_group)

        plate_node = Node(parse_url(str(self.path)), [])
        assert plate_node.data
        assert plate_node.metadata
        assert len(plate_node.specs) == 1
        plate = plate_node.specs[0]
        assert isinstance(plate, Plate)
        assert plate.row_names == ["A"]
        assert plate.col_names == ["1"]
        assert plate.well_paths == ["A/1"]
        assert plate.row_count == 1
        assert plate.column_count == 1

        well_node = Node(parse_url(str(self.path / "A" / "1")), [])
        assert well_node.data
        assert well_node.metadata
        assert len(well_node.specs) == 1
        assert isinstance(well_node.specs[0], Well)

    @pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03()))
    def test_multiwells_plate(self, fmt):
        """A sparse 3x4 plate is readable and its unwritten wells are absent."""
        row_names = ["A", "B", "C"]
        col_names = ["1", "2", "3", "4"]
        well_paths = ["A/1", "A/2", "A/4", "B/2", "B/3", "C/1", "C/3", "C/4"]
        write_plate_metadata(self.root, row_names, col_names, well_paths, fmt=fmt)
        for well_path in well_paths:
            row_name, col_name = well_path.split("/")
            well_group = self.root.require_group(row_name).require_group(col_name)
            # A fresh list per call: the writer may rewrite its entries.
            write_well_metadata(well_group, ["0", "1", "2"], fmt=fmt)
            for field_index in range(3):
                image_group = well_group.require_group(str(field_index))
                write_image(zeros((1, 1, 1, 256, 256)), image_group)

        plate_node = Node(parse_url(str(self.path)), [])
        assert plate_node.data
        assert plate_node.metadata
        assert len(plate_node.specs) == 1
        plate = plate_node.specs[0]
        assert isinstance(plate, Plate)
        assert plate.row_names == row_names
        assert plate.col_names == col_names
        assert plate.well_paths == well_paths
        assert plate.row_count == 3
        assert plate.column_count == 4

        for well_path in well_paths:
            well_node = Node(parse_url(str(self.path / well_path)), [])
            assert well_node.data
            assert well_node.metadata
            assert len(well_node.specs) == 1
            assert isinstance(well_node.specs[0], Well)

        # Grid positions that were never written must not resolve to a location.
        for missing_path in ["A/3", "B/1", "B/4", "C/2"]:
            assert parse_url(str(self.path / missing_path)) is None

    @pytest.mark.xfail(reason="https://github.com/ome/ome-zarr-py/issues/145")
    @pytest.mark.parametrize(
        "axes, dims",
        (
            (["y", "x"], (256, 256)),
            (["t", "y", "x"], (1, 256, 256)),
            (["z", "y", "x"], (1, 256, 256)),
            (["c", "y", "x"], (1, 256, 256)),
            (["c", "z", "y", "x"], (1, 1, 256, 256)),
            (["t", "z", "y", "x"], (1, 1, 256, 256)),
            (["t", "c", "y", "x"], (1, 1, 256, 256)),
        ),
    )
    def test_plate_2D5D(self, axes, dims):
        """Plates holding images with fewer than five axes (expected to fail)."""
        v03 = FormatV03()
        write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], fmt=v03)
        well_group = self.root.require_group("A").require_group("1")
        write_well_metadata(well_group, ["0"], fmt=FormatV03())
        image_group = well_group.require_group("0")
        write_image(zeros(dims), image_group, fmt=FormatV03(), axes=axes)

        plate_node = Node(parse_url(str(self.path)), [])
        assert plate_node.data
        assert plate_node.metadata
        assert len(plate_node.specs) == 1
        assert isinstance(plate_node.specs[0], Plate)

        well_node = Node(parse_url(str(self.path / "A" / "1")), [])
        assert well_node.data
        assert well_node.metadata
        assert len(well_node.specs) == 1
        assert isinstance(well_node.specs[0], Well)
51 changes: 50 additions & 1 deletion tests/test_reader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import pytest
import zarr
from numpy import zeros

from ome_zarr.data import create_zarr
from ome_zarr.io import parse_url
from ome_zarr.reader import Node, Reader
from ome_zarr.reader import Node, Plate, PlateLabels, Reader
from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata


class TestReader:
Expand All @@ -28,3 +31,49 @@ def test_label(self):
filename = str(self.path.join("labels", "coins"))
reader = Reader(parse_url(filename))
assert len(list(reader())) == 3


class TestHCSReader:
    """Write plate and well metadata, then walk it with ``Reader``."""

    @pytest.fixture(autouse=True)
    def initdir(self, tmpdir):
        # Every test starts from a fresh root group opened in write mode.
        self.path = tmpdir.mkdir("data")
        self.store = parse_url(str(self.path), mode="w").store
        self.root = zarr.group(store=self.store)

    def test_minimal_plate(self):
        """A single-well plate is read as a Plate node and a PlateLabels node."""
        write_plate_metadata(self.root, ["A"], ["1"], ["A/1"])
        well_group = self.root.require_group("A").require_group("1")
        write_well_metadata(well_group, ["0"])
        image_group = well_group.require_group("0")
        write_image(zeros((1, 1, 1, 256, 256)), image_group)

        reader = Reader(parse_url(str(self.path)))
        nodes = list(reader())
        assert len(nodes) == 2
        plate_node, labels_node = nodes[0], nodes[1]
        assert len(plate_node.specs) == 1
        assert isinstance(plate_node.specs[0], Plate)
        assert len(labels_node.specs) == 1
        assert isinstance(labels_node.specs[0], PlateLabels)

    def test_multiwells_plate(self):
        """A sparse 3x4 plate is read as a Plate node and a PlateLabels node."""
        row_names = ["A", "B", "C"]
        col_names = ["1", "2", "3", "4"]
        well_paths = ["A/1", "A/2", "A/4", "B/2", "B/3", "C/1", "C/3", "C/4"]
        write_plate_metadata(self.root, row_names, col_names, well_paths)
        for well_path in well_paths:
            row_name, col_name = well_path.split("/")
            well_group = self.root.require_group(row_name).require_group(col_name)
            # A fresh list per call: the writer may rewrite its entries.
            write_well_metadata(well_group, ["0", "1", "2"])
            for field_index in range(3):
                image_group = well_group.require_group(str(field_index))
                write_image(zeros((1, 1, 1, 256, 256)), image_group)

        reader = Reader(parse_url(str(self.path)))
        nodes = list(reader())
        assert len(nodes) == 2
        plate_node, labels_node = nodes[0], nodes[1]
        assert len(plate_node.specs) == 1
        assert isinstance(plate_node.specs[0], Plate)
        assert len(labels_node.specs) == 1
        assert isinstance(labels_node.specs[0], PlateLabels)
Loading