Skip to content

Commit

Permalink
added hook for data_readers.Excel.effective_sheet_name to allow multi…
Browse files Browse the repository at this point in the history
…ple sheets of same type
  • Loading branch information
dmichaels-harvard committed Jan 22, 2025
1 parent 805af4a commit d6b2ad1
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 3 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ Change Log

8.17.0
======
* dmichaels / 2025-01-14
* dmichaels / 2025-01-14 / branch: dmichaels-structured-data-row-mapper-hook-20250114 / PR-324
* Added hook to structured_data.StructuredDataSet to allow a custom Excel class
to be used, so a custom column mapping can be provided; this was initially to support
special/more-intuitive columns for QC values in the submission spreadsheet for smaht-submitr.
* Added hook to structured_data.StructuredDataSet to allow multiple sheets associated with
the same type (via a new data_readers.Excel.effective_sheet_name function).

8.16.6
======
Expand Down
3 changes: 3 additions & 0 deletions dcicutils/data_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ def __init__(self, file: str, reader_class: Optional[Type] = None, include_hidde
def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:
    """
    Return a reader for the sheet with the given name.

    The reader is created by calling self._reader_class with this object,
    the given sheet name, and self._workbook.
    """
    return self._reader_class(self, sheet_name=sheet_name, workbook=self._workbook)

def effective_sheet_name(self, sheet_name: str) -> str:
    """
    Return the effective name to use for the given sheet name.

    This default implementation returns the given sheet name unchanged. It is
    a hook: a custom Excel class can override it to map an actual sheet name
    to a different effective name, which allows multiple sheets to be
    associated with the same type.
    """
    return sheet_name

def open(self) -> None:
if self._workbook is None:
import warnings
Expand Down
11 changes: 10 additions & 1 deletion dcicutils/structured_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,16 @@ def get_counts() -> Tuple[int, int]:
# Order the sheet names by any specified ordering (e.g. a la snovault.loadxl).
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
# This effective_sheet_name function was added 2025-01-21 to allow sheets whose sheet names are
# other than simply the name of the type, but which do contain that type somehow; e.g.
# specifically where the sheet name is like "DSA:ExternalQualityMetric" where the "DSA"
# part is purely informational, and the "ExternalQualityMetric" is the type name; so we
# now can have multiple sheets of the same type (impossible before as sheet names need
# to be unique); this is simply a mechanism to allow the user to partition/organize their
# sheets with some data/rows for a given type split across multiple actual sheets.
type_name = Schema.type_name(excel.effective_sheet_name(sheet_name))
self._load_reader(excel.sheet_reader(sheet_name), type_name=type_name)
# self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
if self._validator_sheet_hook and self.data.get(sheet_name):
self._validator_sheet_hook(self, sheet_name, self.data[sheet_name])
# TODO: Do we really need progress reporting for the below?
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "8.16.6.1b4" # TODO: 8.17.0
version = "8.16.6.1b5" # TODO: 8.17.0
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit d6b2ad1

Please sign in to comment.