Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dynamic <<>> runindex #213

Merged
merged 22 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 55 additions & 25 deletions bidscoin/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import ast
from functools import lru_cache
from pathlib import Path
from typing import Union, List, Tuple
from typing import Dict, List, Set, Tuple, Union
from nibabel.parrec import parse_PAR_header
from pandas import DataFrame
from pydicom import dcmread, fileset, datadict
Expand Down Expand Up @@ -1372,7 +1372,8 @@ def get_run_(provenance: Union[str, Path]='', dataformat: str='', datatype: str=
attributes = {},
bids = {},
meta = {},
datasource = datasource)
datasource = datasource,
targets = set())


def get_run(bidsmap: dict, datatype: str, suffix_idx: Union[int, str], datasource: DataSource) -> dict:
Expand Down Expand Up @@ -1488,7 +1489,7 @@ def delete_run(bidsmap: dict, provenance: Union[dict, str], datatype: str= '', d

def append_run(bidsmap: dict, run: dict, clean: bool=True) -> None:
"""
Append a run to the BIDS map
Append a run to the BIDS map, without targets

:param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc.
:param run: The run (listitem) that is appended to the datatype
Expand All @@ -1512,7 +1513,10 @@ def append_run(bidsmap: dict, run: dict, clean: bool=True) -> None:
run_[item].update(run[item])

run = run_
else:
run = copy.copy(run) # popping targets will not change original run

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks a bit like a hack, I suspect it is not needed if we add targets to the datasource instead of to the run. The datasource as part of the run is already hacky, but at least then it is all in one place. Moreover, cleaning up the datasource from the run is already implemented (e.g. when saving the bidsmap). I will make some commits to your PR so you can see what I mean

run.pop("targets", None)
if not bidsmap.get(dataformat):
bidsmap[dataformat] = {datatype: []}
if not bidsmap.get(dataformat).get(datatype):
Expand Down Expand Up @@ -1903,19 +1907,17 @@ def insert_bidskeyval(bidsfile: Union[str, Path], bidskey: str, newvalue: str, v
return newbidsfile


def increment_runindex(outfolder: Path, bidsname: str, run: dict, scans_table: DataFrame=pd.DataFrame()) -> Union[Path, str]:
def increment_runindex(outfolder: Path, bidsname: str, run: dict) -> Union[Path, str]:
"""
Checks if a file with the same bidsname already exists in the folder and then increments the dynamic runindex
(if any) until no such file is found.

Important side effect for <<>> dynamic value:
If the run-less file already exists, start with run-2 and rename the existing run-less files to run-index 1.
Also update the scans table accordingly
If the run-less file already exists, start with run-2, run-1 will be added later to run-less files

:param outfolder: The full pathname of the bids output folder
:param bidsname: The bidsname with a provisional runindex
:param run: The run mapping with the BIDS key-value pairs
:param scans_table: BIDS scans.tsv dataframe with all filenames and acquisition timestamps
:return: The bidsname with the original or incremented runindex
"""

Expand All @@ -1930,34 +1932,62 @@ def increment_runindex(outfolder: Path, bidsname: str, run: dict, scans_table: D
if '.' in bidsname:
bidsname, suffixes = bidsname.split('.', 1)

# Catch run-less bidsnames from <<>> dynamic run-values
run2_bidsname = insert_bidskeyval(bidsname, 'run', '2', False)
if '_run-' not in bidsname and list(outfolder.glob(f"{run2_bidsname}.*")):
bidsname = run2_bidsname # There is more than 1 run, i.e. run-2 already exists and should be normally incremented
# Delete runindex from bidsname if no runless files (run-1) exist (e.g. dcm2niix postfixes changed name)
runless_bidsname = insert_bidskeyval(bidsname, 'run', '', False)
if runval == '<<>>' and '_run-' in bidsname and not list(outfolder.glob(f"{runless_bidsname}.*")):
return runless_bidsname

# Increment the run-index if the bidsfile already exists
while list(outfolder.glob(f"{bidsname}.*")):
runindex = get_bidsvalue(bidsname, 'run')
if not runindex: # The run-less bids file already exists -> start with run-2
bidsname = run2_bidsname
bidsname = insert_bidskeyval(bidsname, 'run', '2', False)
else: # Do the normal increment
bidsname = get_bidsvalue(bidsname, 'run', str(int(runindex) + 1))

# Adds run-1 key to files with bidsname that don't have run index. Updates scans table respectively
if runval == '<<>>' and bidsname == run2_bidsname:
old_bidsname = insert_bidskeyval(bidsname, 'run', '', False)
new_bidsname = insert_bidskeyval(bidsname, 'run', '1', False)
for file in outfolder.glob(f"{old_bidsname}.*"):
ext = ''.join(file.suffixes)
file.replace((outfolder/new_bidsname).with_suffix(ext))
return f"{bidsname}.{suffixes}" if suffixes else bidsname

# Change row name in the scans table
if f"{outfolder.name}/{old_bidsname}{ext}" in scans_table.index:
LOGGER.verbose(f"Renaming:\n{outfolder/old_bidsname}.* ->\n{outfolder/new_bidsname}.*")
scans_table.rename(index={f"{outfolder.name}/{old_bidsname}{ext}":
f"{outfolder.name}/{new_bidsname}{ext}"}, inplace=True) # NB: '/' as_posix

return f"{bidsname}.{suffixes}" if suffixes else bidsname
def rename_runless_to_run1(matched_runs: List[dict], scans_table: pd.DataFrame) -> None:
    """
    Adds a run-1 label to run-less files that use a dynamic index (<<>>) in their bidsmap run-item and for
    which files with a run-2 label exist in the output folder. The 'scans_table' is updated accordingly

    :param matched_runs: Bidsmap run-items with accumulated files under 'targets' (all files created via that run-item)
    :param scans_table:  BIDS scans.tsv dataframe with all filenames and acquisition timestamps
    :return:
    """
    for matched_run in matched_runs:
        # Only run-items with a dynamic <<>> run-index can have produced run-less files that need renaming
        if matched_run.get('bids', {}).get('run') != '<<>>':
            continue

        for bids_target in matched_run["targets"].copy():   # copy: avoid problems with removing items within loop
            # Split the bidsname from its (possibly multi-part, e.g. '.nii.gz') extension
            bidsname = bids_target.name
            suffixes = ''
            if '.' in bidsname:
                bidsname, suffixes = bidsname.split('.', 1)
                if suffixes:
                    suffixes = '.' + suffixes               # re-add the leading dot for with_suffix() later on
            if get_bidsvalue(bidsname, 'run') == '':        # i.e. this target is a run-less file
                run2_bidsname = insert_bidskeyval(bidsname, 'run', '2', False)
                outfolder     = bids_target.parent
                if list(outfolder.glob(f"{run2_bidsname}.*")):
                    # Add run-1 to run-less bidsname files because run-2 exists
                    run1_bidsname = insert_bidskeyval(bidsname, 'run', '1', False)
                    for runless_file in outfolder.glob(f"{bidsname}.*"):
                        ext      = ''.join(runless_file.suffixes)
                        run1_file = (runless_file.parent / run1_bidsname).with_suffix(ext)
                        LOGGER.info(f"Found run-2 files for <<>> index, renaming\n{runless_file} ->\n{run1_file}")
                        runless_file.replace(run1_file)     # NOTE(review): replace() silently overwrites an existing run-1 file -- presumably cannot occur here, verify

                        # Change row name in the scans table
                        if f"{outfolder.name}/{bidsname}{ext}" in scans_table.index:
                            LOGGER.verbose(f"Renaming scans entry:\n{outfolder.name}/{bidsname}{ext} ->\n{outfolder.name}/{run1_bidsname}{ext}")
                            scans_table.rename(
                                index={f"{outfolder.name}/{bidsname}{ext}": f"{outfolder.name}/{run1_bidsname}{ext}"},
                                inplace=True
                            )                               # NB: '/' as_posix
                    # Change bids_target from run-less to run-1 in the run-item's accumulated targets
                    matched_run["targets"].remove(bids_target)
                    matched_run["targets"].add((outfolder / run1_bidsname).with_suffix(suffixes))


def updatemetadata(sourcemeta: Path, targetmeta: Path, usermeta: dict, extensions: list, datasource: DataSource) -> dict:
Expand Down
2 changes: 2 additions & 0 deletions bidscoin/bidscoiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def bidscoiner(rawfolder: str, bidsfolder: str, subjects: list=(), force: bool=F

LOGGER.info('')
if not jobids:

LOGGER.info('============== HPC FINISH =============')
LOGGER.info('')
return
Expand Down Expand Up @@ -296,6 +297,7 @@ def bidscoiner(rawfolder: str, bidsfolder: str, subjects: list=(), force: bool=F
if unpacked:
shutil.rmtree(sesfolder)


LOGGER.info('-------------- FINISHED! ------------')
LOGGER.info('')

Expand Down
15 changes: 12 additions & 3 deletions bidscoin/plugins/dcm2niix2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import json
import shutil
from bids_validator import BIDSValidator
from typing import Union
from typing import Union, List
from pathlib import Path
from bidscoin import bcoin, bids, lsdirs, due, Doi
from bidscoin.utilities import physio
Expand Down Expand Up @@ -228,6 +228,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
scans_table.index.name = 'filename'

# Process all the source files or run subfolders
matched_runs: List[dict] = []
sourcefile = Path()
for source in sources:

Expand All @@ -254,6 +255,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
continue

LOGGER.info(f"--> Coining: {source}")
matched_runs.append(run)

# Create the BIDS session/datatype output folder
suffix = datasource.dynamicvalue(run['bids']['suffix'], True, True)
Expand All @@ -267,7 +269,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
bidsignore = bids.check_ignore(datasource.datatype, bidsmap['Options']['bidscoin']['bidsignore'])
bidsname = bids.get_bidsname(subid, sesid, run, not bidsignore, runtime=True)
bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file')
bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table)
bidsname = bids.increment_runindex(outfolder, bidsname, run)
jsonfiles = set() # Set -> Collect the associated json-files (for updating them later) -- possibly > 1

# Check if the bidsname is valid
Expand Down Expand Up @@ -312,6 +314,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
if not list(outfolder.glob(f"{bidsname}.*nii*")): continue

jsonfiles.update(outfolder.glob(f"{bidsname}.json")) # add existing created json files: bidsname.json
run["targets"].update(outfolder.glob(f"{bidsname}.*[!json]")) # add files created using this bidsmap run-item (except sidecars)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is all that is need when targets are added to the datasource


# Handle the ABCD GE pepolar sequence
extrafile = list(outfolder.glob(f"{bidsname}a.nii*"))
Expand Down Expand Up @@ -419,12 +422,13 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
LOGGER.warning(f"The {newbidsname} image is a derivate / not BIDS-compliant -- you can probably delete it safely and update {scans_tsv}")

# Save the NIfTI file with the newly constructed name
newbidsname = bids.increment_runindex(outfolder, newbidsname, run, scans_table) # Update the runindex now that the acq-label has changed
newbidsname = bids.increment_runindex(outfolder, newbidsname, run) # Update the runindex now that the acq-label has changed
newbidsfile = outfolder/newbidsname
LOGGER.verbose(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}")
if newbidsfile.is_file():
LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!")
dcm2niixfile.replace(newbidsfile)
run["targets"].add(newbidsfile)

# Rename all associated files (i.e. the json-, bval- and bvec-files)
oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
Expand Down Expand Up @@ -458,6 +462,8 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
LOGGER.verbose(f"Removing BIDS-invalid b0-file: {bfile} -> {jsonfile}")
metadata[ext[1:]] = bdata.values.tolist()
bfile.unlink()
if bfile in run["targets"]:
run["targets"].remove(bfile)

# Save the meta-data to the json sidecar-file
with jsonfile.open('w') as json_fid:
Expand Down Expand Up @@ -486,6 +492,9 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
scanpath = outputfile[0].relative_to(bidsses)
scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

# Handle dynamic index for run-1
bids.rename_runless_to_run1(matched_runs, scans_table)

# Write the scans_table to disk
LOGGER.verbose(f"Writing acquisition time data to: {scans_tsv}")
scans_table.sort_values(by=['acq_time','filename'], inplace=True)
Expand Down
11 changes: 9 additions & 2 deletions bidscoin/plugins/nibabel2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
import pandas as pd
import nibabel as nib
from bids_validator import BIDSValidator
from typing import Union
from typing import List, Union
from pathlib import Path
from bidscoin import bids

try:
from nibabel.testing import data_path
except ImportError:
Expand Down Expand Up @@ -188,6 +189,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
scans_table.index.name = 'filename'

# Collect the different Nibabel source files for all files in the session
matched_runs: List[dict] = []
for sourcefile in sourcefiles:

datasource = bids.DataSource(sourcefile, {'nibabel2bids':options})
Expand All @@ -204,6 +206,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
continue

LOGGER.info(f"--> Coining: {sourcefile}")
matched_runs.append(run)

# Create the BIDS session/datatype output folder
outfolder = bidsses/datasource.datatype
Expand All @@ -213,7 +216,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
bidsignore = bids.check_ignore(datasource.datatype, bidsmap['Options']['bidscoin']['bidsignore'])
bidsname = bids.get_bidsname(subid, sesid, run, not bidsignore, runtime=True)
bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file')
bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table)
bidsname = bids.increment_runindex(outfolder, bidsname, run)
bidsfile = (outfolder/bidsname).with_suffix(ext)

# Check if the bidsname is valid
Expand All @@ -229,6 +232,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:

# Save the sourcefile as a BIDS NIfTI file
nib.save(nib.load(sourcefile), bidsfile)
run["targets"].add(bidsfile)

# Load / copy over the source meta-data
sidecar = bidsfile.with_suffix('').with_suffix('.json')
Expand All @@ -240,6 +244,9 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
acq_time = dateutil.parser.parse(f"1925-01-01T{metadata.get('AcquisitionTime', '')}")
scans_table.loc[bidsfile.relative_to(bidsses).as_posix(), 'acq_time'] = acq_time.isoformat()

# Handle dynamic index for run-1
bids.rename_runless_to_run1(matched_runs, scans_table)

# Write the scans_table to disk
LOGGER.verbose(f"Writing data to: {scans_tsv}")
scans_table.replace('','n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
11 changes: 9 additions & 2 deletions bidscoin/plugins/spec2nii2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import json
import pandas as pd
import dateutil.parser
from typing import Union
from typing import List, Union
from bids_validator import BIDSValidator
from pathlib import Path
from bidscoin import bcoin, bids, due, Doi
Expand Down Expand Up @@ -197,6 +197,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
scans_table.index.name = 'filename'

# Loop over all MRS source data files and convert them to BIDS
matched_runs: List[dict] = []
for sourcefile in sourcefiles:

# Get a data source, a matching run from the bidsmap
Expand All @@ -214,6 +215,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
continue

LOGGER.info(f"--> Coining: {sourcefile}")
matched_runs.append(run)

# Create the BIDS session/datatype output folder
outfolder = bidsses/datasource.datatype
Expand All @@ -223,7 +225,7 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
bidsignore = bids.check_ignore(datasource.datatype, bidsmap['Options']['bidscoin']['bidsignore'])
bidsname = bids.get_bidsname(subid, sesid, run, not bidsignore, runtime=True)
bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file')
bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table)
bidsname = bids.increment_runindex(outfolder, bidsname, run)
sidecar = (outfolder/bidsname).with_suffix('.json')

# Check if the bidsname is valid
Expand Down Expand Up @@ -256,6 +258,8 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
if bcoin.run_command(f'{command} {dformat} -j -f "{bidsname}" -o "{outfolder}" {args} {arg} "{sourcefile}"'):
if not list(outfolder.glob(f"{bidsname}.nii*")): continue

run["targets"].update(outfolder.glob(f"{bidsname}.*[!json]")) # add files created using this bidsmap run-item (except sidecars)

# Load / copy over and adapt the newly produced json sidecar-file (NB: assumes every NIfTI-file comes with a json-file)
metadata = bids.updatemetadata(sourcefile, sidecar, run['meta'], options['meta'], datasource)
with sidecar.open('w') as json_fid:
Expand Down Expand Up @@ -283,6 +287,9 @@ def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> Union[None
acq_time = 'n/a'
scans_table.loc[sidecar.with_suffix('.nii.gz').relative_to(bidsses).as_posix(), 'acq_time'] = acq_time

# Handle dynamic index for run-1
bids.rename_runless_to_run1(matched_runs, scans_table)

# Write the scans_table to disk
LOGGER.verbose(f"Writing acquisition time data to: {scans_tsv}")
scans_table.sort_values(by=['acq_time','filename'], inplace=True)
Expand Down
Loading