Skip to content

Commit

Permalink
Merge pull request #3077 from mirpedrol/subworkflows-meta-yml-restructure
Browse files Browse the repository at this point in the history

Add option `--fix` to update the `meta.yml` file of subworkflows.
  • Loading branch information
mirpedrol authored Sep 27, 2024
2 parents 9e05574 + 8d5f898 commit b3042f3
Show file tree
Hide file tree
Showing 25 changed files with 912 additions and 307 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
- The `modules_nfcore` tag in the `main.nf.test` file of modules/subworkflows now displays the organization name in custom modules repositories ([#3005](https://github.com/nf-core/tools/pull/3005))
- Add `--migrate_pytest` option to `nf-core <modules|subworkflows> test` command ([#3085](https://github.com/nf-core/tools/pull/3085))
- Components: allow spaces at the beginning of include statements ([#3115](https://github.com/nf-core/tools/pull/3115))
- Add option `--fix` to update the `meta.yml` file of subworkflows ([#3077](https://github.com/nf-core/tools/pull/3077))

### General

Expand Down
7 changes: 5 additions & 2 deletions nf_core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,11 +1479,14 @@ def command_subworkflows_list_local(ctx, keywords, json, directory): # pylint:
help="Sort lint output by subworkflow or test name.",
show_default=True,
)
def command_subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
@click.option("--fix", is_flag=True, help="Fix all linting tests if possible.")
def command_subworkflows_lint(
ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix
):
"""
Lint one or more subworkflows in a directory.
"""
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by)
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix)


# nf-core subworkflows info
Expand Down
3 changes: 2 additions & 1 deletion nf_core/commands_subworkflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def subworkflows_list_local(ctx, keywords, json, directory): # pylint: disable=
sys.exit(1)


def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix):
"""
Lint one or more subworkflows in a directory.
Expand All @@ -121,6 +121,7 @@ def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warn
subworkflow_lint = SubworkflowLint(
directory,
fail_warned=fail_warned,
fix=fix,
registry=ctx.params["registry"],
remote_url=ctx.obj["modules_repo_url"],
branch=ctx.obj["modules_repo_branch"],
Expand Down
76 changes: 55 additions & 21 deletions nf_core/components/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,25 @@ def get_remote_yaml(self) -> Optional[dict]:
self.remote_location = self.modules_repo.remote_url
return yaml.safe_load(file_contents)

def generate_params_table(self, type) -> Table:
    """Build the empty rich table used to list a component's inputs or outputs.

    Args:
        type: Heading of the first column, e.g. ``"Inputs"`` or ``"Outputs"``.
            The name shadows the builtin; it is kept so existing callers
            (positional or keyword) continue to work.

    Returns:
        A table with a heading column, a "Description" column and, depending
        on ``self.component_type``, a right-aligned "Pattern" column
        (modules) or "Structure" column (subworkflows).
    """
    # Outputs tables are headed by the outbox icon, everything else by the inbox icon.
    icon = ":outbox_tray:" if str(type).lower().startswith("output") else ":inbox_tray:"
    table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
    table.add_column(f"{icon} {type}")
    table.add_column("Description")
    if self.component_type == "modules":
        table.add_column("Pattern", justify="right", style="green")
    elif self.component_type == "subworkflows":
        table.add_column("Structure", justify="right", style="green")
    return table

def get_channel_structure(self, structure: dict) -> str:
    """Render the elements of a channel as one human-readable string.

    Args:
        structure: Mapping of element name to a dict holding a ``type`` key
            and, optionally, a ``pattern`` key.

    Returns:
        Each element formatted as ``name (type)`` or ``name (type - pattern)``,
        joined with ``", "``. An empty mapping yields an empty string.

    Raises:
        KeyError: If an element has no ``type`` key.
    """
    entries = []
    for key, info in structure.items():
        # A missing or empty pattern is simply left out of the entry.
        pattern = f" - {info['pattern']}" if info.get("pattern") else ""
        entries.append(f"{key} ({info['type']}{pattern})")
    # Join with a separator so consecutive elements do not run together.
    return ", ".join(entries)

def generate_component_info_help(self):
"""Take the parsed meta.yml and generate rich help.
Expand Down Expand Up @@ -277,33 +296,48 @@ def generate_component_info_help(self):

# Inputs
if self.meta.get("input"):
inputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
inputs_table.add_column(":inbox_tray: Inputs")
inputs_table.add_column("Description")
inputs_table.add_column("Pattern", justify="right", style="green")
for input in self.meta["input"]:
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
inputs_table = self.generate_params_table("Inputs")
for i, input in enumerate(self.meta["input"]):
inputs_table.add_row(f"[italic]input[{i}][/]", "", "")
if self.component_type == "modules":
for element in input:
for key, info in element.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
self.get_channel_structure(info["structure"]) if info.get("structure") else "",
)

renderables.append(inputs_table)

# Outputs
if self.meta.get("output"):
outputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
outputs_table.add_column(":outbox_tray: Outputs")
outputs_table.add_column("Description")
outputs_table.add_column("Pattern", justify="right", style="green")
outputs_table = self.generate_params_table("Outputs")
for output in self.meta["output"]:
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
if self.component_type == "modules":
for ch_name, elements in output.items():
outputs_table.add_row(f"{ch_name}", "", "")
for element in elements:
for key, info in element.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
self.get_channel_structure(info["structure"]) if info.get("structure") else "",
)

renderables.append(outputs_table)

Expand Down
143 changes: 87 additions & 56 deletions nf_core/components/nfcore_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import re
from pathlib import Path
from typing import List, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -41,6 +41,7 @@ def __init__(
remote_component (bool): Whether the module is to be treated as a
nf-core or local component
"""
self.component_type = component_type
self.component_name = component_name
self.repo_url = repo_url
self.component_dir = component_dir
Expand Down Expand Up @@ -170,65 +171,95 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Union[Path, st

def get_inputs_from_main_nf(self) -> None:
"""Collect all inputs from the main.nf file."""
inputs: list[list[dict[str, dict[str, str]]]] = []
inputs: Any = [] # Can be 'list[list[dict[str, dict[str, str]]]]' or 'list[str]'
with open(self.main_nf) as f:
data = f.read()
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
# regex matches:
# val(foo)
# path(bar)
# val foo
# val bar
# path bar
# path foo
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
input_data = data.split("input:")[1].split("output:")[0]
for line in input_data.split("\n"):
channel_elements: list[dict[str, dict[str, str]]] = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
if self.component_type == "modules":
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
# regex matches:
# val(foo)
# path(bar)
# val foo
# val bar
# path bar
# path foo
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
input_data = data.split("input:")[1].split("output:")[0]
for line in input_data.split("\n"):
channel_elements: Any = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
elif self.component_type == "subworkflows":
# get input values from main.nf after "take:"
if "take:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
# get all lines between "take" and "main" or "emit"
input_data = data.split("take:")[1].split("main:")[0].split("emit:")[0]
for line in input_data.split("\n"):
try:
inputs.append(line.split()[0])
except IndexError:
pass # Empty lines
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs

def get_outputs_from_main_nf(self):
outputs = []
with open(self.main_nf) as f:
data = f.read()
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
if self.component_type == "modules":
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
elif self.component_type == "subworkflows":
# get output values from main.nf after "emit:". Can be named outputs or not.
if "emit:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("emit:")[1].split("}")[0]
for line in output_data.split("\n"):
try:
outputs.append(line.split("=")[0].split()[0])
except IndexError:
# Empty lines
pass
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
10 changes: 5 additions & 5 deletions nf_core/module-template/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,6 @@ input:
## TODO nf-core: Add a description of all of the variables used as output
{% endif -%}
output:
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"
- {{ 'bam:' if not_empty_template else "output:" }}
#{% if has_meta -%} Only when we have meta
- meta:
Expand All @@ -81,6 +76,11 @@ output:
{% else -%}
- edam: ""
{%- endif %}
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "{{ author }}"
Expand Down
17 changes: 17 additions & 0 deletions nf_core/modules/lint/meta_yml.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,23 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None

# Confirm that all input and output channels are correctly specified
if valid_meta_yml:
# confirm that the name matches the process name in main.nf
if meta_yaml["name"].upper() == module.process_name:
module.passed.append(
(
"meta_name",
"Correct name specified in `meta.yml`.",
module.meta_yml,
)
)
else:
module.failed.append(
(
"meta_name",
f"Conflicting `process` name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{module.process_name}`)",
module.meta_yml,
)
)
# Check that inputs are specified in meta.yml
if len(module.inputs) > 0 and "input" not in meta_yaml:
module.failed.append(
Expand Down
4 changes: 2 additions & 2 deletions nf_core/pipeline-template/modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
{%- if fastqc %}
"fastqc": {
"branch": "master",
"git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}{% endif %}{%- if multiqc %}{% if fastqc %},{% endif %}
"multiqc": {
"branch": "master",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}
{%- endif %}
Expand Down
5 changes: 4 additions & 1 deletion nf_core/pipeline-template/modules/nf-core/fastqc/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b3042f3

Please sign in to comment.