Skip to content

Commit

Permalink
first draft
Browse files Browse the repository at this point in the history
  • Loading branch information
kedhammar committed Dec 4, 2024
1 parent aa2d05b commit 35c768e
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
10 changes: 10 additions & 0 deletions taca/analysis/analysis_nanopore.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ def process_user_run(ont_user_run: ONT_user_run):
logger.info(f"{ont_user_run.run_name}: Putting HTML report on GenStat...")
ont_user_run.copy_html_report()

# Generate and publish TouliggQC report
logger.info(
f"{ont_user_run.run_name}: Generating and publishing ToulligQC report..."
)
ont_user_run.touliggqc_report()

# Copy metadata
logger.info(f"{ont_user_run.run_name}: Copying metadata...")
ont_user_run.copy_metadata()
Expand Down Expand Up @@ -166,6 +172,10 @@ def process_qc_run(ont_qc_run: ONT_qc_run):
logger.info(f"{ont_qc_run.run_name}: Putting HTML report on GenStat...")
ont_qc_run.copy_html_report()

# Generate and publish TouliggQC report
logger.info(f"{ont_qc_run.run_name}: Generating and publishing ToulligQC report...")
ont_qc_run.touliggqc_report()

# Look at seq data
if not ont_qc_run.has_raw_seq_output():
logger.info(f"{ont_qc_run.run_name}: Run has no sequencing output, continuing")
Expand Down
110 changes: 110 additions & 0 deletions taca/nanopore/ONT_run_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def __init__(self, run_abspath: str):

# Get attributes from config
self.minknow_reports_dir = CONFIG["nanopore_analysis"]["minknow_reports_dir"]
self.toulligqc_reports_dir = CONFIG["nanopore_analysis"][
"toulligqc_reports_dir"
] # TODO
self.analysis_server = CONFIG["nanopore_analysis"].get("analysis_server", None)
self.rsync_options = CONFIG["nanopore_analysis"]["rsync_options"]
for k, v in self.rsync_options.items():
Expand Down Expand Up @@ -338,6 +341,113 @@ def copy_html_report(self):
logger.error(msg)
raise RsyncError(msg)

def touliggqc_report(self):
"""Generate a QC report for the run using ToulligQC and publish it to GenStat."""

# Get sequencing summary file
glob_summary = glob.glob(f"{self.run_abspath}/sequencing_summary*.txt")
assert len(glob_summary) == 1, f"Found {len(glob_summary)} summary files"
summary = glob_summary[0]

# Determine the format of the raw sequencing data, sorted by preference
raw_data_dir_options = [
"pod5_passed",
"pod5",
"fast5_pass",
"fast5",
]
raw_data_dir = None
for raw_data_dir_option in raw_data_dir_options:
if os.path.exists(f"{self.run_abspath}/{raw_data_dir_option}"):
raw_data_dir = raw_data_dir_option
break
if raw_data_dir is None:
raise AssertionError(f"No seq data found in {self.run_abspath}")
raw_data_format = "pod5" if "pod5" in raw_data_dir else "fast5"

# Load samplesheet, if any
ss_glob = glob.glob(f"{self.run_abspath}/sample_sheet*.csv")
if len(ss_glob) == 0:
samplesheet = None
elif len(ss_glob) > 1:
# If multiple samplesheet, use latest one
samplesheet = ss_glob.sort()[-1]
else:
samplesheet = ss_glob[0]

# Run has barcode subdirs
if len(glob.glob(f"{self.run_abspath}/fastq_pass/barcode*")) > 0:
barcode_dirs = True
else:
barcode_dirs = False

# Determine barcodes
if samplesheet:
ss_df = pd.read_csv(samplesheet)
if "barcode" in ss_df.columns:
ss_barcodes = list(ss_df["barcode"].unique())
ss_barcodes.sort()
barcode_nums = [int(bc[-2:]) for bc in ss_barcodes]
# If barcodes are numbered sequentially, write arg as range
if barcode_nums == list(range(1, len(barcode_nums) + 1)):
barcodes_arg = f"{ss_barcodes[0]}:{ss_barcodes[-1]}"
else:
barcodes_arg = ":".join(ss_barcodes)
else:
ss_barcodes = None

command_args = {
"--sequencing-summary-source": summary,
f"--{raw_data_format}-source": f"{self.run_abspath}/{raw_data_dir}",
"--output-directory": self.run_abspath,
"--report-name": "toulligqc_report",
}
if barcode_dirs:
command_args["--barcoding"] = ""
if ss_barcodes:
command_args["--samplesheet"] = samplesheet
command_args["--barcodes"] = barcodes_arg

# Build command list
command_list = ["toulligqc"]
for k, v in command_args.items():
command_list.append(k)
if v:
command_list.append(v)

# Run the command
# Small enough to wait for, should be done in 1-5 minutes
process = subprocess.run(command_list)

# Check if the command was successful
if process.returncode == 0:
logging.info(f"{self.run_name}: ToulligQC report generated successfully.")
else:
raise subprocess.CalledProcessError(process.returncode, command_list)

# Copy the report to GenStat

logger.info(
f"{self.run_name}: Transferring ToulligQC report to ngi-internal..."
)
# Transfer the ToulligQC .html report file to ngi-internal, renaming it to the full run ID. Requires password-free SSH access.
report_src_path = self.get_file("toulligqc_report/report*.html")
report_dest_path = os.path.join(
self.toulligqc_reports_dir,
f"ToulligQC_{self.run_name}.html",
)
transfer_object = RsyncAgent(
src_path=report_src_path,
dest_path=report_dest_path,
validate=False,
)
try:
transfer_object.transfer()
except RsyncError:
msg = f"{self.run_name}: An error occurred while attempting to transfer the report {report_src_path} to {report_dest_path}."
logger.error(msg)
raise RsyncError(msg)

# Transfer run

def transfer_run(self):
Expand Down

0 comments on commit 35c768e

Please sign in to comment.