From 22fffd52dd2ed1ec7c8dfaf5236a7cec4e0020e9 Mon Sep 17 00:00:00 2001 From: glrs <5999366+glrs@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:18:24 +0100 Subject: [PATCH] Move paths to SampleFileHandler and create get methods for some --- lib/realms/tenx/run_sample.py | 24 +++++++------------- lib/realms/tenx/tenx_project.py | 1 + lib/realms/tenx/utils/sample_file_handler.py | 21 +++++++++++------ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/realms/tenx/run_sample.py b/lib/realms/tenx/run_sample.py index d54c302..2a78c4e 100644 --- a/lib/realms/tenx/run_sample.py +++ b/lib/realms/tenx/run_sample.py @@ -49,7 +49,7 @@ def __init__( self.file_handler: SampleFileHandler = SampleFileHandler(self) self.features: List[str] = self._collect_features() - self.pipeline_info: Optional[Dict[str, Any]] = self._get_pipeline_info() + self.pipeline_info: Optional[Dict[str, Any]] = self._get_pipeline_info() or {} self.reference_genomes: Dict[str, str] = ( self.collect_reference_genomes() ) or {} @@ -197,7 +197,7 @@ async def pre_process(self): slurm_metadata = { "sample_id": self.run_sample_id, "project_name": self.project_info.get("project_name", ""), - "output_dir": str(self.file_handler.base_dir), + "output_dir": str(self.file_handler.project_dir), "cellranger_command": cellranger_command, } @@ -265,21 +265,14 @@ def assemble_cellranger_command(self) -> str: # Mapping of argument names to their values arg_values: Dict[str, Any] = { "--id": self.run_sample_id, - "--csv": str( - self.file_handler.base_dir / f"{self.run_sample_id}_multi.csv" - ), + "--csv": str(self.file_handler.get_multi_csv_path()), "--transcriptome": self.reference_genomes["gex"], "--fastqs": ",".join( [",".join(paths) for paths in self.lab_samples[0].fastq_dirs.values()] ), "--sample": self.lab_samples[0].lab_sample_id, - "--libraries": str( - self.file_handler.base_dir / f"{self.run_sample_id}_libraries.csv" - ), - "--feature-ref": str( - self.file_handler.base_dir - / f"{self.run_sample_id}_feature_reference.csv" - ), + "--libraries": str(self.file_handler.get_libraries_csv_path()), + "--feature-ref": str(self.file_handler.get_feature_reference_csv_path()), } # Add references based on the pipeline @@ -341,9 +334,7 @@ def collect_libraries_data(self) -> List[Dict[str, str]]: def generate_libraries_csv(self) -> None: """Generate the libraries CSV file required for processing.""" logging.info(f"[{self.run_sample_id}] Generating library CSV") - library_csv_path = ( - self.file_handler.base_dir / f"{self.run_sample_id}_libraries.csv" - ) + library_csv_path = self.file_handler.get_libraries_csv_path() # Ensure the directory exists library_csv_path.parent.mkdir(parents=True, exist_ok=True) @@ -365,12 +356,13 @@ def generate_libraries_csv(self) -> None: def generate_feature_reference_csv(self) -> None: """Generate the feature reference CSV file required for processing.""" logging.info(f"[{self.run_sample_id}] Generating feature reference CSV") + # feature_ref_csv_path = self.file_handler.get_feature_reference_csv_path() pass def generate_multi_sample_csv(self) -> None: """Generate the multi-sample CSV file required for processing.""" logging.info(f"[{self.run_sample_id}] Generating multi-sample CSV") - multi_csv_path = self.file_handler.base_dir / f"{self.run_sample_id}_multi.csv" + multi_csv_path = self.file_handler.get_multi_csv_path() # Ensure the directory exists multi_csv_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/lib/realms/tenx/tenx_project.py b/lib/realms/tenx/tenx_project.py index 82a2f58..273c0ff 100644 --- a/lib/realms/tenx/tenx_project.py +++ b/lib/realms/tenx/tenx_project.py @@ -46,6 +46,7 @@ def __init__(self, doc: Dict[str, Any], yggdrasil_db_manager: Any) -> None: return self.project_dir: Optional[Path] = self.ensure_project_directory() + self.project_info["project_dir"] = self.project_dir self.samples: List[TenXRunSample] = [] self.case_type: str = self.project_info.get("case_type", "unknown") logging.info(f"Case type: {self.case_type}") diff --git a/lib/realms/tenx/utils/sample_file_handler.py b/lib/realms/tenx/utils/sample_file_handler.py index 868d53d..277ca28 100644 --- a/lib/realms/tenx/utils/sample_file_handler.py +++ b/lib/realms/tenx/utils/sample_file_handler.py @@ -19,7 +19,7 @@ class SampleFileHandler: sample_ref (str): Reference genome for the sample. organism (str): Organism associated with the sample. config (Dict[str, Any]): Configuration settings. - base_dir (Path): Base directory path for the project. + project_dir (Path): Base directory path for the project. sample_dir (Path): Directory path for the sample. fastq_files_dir (Path): Directory path for FASTQ files. fastq_files (Dict[str, Any]): Dictionary of FASTQ file paths. @@ -41,11 +41,9 @@ def __init__(self, sample: Any) -> None: self.config: Dict[str, Any] = sample.config # Define sample folder structure - self.base_dir: Path = ( - Path(self.config["10x_dir"]) / "projects" / self.project_name - ) - self.sample_dir: Path = self.base_dir / self.sample_id - self.fastq_files_dir: Path = self.base_dir / "fastq_files" + self.project_dir: Path = sample.project_info.get("project_dir", "") + self.sample_dir: Path = self.project_dir / self.sample_id + self.fastq_files_dir: Path = self.project_dir / "fastq_files" self.fastq_files: Dict[str, Any] = {} @@ -56,9 +54,18 @@ def init_file_paths(self) -> None: """Initialize critical file paths.""" # Files needed for processing self.slurm_script_path: Path = ( - self.base_dir / f"{self.sample_id}_slurm_script.sh" + self.project_dir / f"{self.sample_id}_slurm_script.sh" ) # Report output files # NOTE: Different pipelines may produce summaries in different locations self.summary_fpath: Path = self.sample_dir / "outs" / "web_summary.html" + + def get_libraries_csv_path(self) -> Path: + return self.project_dir / f"{self.sample_id}_libraries.csv" + + def get_multi_csv_path(self) -> Path: + return self.project_dir / f"{self.sample_id}_multi.csv" + + def get_feature_reference_csv_path(self) -> Path: + return self.project_dir / f"{self.sample_id}_feature_reference.csv"