Skip to content

Commit

Permalink
Move paths to SampleFileHandler and create get methods for some
Browse files Browse the repository at this point in the history
  • Loading branch information
glrs committed Oct 31, 2024
1 parent e9e1741 commit 22fffd5
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 23 deletions.
24 changes: 8 additions & 16 deletions lib/realms/tenx/run_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(
self.file_handler: SampleFileHandler = SampleFileHandler(self)

self.features: List[str] = self._collect_features()
self.pipeline_info: Optional[Dict[str, Any]] = self._get_pipeline_info()
self.pipeline_info: Optional[Dict[str, Any]] = self._get_pipeline_info() or {}
self.reference_genomes: Dict[str, str] = (
self.collect_reference_genomes()
) or {}
Expand Down Expand Up @@ -197,7 +197,7 @@ async def pre_process(self):
slurm_metadata = {
"sample_id": self.run_sample_id,
"project_name": self.project_info.get("project_name", ""),
"output_dir": str(self.file_handler.base_dir),
"output_dir": str(self.file_handler.project_dir),
"cellranger_command": cellranger_command,
}

Expand Down Expand Up @@ -265,21 +265,14 @@ def assemble_cellranger_command(self) -> str:
# Mapping of argument names to their values
arg_values: Dict[str, Any] = {
"--id": self.run_sample_id,
"--csv": str(
self.file_handler.base_dir / f"{self.run_sample_id}_multi.csv"
),
"--csv": str(self.file_handler.get_multi_csv_path()),
"--transcriptome": self.reference_genomes["gex"],
"--fastqs": ",".join(
[",".join(paths) for paths in self.lab_samples[0].fastq_dirs.values()]
),
"--sample": self.lab_samples[0].lab_sample_id,
"--libraries": str(
self.file_handler.base_dir / f"{self.run_sample_id}_libraries.csv"
),
"--feature-ref": str(
self.file_handler.base_dir
/ f"{self.run_sample_id}_feature_reference.csv"
),
"--libraries": str(self.file_handler.get_libraries_csv_path()),
"--feature-ref": str(self.file_handler.get_feature_reference_csv_path()),
}

# Add references based on the pipeline
Expand Down Expand Up @@ -341,9 +334,7 @@ def collect_libraries_data(self) -> List[Dict[str, str]]:
def generate_libraries_csv(self) -> None:
"""Generate the libraries CSV file required for processing."""
logging.info(f"[{self.run_sample_id}] Generating library CSV")
library_csv_path = (
self.file_handler.base_dir / f"{self.run_sample_id}_libraries.csv"
)
library_csv_path = self.file_handler.get_libraries_csv_path()

# Ensure the directory exists
library_csv_path.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -365,12 +356,13 @@ def generate_libraries_csv(self) -> None:
def generate_feature_reference_csv(self) -> None:
    """Announce generation of the feature reference CSV (writing is not implemented).

    Only an info-level log entry tagged with the run sample id is emitted;
    no file is created by this method.
    """
    # TODO: write the CSV to self.file_handler.get_feature_reference_csv_path()
    sample_tag = self.run_sample_id
    logging.info(f"[{sample_tag}] Generating feature reference CSV")

def generate_multi_sample_csv(self) -> None:
"""Generate the multi-sample CSV file required for processing."""
logging.info(f"[{self.run_sample_id}] Generating multi-sample CSV")
multi_csv_path = self.file_handler.base_dir / f"{self.run_sample_id}_multi.csv"
multi_csv_path = self.file_handler.get_multi_csv_path()

# Ensure the directory exists
multi_csv_path.parent.mkdir(parents=True, exist_ok=True)
Expand Down
1 change: 1 addition & 0 deletions lib/realms/tenx/tenx_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self, doc: Dict[str, Any], yggdrasil_db_manager: Any) -> None:
return

self.project_dir: Optional[Path] = self.ensure_project_directory()
self.project_info["project_dir"] = self.project_dir
self.samples: List[TenXRunSample] = []
self.case_type: str = self.project_info.get("case_type", "unknown")
logging.info(f"Case type: {self.case_type}")
Expand Down
21 changes: 14 additions & 7 deletions lib/realms/tenx/utils/sample_file_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class SampleFileHandler:
sample_ref (str): Reference genome for the sample.
organism (str): Organism associated with the sample.
config (Dict[str, Any]): Configuration settings.
base_dir (Path): Base directory path for the project.
project_dir (Path): Base directory path for the project.
sample_dir (Path): Directory path for the sample.
fastq_files_dir (Path): Directory path for FASTQ files.
fastq_files (Dict[str, Any]): Dictionary of FASTQ file paths.
Expand All @@ -41,11 +41,9 @@ def __init__(self, sample: Any) -> None:
self.config: Dict[str, Any] = sample.config

# Define sample folder structure
self.base_dir: Path = (
Path(self.config["10x_dir"]) / "projects" / self.project_name
)
self.sample_dir: Path = self.base_dir / self.sample_id
self.fastq_files_dir: Path = self.base_dir / "fastq_files"
self.project_dir: Path = sample.project_info.get("project_dir", "")
self.sample_dir: Path = self.project_dir / self.sample_id
self.fastq_files_dir: Path = self.project_dir / "fastq_files"

self.fastq_files: Dict[str, Any] = {}

Expand All @@ -56,9 +54,18 @@ def init_file_paths(self) -> None:
"""Initialize critical file paths."""
# Files needed for processing
self.slurm_script_path: Path = (
self.base_dir / f"{self.sample_id}_slurm_script.sh"
self.project_dir / f"{self.sample_id}_slurm_script.sh"
)

# Report output files
# NOTE: Different pipelines may produce summaries in different locations
self.summary_fpath: Path = self.sample_dir / "outs" / "web_summary.html"

def get_libraries_csv_path(self) -> Path:
    """Return the location of this sample's libraries CSV.

    The file is named ``<sample_id>_libraries.csv`` and is placed directly
    under ``project_dir``.
    """
    csv_name = f"{self.sample_id}_libraries.csv"
    return self.project_dir / csv_name

def get_multi_csv_path(self) -> Path:
    """Return the location of this sample's multi CSV.

    The file is named ``<sample_id>_multi.csv`` and is placed directly
    under ``project_dir``.
    """
    csv_name = f"{self.sample_id}_multi.csv"
    return self.project_dir / csv_name

def get_feature_reference_csv_path(self) -> Path:
    """Return the location of this sample's feature reference CSV.

    The file is named ``<sample_id>_feature_reference.csv`` and is placed
    directly under ``project_dir``.
    """
    csv_name = f"{self.sample_id}_feature_reference.csv"
    return self.project_dir / csv_name

0 comments on commit 22fffd5

Please sign in to comment.