Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move paths to SampleFileHandler and create get methods for some #11

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 8 additions & 16 deletions lib/realms/tenx/run_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(
self.file_handler: SampleFileHandler = SampleFileHandler(self)

self.features: List[str] = self._collect_features()
self.pipeline_info: Optional[Dict[str, Any]] = self._get_pipeline_info()
self.pipeline_info: Optional[Dict[str, Any]] = self._get_pipeline_info() or {}
self.reference_genomes: Dict[str, str] = (
self.collect_reference_genomes()
) or {}
Expand Down Expand Up @@ -197,7 +197,7 @@ async def pre_process(self):
slurm_metadata = {
"sample_id": self.run_sample_id,
"project_name": self.project_info.get("project_name", ""),
"output_dir": str(self.file_handler.base_dir),
"output_dir": str(self.file_handler.project_dir),
"cellranger_command": cellranger_command,
}

Expand Down Expand Up @@ -265,21 +265,14 @@ def assemble_cellranger_command(self) -> str:
# Mapping of argument names to their values
arg_values: Dict[str, Any] = {
"--id": self.run_sample_id,
"--csv": str(
self.file_handler.base_dir / f"{self.run_sample_id}_multi.csv"
),
"--csv": str(self.file_handler.get_multi_csv_path()),
"--transcriptome": self.reference_genomes["gex"],
"--fastqs": ",".join(
[",".join(paths) for paths in self.lab_samples[0].fastq_dirs.values()]
),
"--sample": self.lab_samples[0].lab_sample_id,
"--libraries": str(
self.file_handler.base_dir / f"{self.run_sample_id}_libraries.csv"
),
"--feature-ref": str(
self.file_handler.base_dir
/ f"{self.run_sample_id}_feature_reference.csv"
),
"--libraries": str(self.file_handler.get_libraries_csv_path()),
"--feature-ref": str(self.file_handler.get_feature_reference_csv_path()),
}

# Add references based on the pipeline
Expand Down Expand Up @@ -341,9 +334,7 @@ def collect_libraries_data(self) -> List[Dict[str, str]]:
def generate_libraries_csv(self) -> None:
"""Generate the libraries CSV file required for processing."""
logging.info(f"[{self.run_sample_id}] Generating library CSV")
library_csv_path = (
self.file_handler.base_dir / f"{self.run_sample_id}_libraries.csv"
)
library_csv_path = self.file_handler.get_libraries_csv_path()

# Ensure the directory exists
library_csv_path.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -365,12 +356,13 @@ def generate_libraries_csv(self) -> None:
def generate_feature_reference_csv(self) -> None:
    """Generate the feature reference CSV file required for processing.

    This is currently a stub: it only logs that generation was requested
    and writes nothing to disk.

    Returns:
        None
    """
    logging.info(f"[{self.run_sample_id}] Generating feature reference CSV")
    # TODO: write the CSV to
    # self.file_handler.get_feature_reference_csv_path() once the feature
    # reference contents are defined.

def generate_multi_sample_csv(self) -> None:
"""Generate the multi-sample CSV file required for processing."""
logging.info(f"[{self.run_sample_id}] Generating multi-sample CSV")
multi_csv_path = self.file_handler.base_dir / f"{self.run_sample_id}_multi.csv"
multi_csv_path = self.file_handler.get_multi_csv_path()

# Ensure the directory exists
multi_csv_path.parent.mkdir(parents=True, exist_ok=True)
Expand Down
1 change: 1 addition & 0 deletions lib/realms/tenx/tenx_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self, doc: Dict[str, Any], yggdrasil_db_manager: Any) -> None:
return

self.project_dir: Optional[Path] = self.ensure_project_directory()
self.project_info["project_dir"] = self.project_dir
self.samples: List[TenXRunSample] = []
self.case_type: str = self.project_info.get("case_type", "unknown")
logging.info(f"Case type: {self.case_type}")
Expand Down
21 changes: 14 additions & 7 deletions lib/realms/tenx/utils/sample_file_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class SampleFileHandler:
sample_ref (str): Reference genome for the sample.
organism (str): Organism associated with the sample.
config (Dict[str, Any]): Configuration settings.
base_dir (Path): Base directory path for the project.
project_dir (Path): Base directory path for the project.
sample_dir (Path): Directory path for the sample.
fastq_files_dir (Path): Directory path for FASTQ files.
fastq_files (Dict[str, Any]): Dictionary of FASTQ file paths.
Expand All @@ -41,11 +41,9 @@ def __init__(self, sample: Any) -> None:
self.config: Dict[str, Any] = sample.config

# Define sample folder structure
self.base_dir: Path = (
Path(self.config["10x_dir"]) / "projects" / self.project_name
)
self.sample_dir: Path = self.base_dir / self.sample_id
self.fastq_files_dir: Path = self.base_dir / "fastq_files"
self.project_dir: Path = sample.project_info.get("project_dir", "")
self.sample_dir: Path = self.project_dir / self.sample_id
self.fastq_files_dir: Path = self.project_dir / "fastq_files"

self.fastq_files: Dict[str, Any] = {}

Expand All @@ -56,9 +54,18 @@ def init_file_paths(self) -> None:
"""Initialize critical file paths."""
# Files needed for processing
self.slurm_script_path: Path = (
self.base_dir / f"{self.sample_id}_slurm_script.sh"
self.project_dir / f"{self.sample_id}_slurm_script.sh"
)

# Report output files
# NOTE: Different pipelines may produce summaries in different locations
self.summary_fpath: Path = self.sample_dir / "outs" / "web_summary.html"

def get_libraries_csv_path(self) -> Path:
    """Return the path of this sample's libraries CSV.

    The file sits directly in the project directory and is named
    ``<sample_id>_libraries.csv``.

    Returns:
        Path: ``project_dir / "<sample_id>_libraries.csv"``.
    """
    filename = f"{self.sample_id}_libraries.csv"
    return self.project_dir / filename

def get_multi_csv_path(self) -> Path:
    """Return the path of this sample's multi CSV.

    The file sits directly in the project directory and is named
    ``<sample_id>_multi.csv``.

    Returns:
        Path: ``project_dir / "<sample_id>_multi.csv"``.
    """
    filename = f"{self.sample_id}_multi.csv"
    return self.project_dir / filename

def get_feature_reference_csv_path(self) -> Path:
    """Return the path of this sample's feature reference CSV.

    The file sits directly in the project directory and is named
    ``<sample_id>_feature_reference.csv``.

    Returns:
        Path: ``project_dir / "<sample_id>_feature_reference.csv"``.
    """
    filename = f"{self.sample_id}_feature_reference.csv"
    return self.project_dir / filename
Loading