Skip to content

Commit

Permalink
Merge branch 'main' into 317-add-arguments-hg19-vcf-dir-and-hg38-vcf-dir-to-spike-vcf-command
Browse files (browse the repository at this point in the history)
  • Loading branch information
yaseminbridges authored May 16, 2024
2 parents c3479e0 + 9fbffde commit 8a9f01a
Show file tree
Hide file tree
Showing 11 changed files with 220 additions and 410 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pheval"
version = "0.3.3"
version = "0.3.7"
description = ""
authors = ["Yasemin Bridges <[email protected]>",
"Julius Jacobsen <[email protected]>",
Expand Down
21 changes: 7 additions & 14 deletions src/pheval/analyse/disease_prioritisation_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
from pheval.analyse.rank_stats import RankStats
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
from pheval.utils.file_utils import (
all_files,
files_with_suffix,
obtain_phenopacket_path_from_pheval_result,
)
from pheval.utils.file_utils import all_files
from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandDisease, phenopacket_reader


Expand Down Expand Up @@ -217,7 +213,7 @@ def _obtain_causative_diseases(phenopacket_path: Path) -> List[ProbandDisease]:


def assess_phenopacket_disease_prioritisation(
standardised_disease_result: Path,
phenopacket_path: Path,
score_order: str,
results_dir_and_input: TrackInputOutputDirectories,
threshold: float,
Expand All @@ -230,16 +226,16 @@ def assess_phenopacket_disease_prioritisation(
against the recorded causative diseases for a proband in the Phenopacket.
Args:
standardised_disease_result (Path): Path to the PhEval standardised disease result file.
phenopacket_path (Path): Path to the Phenopacket.
score_order (str): The order in which scores are arranged, either ascending or descending.
results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
threshold (float): Threshold for assessment.
disease_rank_stats (RankStats): RankStats class instance.
disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
"""
phenopacket_path = obtain_phenopacket_path_from_pheval_result(
standardised_disease_result, all_files(results_dir_and_input.phenopacket_dir)
standardised_disease_result = results_dir_and_input.results_dir.joinpath(
f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
)
pheval_disease_result = read_standardised_result(standardised_disease_result)
proband_diseases = _obtain_causative_diseases(phenopacket_path)
Expand Down Expand Up @@ -276,12 +272,9 @@ def benchmark_disease_prioritisation(
"""
disease_rank_stats = RankStats()
disease_binary_classification_stats = BinaryClassificationStats()
for standardised_result in files_with_suffix(
results_directory_and_input.results_dir.joinpath("pheval_disease_results/"),
".tsv",
):
for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
assess_phenopacket_disease_prioritisation(
standardised_result,
phenopacket_path,
score_order,
results_directory_and_input,
threshold,
Expand Down
20 changes: 7 additions & 13 deletions src/pheval/analyse/gene_prioritisation_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
from pheval.analyse.rank_stats import RankStats
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
from pheval.post_processing.post_processing import RankedPhEvalGeneResult
from pheval.utils.file_utils import (
all_files,
files_with_suffix,
obtain_phenopacket_path_from_pheval_result,
)
from pheval.utils.file_utils import all_files
from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandCausativeGene, phenopacket_reader


Expand Down Expand Up @@ -209,7 +205,7 @@ def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene


def assess_phenopacket_gene_prioritisation(
standardised_gene_result: Path,
phenopacket_path: Path,
score_order: str,
results_dir_and_input: TrackInputOutputDirectories,
threshold: float,
Expand All @@ -222,16 +218,16 @@ def assess_phenopacket_gene_prioritisation(
against the recorded causative genes for a proband in the Phenopacket.
Args:
standardised_gene_result (Path): Path to the PhEval standardised gene result file.
phenopacket_path (Path): Path to the Phenopacket.
score_order (str): The order in which scores are arranged, either ascending or descending.
results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
threshold (float): Threshold for assessment.
gene_rank_stats (RankStats): RankStats class instance.
gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.
gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
"""
phenopacket_path = obtain_phenopacket_path_from_pheval_result(
standardised_gene_result, all_files(results_dir_and_input.phenopacket_dir)
standardised_gene_result = results_dir_and_input.results_dir.joinpath(
f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
)
pheval_gene_result = read_standardised_result(standardised_gene_result)
proband_causative_genes = _obtain_causative_genes(phenopacket_path)
Expand Down Expand Up @@ -266,11 +262,9 @@ def benchmark_gene_prioritisation(
"""
gene_rank_stats = RankStats()
gene_binary_classification_stats = BinaryClassificationStats()
for standardised_result in files_with_suffix(
results_directory_and_input.results_dir.joinpath("pheval_gene_results/"), ".tsv"
):
for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
assess_phenopacket_gene_prioritisation(
standardised_result,
phenopacket_path,
score_order,
results_directory_and_input,
threshold,
Expand Down
9 changes: 8 additions & 1 deletion src/pheval/analyse/parse_pheval_result.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
from pathlib import Path
from typing import List

import pandas as pd

from pheval.post_processing.post_processing import PhEvalResult

info_log = logging.getLogger("info")


def read_standardised_result(standardised_result_path: Path) -> List[dict]:
"""
Expand All @@ -16,7 +19,11 @@ def read_standardised_result(standardised_result_path: Path) -> List[dict]:
Returns:
List[dict]: A list of dictionaries representing the content of the standardised result file.
"""
return pd.read_csv(standardised_result_path, delimiter="\t").to_dict("records")
if standardised_result_path.is_file():
return pd.read_csv(standardised_result_path, delimiter="\t").to_dict("records")
else:
info_log.info(f"Could not find {standardised_result_path}")
return pd.DataFrame().to_dict("records")


def parse_pheval_result(
Expand Down
23 changes: 8 additions & 15 deletions src/pheval/analyse/variant_prioritisation_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
from pheval.analyse.rank_stats import RankStats
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
from pheval.post_processing.post_processing import RankedPhEvalVariantResult
from pheval.utils.file_utils import (
all_files,
files_with_suffix,
obtain_phenopacket_path_from_pheval_result,
)
from pheval.utils.file_utils import all_files
from pheval.utils.phenopacket_utils import GenomicVariant, PhenopacketUtil, phenopacket_reader


Expand Down Expand Up @@ -211,7 +207,7 @@ def _obtain_causative_variants(phenopacket_path: Path) -> List[GenomicVariant]:


def assess_phenopacket_variant_prioritisation(
standardised_variant_result: Path,
phenopacket_path: Path,
score_order: str,
results_dir_and_input: TrackInputOutputDirectories,
threshold: float,
Expand All @@ -224,18 +220,18 @@ def assess_phenopacket_variant_prioritisation(
against the recorded causative variants for a proband in the Phenopacket.
Args:
standardised_variant_result (Path): Path to the PhEval standardised variant result file.
phenopacket_path (Path): Path to the Phenopacket.
score_order (str): The order in which scores are arranged, either ascending or descending.
results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
threshold (float): Threshold for assessment.
variant_rank_stats (RankStats): RankStats class instance.
variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
"""
phenopacket_path = obtain_phenopacket_path_from_pheval_result(
standardised_variant_result, all_files(results_dir_and_input.phenopacket_dir)
)
proband_causative_variants = _obtain_causative_variants(phenopacket_path)
standardised_variant_result = results_dir_and_input.results_dir.joinpath(
f"pheval_variant_results/{phenopacket_path.stem}-pheval_variant_result.tsv"
)
pheval_variant_result = read_standardised_result(standardised_variant_result)
AssessVariantPrioritisation(
phenopacket_path,
Expand Down Expand Up @@ -270,12 +266,9 @@ def benchmark_variant_prioritisation(
"""
variant_rank_stats = RankStats()
variant_binary_classification_stats = BinaryClassificationStats()
for standardised_result in files_with_suffix(
results_directory_and_input.results_dir.joinpath("pheval_variant_results/"),
".tsv",
):
for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
assess_phenopacket_variant_prioritisation(
standardised_result,
phenopacket_path,
score_order,
results_directory_and_input,
threshold,
Expand Down
Loading

0 comments on commit 8a9f01a

Please sign in to comment.