Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(pics): remove variants from locus when PICS cannot be applied #361

Merged
merged 12 commits into from
Dec 21, 2023
Merged
2 changes: 2 additions & 0 deletions src/otg/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class StudyLocusQualityCheck(Enum):
AMBIGUOUS_STUDY (str): Association with ambiguous study
UNRESOLVED_LD (str): Variant not found in LD reference
LD_CLUMPED (str): Explained by a more significant variant in high LD (clumped)
UNPICSABLE (str): Unable to calculate PIPs with the provided data
"""

SUBSIGNIFICANT_FLAG = "Subsignificant p-value"
Expand All @@ -49,6 +50,7 @@ class StudyLocusQualityCheck(Enum):
UNRESOLVED_LD = "Variant not found in LD reference"
LD_CLUMPED = "Explained by a more significant variant in high LD (clumped)"
NO_POPULATION = "Study does not have population annotation to resolve LD"
UNPICSABLE = "Unable to calculate PIPs with the provided data"
ireneisdoomed marked this conversation as resolved.
Show resolved Hide resolved


class CredibleInterval(Enum):
Expand Down
23 changes: 18 additions & 5 deletions src/otg/method/pics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pyspark.sql.types as t
from scipy.stats import norm

from otg.dataset.study_locus import StudyLocus
from otg.dataset.study_locus import StudyLocus, StudyLocusQualityCheck

if TYPE_CHECKING:
from pyspark.sql import Row
Expand Down Expand Up @@ -127,7 +127,7 @@ def _finemap(
... Row(variantId="var2", r2Overall=None),
... ]
>>> PICS._finemap(ld_set_with_no_r2, lead_neglog_p=10.0, k=6.4)
[{'variantId': 'var1', 'r2Overall': None}, {'variantId': 'var2', 'r2Overall': None}]
[]
"""
if ld_set is None:
return None
Expand All @@ -145,8 +145,7 @@ def _finemap(
or tag_dict["r2Overall"] < 0.5
or not lead_neglog_p
):
# If PICS cannot be calculated, we'll return the original credible set
new_credible_set.append(tag_dict)
# If PICS cannot be calculated, we drop the variant from the credible set
continue

pics_snp_mu = PICS._pics_mu(lead_neglog_p, tag_dict["r2Overall"])
Expand Down Expand Up @@ -222,6 +221,9 @@ def finemap(
lambda locus, neglog_p: PICS._finemap(locus, neglog_p, k),
picsed_ldset_schema,
)
non_picsable_expr = (
f.size(f.filter(f.col("ldSet"), lambda x: x.r2Overall >= 0.5)) == 0
)
return StudyLocus(
_df=(
associations.df
Expand All @@ -239,7 +241,18 @@ def finemap(
),
),
)
# Rename tagVariantId to variantId
.withColumn(
"qualityControls",
StudyLocus.update_quality_flag(
f.col("qualityControls"),
non_picsable_expr,
StudyLocusQualityCheck.UNPICSABLE,
ireneisdoomed marked this conversation as resolved.
Show resolved Hide resolved
),
)
.withColumn(
"finemappingMethod",
f.coalesce(f.col("finemappingMethod"), f.lit("pics")),
)
.drop("neglog_pvalue")
),
_schema=StudyLocus.get_schema(),
Expand Down
17 changes: 16 additions & 1 deletion tests/method/test_pics.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,26 @@ def test_finemap_empty_array(
def test_finemap_null_ld_set(
self: TestFinemap, mock_study_locus: StudyLocus
) -> None:
"""Test how we apply `finemap` when `locus` is null by returning a null field."""
"""Test how we apply `finemap` when `ldSet` is null by returning a null field."""
mock_study_locus.df = mock_study_locus.df.filter(f.col("ldSet").isNull())
observed_df = PICS.finemap(mock_study_locus).df.limit(1)
assert observed_df.collect()[0]["locus"] is None

def test_finemap_quality_control(
self: TestFinemap, mock_study_locus: StudyLocus
) -> None:
"""Test that we add an `empty locus` flag when no variant in the locus meets the PICS criteria."""
mock_study_locus.df = mock_study_locus.df.withColumn(
# Association with an empty ldSet
"ldSet",
f.when(f.col("ldSet").isNull(), f.array()).otherwise(f.col("ldSet")),
).filter(f.size("ldSet") == 0)
observed_df = PICS.finemap(mock_study_locus).df.limit(1)
qc_flag = "Unable to calculate PIPs with the provided data"
ireneisdoomed marked this conversation as resolved.
Show resolved Hide resolved
assert (
qc_flag in observed_df.collect()[0]["qualityControls"]
), "Empty locus QC flag is missing."


def test__finemap_udf() -> None:
"""Test the _finemap UDF with a simple case."""
Expand Down