opentargets · DSuveges · Dec 21, 2023 · Dec 18, 2023 · Dec 18, 2023 · Dec 18, 2023
diff --git a/src/otg/dataset/study_locus.py b/src/otg/dataset/study_locus.py
@@ -37,6 +37,7 @@ class StudyLocusQualityCheck(Enum):
         AMBIGUOUS_STUDY (str): Association with ambiguous study
         UNRESOLVED_LD (str): Variant not found in LD reference
         LD_CLUMPED (str): Explained by a more significant variant in high LD (clumped)
+        UNPICSABLE (str): Unable to calculate PIPs with the provided data
     """
 
     SUBSIGNIFICANT_FLAG = "Subsignificant p-value"
@@ -49,6 +50,7 @@ class StudyLocusQualityCheck(Enum):
     UNRESOLVED_LD = "Variant not found in LD reference"
     LD_CLUMPED = "Explained by a more significant variant in high LD (clumped)"
     NO_POPULATION = "Study does not have population annotation to resolve LD"
+    UNPICSABLE = "Unable to calculate PIPs with the provided data"
 
 
 class CredibleInterval(Enum):

diff --git a/src/otg/method/pics.py b/src/otg/method/pics.py
@@ -8,7 +8,7 @@
 import pyspark.sql.types as t
 from scipy.stats import norm
 
-from otg.dataset.study_locus import StudyLocus
+from otg.dataset.study_locus import StudyLocus, StudyLocusQualityCheck
 
 if TYPE_CHECKING:
     from pyspark.sql import Row
@@ -127,7 +127,7 @@ def _finemap(
             ...     Row(variantId="var2", r2Overall=None),
             ... ]
             >>> PICS._finemap(ld_set_with_no_r2, lead_neglog_p=10.0, k=6.4)
-            [{'variantId': 'var1', 'r2Overall': None}, {'variantId': 'var2', 'r2Overall': None}]
+            []
         """
         if ld_set is None:
             return None
@@ -145,8 +145,7 @@ def _finemap(
                 or tag_dict["r2Overall"] < 0.5
                 or not lead_neglog_p
             ):
-                # If PICS cannot be calculated, we'll return the original credible set
-                new_credible_set.append(tag_dict)
+                # If PICS cannot be calculated, we drop the variant from the credible set
                 continue
 
             pics_snp_mu = PICS._pics_mu(lead_neglog_p, tag_dict["r2Overall"])
@@ -222,6 +221,9 @@ def finemap(
             lambda locus, neglog_p: PICS._finemap(locus, neglog_p, k),
             picsed_ldset_schema,
         )
+        non_picsable_expr = (
+            f.size(f.filter(f.col("ldSet"), lambda x: x.r2Overall >= 0.5)) == 0
+        )
         return StudyLocus(
             _df=(
                 associations.df
@@ -239,7 +241,18 @@ def finemap(
                         ),
                     ),
                 )
-                # Rename tagVariantId to variantId
+                .withColumn(
+                    "qualityControls",
+                    StudyLocus.update_quality_flag(
+                        f.col("qualityControls"),
+                        non_picsable_expr,
+                        StudyLocusQualityCheck.UNPICSABLE,
+                    ),
+                )
+                .withColumn(
+                    "finemappingMethod",
+                    f.coalesce(f.col("finemappingMethod"), f.lit("pics")),
+                )
                 .drop("neglog_pvalue")
             ),
             _schema=StudyLocus.get_schema(),

diff --git a/tests/method/test_pics.py b/tests/method/test_pics.py
@@ -30,11 +30,26 @@ def test_finemap_empty_array(
     def test_finemap_null_ld_set(
         self: TestFinemap, mock_study_locus: StudyLocus
     ) -> None:
-        """Test how we apply `finemap` when `locus` is null by returning a null field."""
+        """Test how we apply `finemap` when `ldSet` is null by returning a null field."""
         mock_study_locus.df = mock_study_locus.df.filter(f.col("ldSet").isNull())
         observed_df = PICS.finemap(mock_study_locus).df.limit(1)
         assert observed_df.collect()[0]["locus"] is None
 
+    def test_finemap_quality_control(
+        self: TestFinemap, mock_study_locus: StudyLocus
+    ) -> None:
+        """Test that we add the `UNPICSABLE` flag when no variant in the ldSet meets PICS criteria."""
+        mock_study_locus.df = mock_study_locus.df.withColumn(
+            # Association with an empty ldSet
+            "ldSet",
+            f.when(f.col("ldSet").isNull(), f.array()).otherwise(f.col("ldSet")),
+        ).filter(f.size("ldSet") == 0)
+        observed_df = PICS.finemap(mock_study_locus).df.limit(1)
+        qc_flag = "Unable to calculate PIPs with the provided data"
+        assert (
+            qc_flag in observed_df.collect()[0]["qualityControls"]
+        ), "Empty locus QC flag is missing."
+
 
 def test__finemap_udf() -> None:
     """Test the _finemap UDF with a simple case."""