chore: update poetry to version 2 (#969)
* fix: `poetry shell` is deprecated

* chore: update version in precommit hook

* chore: update version in precommit hook

* chore: migrate toml

* chore: update version in dockerfile

* chore: pre-commit auto fixes [...]

* chore: do not use typing deprecated classes according to ruff

* build: update `deptry`

* build: update `deptry`
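(Note on the first bullet: Poetry 2.0 removes the bundled `poetry shell` command; it now ships separately as the `poetry-plugin-shell` plugin. The built-in alternative for activating a project's virtualenv is `poetry env activate`, which prints the activation command instead of spawning a subshell.)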
ireneisdoomed authored Jan 14, 2025
1 parent e8724cf commit 0a958f8
Showing 18 changed files with 364 additions and 100 deletions.
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
@@ -103,8 +103,7 @@ repos:
       - id: pydoclint

   - repo: https://github.com/python-poetry/poetry
-    rev: "1.8.0"
+    rev: "2.0.0"
     hooks:
       - id: poetry-check
       - id: poetry-lock
-        args: ["--no-update"]
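Dropping `args: ["--no-update"]` follows from a behaviour change in Poetry 2.0: `poetry lock` now preserves locked versions by default (the old re-resolving behaviour moved to `poetry lock --regenerate`), so the flag no longer exists.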
2 changes: 1 addition & 1 deletion Dockerfile
@@ -11,7 +11,7 @@ ENV POETRY_NO_INTERACTION=1 \
     POETRY_CACHE_DIR=/tmp/poetry_cache \
     JAVA_HOME=/usr

-RUN pip install poetry==1.7.1
+RUN pip install poetry>=2.0.0
 WORKDIR /app

 COPY pyproject.toml poetry.lock ./
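A caveat on the new line (an observation, not something the commit addresses): in the Dockerfile's shell form an unquoted `>` is parsed as output redirection, so `pip install poetry>=2.0.0` effectively runs `pip install poetry` with stdout sent to a file named `=2.0.0`, installing the latest Poetry only by coincidence. Quoting the specifier, e.g. `RUN pip install "poetry>=2.0.0"`, applies the version constraint as intended.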
308 changes: 290 additions & 18 deletions poetry.lock

Large diffs are not rendered by default.
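(The heavy lock-file churn is expected here: Poetry 2.0 re-resolves and rewrites poetry.lock, presumably in its newer 2.1 lock-file format.)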

58 changes: 30 additions & 28 deletions pyproject.toml
@@ -1,43 +1,45 @@
-[tool.poetry]
+[project]
 name = "gentropy"
 # !! version is managed by semantic_release
 version = "0.0.0"
 description = "Open Targets python framework for post-GWAS analysis"
-authors = ["Open Targets core team"]
+authors = [
+    { name = "Open Targets core team", email = "[email protected]" }
+]
 license = "Apache-2.0"
 readme = "README.md"
-documentation = "https://opentargets.github.io/gentropy/"
-repository = "https://github.com/opentargets/gentropy"
-packages = [{ include = "gentropy", from = "src" }]
+requires-poetry = ">=2.0"
+requires-python = ">=3.10, <3.11"
+dependencies = [
+    "pyspark (==3.3.4)",
+    "scipy (>=1.11.4, <1.12.0)",
+    "hydra-core (>=1.3.2, <1.4.0)",
+    "pyliftover (>=0.4, <0.5)",
+    "numpy (>=1.26.2, <1.27.0)",
+    "hail (==0.2.127)",
+    "wandb (>=0.19.0, <0.20.0)",
+    "google (>=3.0.0, <3.1.0)",
+    "omegaconf (>=2.3.0, <2.4.0)",
+    "typing-extensions (>=4.9.0, <4.13.0)",
+    "scikit-learn (>=1.6.0, <1.7.0)",
+    "pandas[gcp,parquet] (>=2.2.2, <2.3.0)",
+    "skops (>=0.11, <0.12)",
+    "google-cloud-secret-manager (>=2.20.0, <2.22.0)",
+    "shap (>=0.46.0, <0.47.0)",
+    "matplotlib (>=3.7.3, <3.8.0)"
+]

-[tool.poetry.urls]
+[project.urls]
 Source = "https://github.com/opentargets/gentropy"
 "Bug Tracker" = "http://github.com/opentargets/issues"
-"Funding" = "https://www.opentargets.org"
-"Documentation" = "https://opentargets.github.io/gentropy/"
+Funding = "https://www.opentargets.org"
+Documentation = "https://opentargets.github.io/gentropy/"

-[tool.poetry.scripts]
+[project.scripts]
 gentropy = "gentropy.cli:main"

-[tool.poetry.dependencies]
-python = ">=3.10, <3.11"
-pyspark = "3.3.4"
-scipy = ">=1.11.4, <1.12.0"
-hydra-core = ">=1.3.2, <1.4.0"
-pyliftover = ">=0.4, <0.5"
-numpy = ">=1.26.2, <1.27.0"
-hail = "0.2.127"
-wandb = ">=0.19.0, <0.20.0"
-google = ">=3.0.0, <3.1.0"
-omegaconf = ">=2.3.0, <2.4.0"
-typing-extensions = ">=4.9.0, <4.13.0"
-scikit-learn = ">=1.6.0, <1.7.0"
-pandas = { extras = ["gcp", "parquet"], version = ">=2.2.2, <2.3.0" }
-skops = ">=0.11, <0.12"
-google-cloud-secret-manager = ">=2.20.0, <2.22.0"
-shap = ">=0.46.0, <0.47.0"
-matplotlib = ">=3.7.3, <3.8.0"

 [tool.poetry.group.docs.dependencies]
 mkdocs = ">=1.5.3, <1.6.0"
 mkdocstrings-python = ">=1.12.2, <1.13.0"

@@ -67,7 +69,7 @@ ipykernel = ">=6.28.0, <6.30.0"
 google-cloud-dataproc = ">=5.8.0, <5.16.0"
 pydoclint = ">=0.3.8,<0.6.0"
 prettier = ">=0.0.7, <0.1.0"
-deptry = ">=0.12,<0.21"
+deptry = ">=0.22.0, <0.23.0"
 yamllint = ">=1.33.0, <1.36.0"
 pre-commit = ">=4.0.0, <4.1.0"
 mypy = ">=1.13, <1.14"
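This pyproject.toml change is the core of the migration: Poetry 2.0 supports the standard PEP 621 `[project]` table, so the `[tool.poetry]` metadata moves over. `authors` becomes a list of `{ name, email }` tables, `[project.urls]` keys such as `Funding` no longer need quoting, the Python constraint moves from a `python` entry under `[tool.poetry.dependencies]` to `requires-python`, and dependencies become PEP 508 strings (Poetry renders its ranges as parenthesized specifiers, e.g. `"scipy (>=1.11.4, <1.12.0)"`). The new `requires-poetry = ">=2.0"` key declares the minimum Poetry version able to read the file.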
9 changes: 4 additions & 5 deletions src/gentropy/colocalisation.py
@@ -3,7 +3,7 @@
 from __future__ import annotations

 from functools import partial
-from typing import Any, Type
+from typing import Any

 from pyspark.sql.functions import col

@@ -77,14 +77,14 @@ def __init__(
     @classmethod
     def _get_colocalisation_class(
         cls, method: str
-    ) -> Type[ColocalisationMethodInterface]:
+    ) -> type[ColocalisationMethodInterface]:
         """Get colocalisation class.

         Args:
             method (str): Colocalisation method.

         Returns:
-            Type[ColocalisationMethodInterface]: Class that implements the ColocalisationMethodInterface.
+            type[ColocalisationMethodInterface]: Class that implements the ColocalisationMethodInterface.

         Raises:
             ValueError: if method not available.

@@ -96,5 +96,4 @@ def _get_colocalisation_class(
         method = method.lower()
         if method not in cls.__coloc_methods__:
             raise ValueError(f"Colocalisation method {method} not available.")
-        coloc_method = cls.__coloc_methods__[method]
-        return coloc_method
+        return cls.__coloc_methods__[method]
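The `Type` to `type` edits in this file and the ones below come from ruff's pyupgrade rules (UP006/UP035): since Python 3.9, PEP 585 makes the builtins `type`, `list`, `dict`, and `tuple` subscriptable and deprecates the `typing` aliases, including `typing.Callable` and `typing.Iterable` in favour of `collections.abc`. A minimal, self-contained sketch of the registry pattern being modernised (hypothetical names, not gentropy code):

    from __future__ import annotations


    class ColocMethod:
        """Stand-in base class for the sketch."""


    class Registry:
        # PEP 585: the builtin `type` is subscriptable on Python >= 3.9,
        # so `typing.Type[X]` is redundant (ruff rule UP006 flags it).
        _methods: dict[str, type[ColocMethod]] = {}

        @classmethod
        def get(cls, name: str) -> type[ColocMethod]:  # was: Type[ColocMethod]
            if name not in cls._methods:
                raise ValueError(f"Method {name} not available.")
            return cls._methods[name]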
3 changes: 2 additions & 1 deletion src/gentropy/common/spark_helpers.py
@@ -4,9 +4,10 @@

 import re
 import sys
+from collections.abc import Callable, Iterable
 from functools import reduce, wraps
 from itertools import chain
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, Optional, TypeVar

 import pyspark.sql.functions as f
 import pyspark.sql.types as t
28 changes: 10 additions & 18 deletions src/gentropy/config.py
@@ -2,7 +2,7 @@

 import os
 from dataclasses import dataclass, field
-from typing import Any, ClassVar, List, TypedDict
+from typing import Any, ClassVar, TypedDict

 from hail import __file__ as hail_location
 from hydra.core.config_store import ConfigStore

@@ -17,8 +17,7 @@ class SessionConfig:
     write_mode: str = "errorifexists"
     spark_uri: str = "local[*]"
     hail_home: str = os.path.dirname(hail_location)
-    extended_spark_conf: dict[str, str] | None = field(
-        default_factory=dict[str, str])
+    extended_spark_conf: dict[str, str] | None = field(default_factory=dict[str, str])
     output_partitions: int = 200
     _target_: str = "gentropy.common.session.Session"

@@ -28,7 +27,7 @@ class StepConfig:
     """Base step configuration."""

     session: SessionConfig
-    defaults: List[Any] = field(
+    defaults: list[Any] = field(
         default_factory=lambda: [{"session": "base_session"}, "_self_"]
     )

@@ -40,8 +39,7 @@ class ColocalisationConfig(StepConfig):
     credible_set_path: str = MISSING
     coloc_path: str = MISSING
     colocalisation_method: str = MISSING
-    colocalisation_method_params: dict[str, Any] = field(
-        default_factory=dict[str, Any])
+    colocalisation_method_params: dict[str, Any] = field(default_factory=dict[str, Any])
     _target_: str = "gentropy.colocalisation.ColocalisationStep"

@@ -126,8 +124,7 @@ class EqtlCatalogueConfig(StepConfig):
     eqtl_catalogue_paths_imported: str = MISSING
     eqtl_catalogue_study_index_out: str = MISSING
     eqtl_catalogue_credible_sets_out: str = MISSING
-    mqtl_quantification_methods_blacklist: list[str] = field(
-        default_factory=lambda: [])
+    mqtl_quantification_methods_blacklist: list[str] = field(default_factory=lambda: [])
     eqtl_lead_pvalue_threshold: float = 1e-3
     _target_: str = "gentropy.eqtl_catalogue.EqtlCatalogueStep"

@@ -681,8 +678,7 @@ class Config:
     """Application configuration."""

     # this is unfortunately verbose due to @dataclass limitations
-    defaults: List[Any] = field(default_factory=lambda: [
-        "_self_", {"step": MISSING}])
+    defaults: list[Any] = field(default_factory=lambda: ["_self_", {"step": MISSING}])
     step: StepConfig = MISSING
     datasets: dict[str, str] = field(default_factory=dict)

@@ -716,8 +712,7 @@ def register_config() -> None:
         name="gwas_catalog_top_hit_ingestion",
         node=GWASCatalogTopHitIngestionConfig,
     )
-    cs.store(group="step", name="ld_based_clumping",
-             node=LDBasedClumpingConfig)
+    cs.store(group="step", name="ld_based_clumping", node=LDBasedClumpingConfig)
     cs.store(group="step", name="ld_index", node=LDIndexConfig)
     cs.store(group="step", name="locus_to_gene", node=LocusToGeneConfig)
     cs.store(

@@ -735,8 +730,7 @@ def register_config() -> None:
     cs.store(group="step", name="pics", node=PICSConfig)
     cs.store(group="step", name="gnomad_variants", node=GnomadVariantConfig)
-    cs.store(group="step", name="ukb_ppp_eur_sumstat_preprocess",
-             node=UkbPppEurConfig)
+    cs.store(group="step", name="ukb_ppp_eur_sumstat_preprocess", node=UkbPppEurConfig)
     cs.store(group="step", name="variant_index", node=VariantIndexConfig)
     cs.store(group="step", name="variant_to_vcf", node=ConvertToVcfStepConfig)
     cs.store(

@@ -769,7 +763,5 @@ def register_config() -> None:
         name="locus_to_gene_associations",
         node=LocusToGeneAssociationsStepConfig,
     )
-    cs.store(group="step", name="finngen_ukb_meta_ingestion",
-             node=FinngenUkbMetaConfig)
-    cs.store(group="step", name="credible_set_qc",
-             node=CredibleSetQCStepConfig)
+    cs.store(group="step", name="finngen_ukb_meta_ingestion", node=FinngenUkbMetaConfig)
+    cs.store(group="step", name="credible_set_qc", node=CredibleSetQCStepConfig)
4 changes: 2 additions & 2 deletions src/gentropy/dataset/l2g_feature_matrix.py
@@ -3,7 +3,7 @@
 from __future__ import annotations

 from functools import reduce
-from typing import TYPE_CHECKING, Type
+from typing import TYPE_CHECKING

 import pyspark.sql.functions as f
 from pyspark.sql import Window

@@ -55,7 +55,7 @@ def __init__(

     @classmethod
     def from_features_list(
-        cls: Type[L2GFeatureMatrix],
+        cls: type[L2GFeatureMatrix],
         study_loci_to_annotate: StudyLocus | L2GGoldStandard,
         features_list: list[str],
         features_input_loader: L2GFeatureInputLoader,
4 changes: 2 additions & 2 deletions src/gentropy/dataset/l2g_gold_standard.py
@@ -3,7 +3,7 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Type
+from typing import TYPE_CHECKING

 import pyspark.sql.functions as f
 from pyspark.sql import Window

@@ -74,7 +74,7 @@ def get_schema(cls: type[L2GGoldStandard]) -> StructType:

     @classmethod
     def process_gene_interactions(
-        cls: Type[L2GGoldStandard], interactions: DataFrame
+        cls: type[L2GGoldStandard], interactions: DataFrame
     ) -> DataFrame:
         """Extract top scoring gene-gene interaction from the interactions dataset of the Platform.
4 changes: 2 additions & 2 deletions src/gentropy/dataset/l2g_prediction.py
@@ -3,7 +3,7 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Type
+from typing import TYPE_CHECKING

 import pyspark.sql.functions as f
 from pyspark.sql import DataFrame

@@ -40,7 +40,7 @@ def get_schema(cls: type[L2GPrediction]) -> StructType:

     @classmethod
     def from_credible_set(
-        cls: Type[L2GPrediction],
+        cls: type[L2GPrediction],
         session: Session,
         credible_set: StudyLocus,
         feature_matrix: L2GFeatureMatrix,
6 changes: 3 additions & 3 deletions src/gentropy/datasource/gwas_catalog/study_splitter.py
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING

 import pyspark.sql.functions as f
 from pyspark.sql.window import Window

@@ -84,7 +84,7 @@ def split(
         cls: type[GWASCatalogStudySplitter],
         studies: StudyIndexGWASCatalog,
         associations: StudyLocusGWASCatalog,
-    ) -> Tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]:
+    ) -> tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]:
         """Splitting multi-trait GWAS Catalog studies.

         If assigned disease of the study and the association don't agree, we assume the study needs to be split.

@@ -95,7 +95,7 @@ def split(
             associations (StudyLocusGWASCatalog): GWAS Catalog associations.

         Returns:
-            Tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]: Split studies and associations.
+            tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]: Split studies and associations.
         """
         # Composite of studies and associations to resolve scattered information
         st_ass = (
6 changes: 2 additions & 4 deletions src/gentropy/datasource/open_targets/l2g_gold_standard.py
@@ -2,8 +2,6 @@

 from __future__ import annotations

-from typing import Type
-
 import pyspark.sql.functions as f
 from pyspark.sql import DataFrame

@@ -24,7 +22,7 @@ class OpenTargetsL2GGoldStandard:

     @classmethod
     def parse_positive_curation(
-        cls: Type[OpenTargetsL2GGoldStandard], gold_standard_curation: DataFrame
+        cls: type[OpenTargetsL2GGoldStandard], gold_standard_curation: DataFrame
     ) -> DataFrame:
         """Parse positive set from gold standard curation.

@@ -60,7 +58,7 @@ def parse_positive_curation(

     @classmethod
     def expand_gold_standard_with_negatives(
-        cls: Type[OpenTargetsL2GGoldStandard],
+        cls: type[OpenTargetsL2GGoldStandard],
         positive_set: DataFrame,
         variant_index: VariantIndex,
     ) -> DataFrame:
3 changes: 2 additions & 1 deletion src/gentropy/method/l2g/feature_factory.py
@@ -3,7 +3,8 @@

 from __future__ import annotations

-from typing import Any, Iterator, Mapping
+from typing import Any
+from collections.abc import Iterator, Mapping

 from gentropy.dataset.l2g_features.colocalisation import (
     EQtlColocClppMaximumFeature,
6 changes: 3 additions & 3 deletions src/gentropy/method/l2g/model.py
@@ -5,7 +5,7 @@
 import json
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Type
+from typing import TYPE_CHECKING, Any

 import pandas as pd
 import skops.io as sio

@@ -51,7 +51,7 @@ def __post_init__(self: LocusToGeneModel) -> None:
         self.model.set_params(**self.hyperparameters_dict)

     @classmethod
-    def load_from_disk(cls: Type[LocusToGeneModel], path: str) -> LocusToGeneModel:
+    def load_from_disk(cls: type[LocusToGeneModel], path: str) -> LocusToGeneModel:
         """Load a fitted model from disk.

         Args:

@@ -83,7 +83,7 @@ def load_from_disk(cls: type[LocusToGeneModel], path: str) -> LocusToGeneModel:

     @classmethod
     def load_from_hub(
-        cls: Type[LocusToGeneModel],
+        cls: type[LocusToGeneModel],
         model_id: str,
         hf_token: str | None = None,
         model_name: str = "classifier.skops",
@@ -18,8 +18,8 @@
 from gentropy.finngen_studies import FinnGenStudiesStep

 if TYPE_CHECKING:
+    from collections.abc import Callable
     from pathlib import Path
-    from typing import Callable

     from pyspark.sql import SparkSession