
Commit

chore: pre-commit auto fixes [...]
pre-commit-ci[bot] committed Jan 14, 2025
1 parent 9253915 commit f623a19
Showing 24 changed files with 180 additions and 171 deletions.
3 changes: 2 additions & 1 deletion src/gentropy/common/spark_helpers.py
@@ -4,9 +4,10 @@

 import re
 import sys
+from collections.abc import Iterable
 from functools import reduce, wraps
 from itertools import chain
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar

 import pyspark.sql.functions as f
 import pyspark.sql.types as t
3 changes: 2 additions & 1 deletion src/gentropy/method/l2g/feature_factory.py
@@ -3,7 +3,8 @@

 from __future__ import annotations

-from typing import Any, Iterator, Mapping
+from typing import Any
+from collections.abc import Iterator, Mapping

 from gentropy.dataset.l2g_features.colocalisation import (
     EQtlColocClppMaximumFeature,
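Both import hunks above apply the same fix: since Python 3.9 the abstract container types (Iterable, Iterator, Mapping) are importable and subscriptable from collections.abc, and their typing aliases are deprecated (PEP 585), so a pre-commit hook (most likely ruff's pyupgrade rules, e.g. UP035) rewrites the imports automatically. A minimal sketch of the resulting style; the helper below is hypothetical and not part of gentropy:

from collections.abc import Iterable  # canonical home since Python 3.9
from functools import reduce


def total_length(chunks: Iterable[str]) -> int:
    """Sum the lengths of an iterable of strings."""
    # collections.abc types accept subscripts at runtime on Python 3.9+,
    # so the deprecated `from typing import Iterable` is no longer needed.
    return reduce(lambda acc, chunk: acc + len(chunk), chunks, 0)


print(total_length(["spark", "helpers"]))  # prints 12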
4 changes: 3 additions & 1 deletion tests/gentropy/dataset/test_colocalisation.py
@@ -63,7 +63,9 @@ def test_append_study_metadata_right(
         assert (
             observed_df.select(f"{colocalisation_side}GeneId").collect()[0][0]
             == expected_geneId
-        ), f"Expected {colocalisation_side}GeneId {expected_geneId}, but got {observed_df.select(f'{colocalisation_side}GeneId').collect()[0][0]}"
+        ), (
+            f"Expected {colocalisation_side}GeneId {expected_geneId}, but got {observed_df.select(f'{colocalisation_side}GeneId').collect()[0][0]}"
+        )

     @pytest.fixture(autouse=True)
     def _setup(self: TestAppendStudyMetadata, spark: SparkSession) -> None:
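Every remaining hunk in this commit is the same mechanical rewrite of assert statements, of the kind produced by a formatter hook (ruff format, or Black's 2024 style): instead of parenthesizing a long condition and hanging the message off the closing parenthesis, the condition is kept on the assert line and only the message is wrapped. A standalone before/after sketch with illustrative names, not taken from gentropy:

observed_columns = ["geneIdA", "geneIdB", "score"]
expected_columns = ["geneIdA", "geneIdB", "score"]

# Old style: the line-length limit forces the condition into parentheses,
# which buries the comparison being tested.
assert (
    observed_columns == expected_columns
), "Gene interaction table has an unexpected schema."

# New style: the condition stays on the assert line and only the long
# message is parenthesized.
assert observed_columns == expected_columns, (
    "Gene interaction table has an unexpected schema."
)

Both forms are equivalent at runtime; the rewrite only changes which operand carries the parentheses, so pytest's assertion rewriting and the printed failure message are unaffected.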
12 changes: 6 additions & 6 deletions tests/gentropy/dataset/test_dataset.py
@@ -42,9 +42,9 @@ def test_initialize_without_schema(self: TestDataset, spark: SparkSession) -> No
         """Test if Dataset derived class collects the schema from assets if schema is not provided."""
         df = spark.createDataFrame([(1,)], schema=MockDataset.get_schema())
         ds = MockDataset(_df=df)
-        assert (
-            ds.schema == MockDataset.get_schema()
-        ), "Schema should be inferred from df"
+        assert ds.schema == MockDataset.get_schema(), (
+            "Schema should be inferred from df"
+        )

     def test_passing_incorrect_types(self: TestDataset, spark: SparkSession) -> None:
         """Test if passing incorrect object types to Dataset raises an error."""
@@ -97,6 +97,6 @@ def test_process_class_params(spark: SparkSession) -> None:
     }
     class_params, spark_params = Dataset._process_class_params(params)
     assert "_df" in class_params, "Class params should contain _df"
-    assert (
-        "recursiveFileLookup" in spark_params
-    ), "Spark params should contain recursiveFileLookup"
+    assert "recursiveFileLookup" in spark_params, (
+        "Spark params should contain recursiveFileLookup"
+    )
24 changes: 12 additions & 12 deletions tests/gentropy/dataset/test_l2g.py
@@ -29,9 +29,9 @@ def test_process_gene_interactions(sample_otp_interactions: DataFrame) -> None:
     """Tests processing of gene interactions from OTP."""
     expected_cols = ["geneIdA", "geneIdB", "score"]
     observed_df = L2GGoldStandard.process_gene_interactions(sample_otp_interactions)
-    assert (
-        observed_df.columns == expected_cols
-    ), "Gene interactions has a different schema."
+    assert observed_df.columns == expected_cols, (
+        "Gene interactions has a different schema."
+    )


 def test_predictions(mock_l2g_predictions: L2GPrediction) -> None:
@@ -171,9 +171,9 @@ def test_l2g_feature_constructor_with_schema_mismatch(
         ),
         with_gold_standard=False,
     )
-    assert (
-        fm._df.schema["distanceTssMean"].dataType == FloatType()
-    ), "Feature `distanceTssMean` is not being casted to FloatType. Check L2GFeatureMatrix constructor."
+    assert fm._df.schema["distanceTssMean"].dataType == FloatType(), (
+        "Feature `distanceTssMean` is not being casted to FloatType. Check L2GFeatureMatrix constructor."
+    )


 def test_calculate_feature_missingness_rate(
@@ -185,9 +185,9 @@ def test_calculate_feature_missingness_rate(
     assert isinstance(observed_missingness, dict)
     assert mock_l2g_feature_matrix.features_list is not None and len(
         observed_missingness
-    ) == len(
-        mock_l2g_feature_matrix.features_list
-    ), "Missing features in the missingness rate dictionary."
-    assert (
-        observed_missingness == expected_missingness
-    ), "Missingness rate is incorrect."
+    ) == len(mock_l2g_feature_matrix.features_list), (
+        "Missing features in the missingness rate dictionary."
+    )
+    assert observed_missingness == expected_missingness, (
+        "Missingness rate is incorrect."
+    )
54 changes: 27 additions & 27 deletions tests/gentropy/dataset/test_l2g_feature.py
@@ -287,9 +287,9 @@ def test__common_colocalisation_feature_logic(
                 },
             ],
         ).select("studyLocusId", "geneId", "eQtlColocH4Maximum")
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "The feature values are not as expected."
+        assert observed_df.collect() == expected_df.collect(), (
+            "The feature values are not as expected."
+        )

     def test_extend_missing_colocalisation_to_neighbourhood_genes(
         self: TestCommonColocalisationFeatureLogic,
@@ -322,9 +322,9 @@ def test_extend_missing_colocalisation_to_neighbourhood_genes(
         expected_df = spark.createDataFrame(
             [{"geneId": "gene3", "studyLocusId": "1", "eQtlColocH4Maximum": 0.0}]
         ).select("studyLocusId", "geneId", "eQtlColocH4Maximum")
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "The feature values are not as expected."
+        assert observed_df.collect() == expected_df.collect(), (
+            "The feature values are not as expected."
+        )

     def test_common_neighbourhood_colocalisation_feature_logic(
         self: TestCommonColocalisationFeatureLogic,
@@ -361,9 +361,9 @@ def test_common_neighbourhood_colocalisation_feature_logic(
                 },
             ],
         ).select("geneId", "studyLocusId", "eQtlColocH4MaximumNeighbourhood")
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "The expected and observed dataframes do not match."
+        assert observed_df.collect() == expected_df.collect(), (
+            "The expected and observed dataframes do not match."
+        )

     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonColocalisationFeatureLogic, spark: SparkSession) -> None:
@@ -547,9 +547,9 @@ def test_common_distance_feature_logic(
             .select("studyLocusId", "geneId", feature_name)
             .orderBy(feature_name)
         )
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), f"Expected and observed dataframes are not equal for feature {feature_name}."
+        assert observed_df.collect() == expected_df.collect(), (
+            f"Expected and observed dataframes are not equal for feature {feature_name}."
+        )

     def test_common_neighbourhood_distance_feature_logic(
         self: TestCommonDistanceFeatureLogic,
@@ -576,9 +576,9 @@ def test_common_neighbourhood_distance_feature_logic(
             ),  # 0.91/0.91
             ["geneId", "studyLocusId", feature_name],
         ).orderBy(feature_name)
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "Output doesn't meet the expectation."
+        assert observed_df.collect() == expected_df.collect(), (
+            "Output doesn't meet the expectation."
+        )

     @pytest.fixture(autouse=True)
     def _setup(
@@ -753,9 +753,9 @@ def test_common_vep_feature_logic(
             .orderBy(feature_name)
             .select("studyLocusId", "geneId", feature_name)
         )
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), f"Expected and observed dataframes are not equal for feature {feature_name}."
+        assert observed_df.collect() == expected_df.collect(), (
+            f"Expected and observed dataframes are not equal for feature {feature_name}."
+        )

     def test_common_neighbourhood_vep_feature_logic(
         self: TestCommonVepFeatureLogic,
@@ -787,9 +787,9 @@ def test_common_neighbourhood_vep_feature_logic(
             .orderBy(feature_name)
             .select("studyLocusId", "geneId", feature_name)
         )
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "Output doesn't meet the expectation."
+        assert observed_df.collect() == expected_df.collect(), (
+            "Output doesn't meet the expectation."
+        )

     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonVepFeatureLogic, spark: SparkSession) -> None:
@@ -870,9 +870,9 @@ def test_common_genecount_feature_logic(
             .orderBy("studyLocusId", "geneId")
         )

-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), f"Expected and observed dataframes do not match for feature {feature_name}."
+        assert observed_df.collect() == expected_df.collect(), (
+            f"Expected and observed dataframes do not match for feature {feature_name}."
+        )

     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None:
@@ -957,9 +957,9 @@ def test_is_protein_coding_feature_logic(
             .select("studyLocusId", "geneId", "isProteinCoding500kb")
             .orderBy("studyLocusId", "geneId")
         )
-        assert (
-            observed_df.collect() == expected_df.collect()
-        ), "Expected and observed DataFrames do not match."
+        assert observed_df.collect() == expected_df.collect(), (
+            "Expected and observed DataFrames do not match."
+        )

     @pytest.fixture(autouse=True)
     def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> None:
12 changes: 6 additions & 6 deletions tests/gentropy/dataset/test_l2g_feature_matrix.py
@@ -60,9 +60,9 @@ def test_study_locus(
             self.sample_study_locus, features_list, loader
         )
         for feature in features_list:
-            assert (
-                feature in fm._df.columns
-            ), f"Feature {feature} not found in feature matrix."
+            assert feature in fm._df.columns, (
+                f"Feature {feature} not found in feature matrix."
+            )

     def test_gold_standard(
         self: TestFromFeaturesList,
@@ -78,9 +78,9 @@ def test_gold_standard(
             self.sample_gold_standard, features_list, loader
         )
         for feature in features_list:
-            assert (
-                feature in fm._df.columns
-            ), f"Feature {feature} not found in feature matrix."
+            assert feature in fm._df.columns, (
+                f"Feature {feature} not found in feature matrix."
+            )

     @pytest.fixture(autouse=True)
     def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None:
12 changes: 6 additions & 6 deletions tests/gentropy/dataset/test_study_locus.py
@@ -517,9 +517,9 @@ def test_filter_ld_set(spark: SparkSession) -> None:
         observed_data, ["studyLocusId", "ldSet"]
     ).withColumn("ldSet", StudyLocus.filter_ld_set(f.col("ldSet"), 0.5))
     expected_tags_in_ld = 0
-    assert (
-        observed_df.filter(f.size("ldSet") > 1).count() == expected_tags_in_ld
-    ), "Expected tags in ld set differ from observed."
+    assert observed_df.filter(f.size("ldSet") > 1).count() == expected_tags_in_ld, (
+        "Expected tags in ld set differ from observed."
+    )


 def test_annotate_locus_statistics_boundaries(
@@ -860,9 +860,9 @@ def test_build_feature_matrix(
         study_locus=mock_study_locus,
     )
     fm = mock_study_locus.build_feature_matrix(features_list, loader)
-    assert isinstance(
-        fm, L2GFeatureMatrix
-    ), "Feature matrix should be of type L2GFeatureMatrix"
+    assert isinstance(fm, L2GFeatureMatrix), (
+        "Feature matrix should be of type L2GFeatureMatrix"
+    )


 class TestStudyLocusRedundancyFlagging:
@@ -28,15 +28,15 @@ def test_ontology_parser(self: TestOntologyParger, spark: SparkSession) -> None:
             self.SAMPLE_EFO_PATH, spark
         ).retain_rows_with_ancestor_id(["CL_0000000"])

-        assert isinstance(
-            cell_ontology, BiosampleIndex
-        ), "Cell ontology subset is not parsed correctly to BiosampleIndex."
-        assert isinstance(
-            uberon, BiosampleIndex
-        ), "Uberon subset is not parsed correctly to BiosampleIndex."
-        assert isinstance(
-            efo_cell_line, BiosampleIndex
-        ), "EFO cell line subset is not parsed correctly to BiosampleIndex."
+        assert isinstance(cell_ontology, BiosampleIndex), (
+            "Cell ontology subset is not parsed correctly to BiosampleIndex."
+        )
+        assert isinstance(uberon, BiosampleIndex), (
+            "Uberon subset is not parsed correctly to BiosampleIndex."
+        )
+        assert isinstance(efo_cell_line, BiosampleIndex), (
+            "EFO cell line subset is not parsed correctly to BiosampleIndex."
+        )

     def test_merge_biosample_indices(
         self: TestOntologyParger, spark: SparkSession
@@ -49,6 +49,6 @@ def test_merge_biosample_indices(
         efo = extract_ontology_from_json(self.SAMPLE_EFO_PATH, spark)

         merged = cell_ontology.merge_indices([uberon, efo])
-        assert isinstance(
-            merged, BiosampleIndex
-        ), "Merging of biosample indices is not correct."
+        assert isinstance(merged, BiosampleIndex), (
+            "Merging of biosample indices is not correct."
+        )
33 changes: 15 additions & 18 deletions tests/gentropy/datasource/ensembl/test_vep_variants.py
@@ -75,15 +75,12 @@ def test_in_silico_output_missing_value(
             x[0] for x in filter(lambda x: x[2] is None, self.SAMPLE_DATA)
         ]
         # Assert that the correct variants return null:
-        assert (
-            [
-                x["variantId"]
-                for x in self.df.filter(
-                    f.col("in_silico_predictions").isNull()
-                ).collect()
-            ]
-            == variant_with_missing_score
-        ), "Not the right variants got nullified in-silico predictor object."
+        assert [
+            x["variantId"]
+            for x in self.df.filter(f.col("in_silico_predictions").isNull()).collect()
+        ] == variant_with_missing_score, (
+            "Not the right variants got nullified in-silico predictor object."
+        )


 class TestVEPParser:
@@ -120,18 +117,18 @@ def test_conversion(self: TestVEPParser) -> None:
             _schema=VariantIndex.get_schema(),
         )

-        assert isinstance(
-            variant_index, VariantIndex
-        ), "VariantIndex object not created."
+        assert isinstance(variant_index, VariantIndex), (
+            "VariantIndex object not created."
+        )

     def test_variant_count(self: TestVEPParser) -> None:
         """Test if the number of variants is correct.

        It is expected that all rows from the parsed VEP output are present in the processed VEP output.
        """
-        assert (
-            self.raw_vep_output.count() == self.processed_vep_output.count()
-        ), f"Incorrect number of variants in processed VEP output: expected {self.raw_vep_output.count()}, got {self.processed_vep_output.count()}."
+        assert self.raw_vep_output.count() == self.processed_vep_output.count(), (
+            f"Incorrect number of variants in processed VEP output: expected {self.raw_vep_output.count()}, got {self.processed_vep_output.count()}."
+        )

     def test_collection(self: TestVEPParser) -> None:
         """Test if the collection of VEP variantIndex runs without failures."""
@@ -150,6 +147,6 @@ def test_ensembl_transcripts_no_duplicates(self: TestVEPParser) -> None:
         )

         asserted_targets = [t["targetId"] for t in targets]
-        assert len(asserted_targets) == len(
-            set(asserted_targets)
-        ), "Duplicate ensembl transcripts in a single row."
+        assert len(asserted_targets) == len(set(asserted_targets)), (
+            "Duplicate ensembl transcripts in a single row."
+        )
6 changes: 3 additions & 3 deletions tests/gentropy/datasource/finngen/test_finngen_study_index.py
@@ -354,9 +354,9 @@ def test_finngen_validate_release_prefix(
 ) -> None:
     """Test validate_release_prefix."""
     if not xfail:
-        assert (
-            FinnGenStudyIndex.validate_release_prefix(prefix) == expected_output
-        ), "Incorrect match object"
+        assert FinnGenStudyIndex.validate_release_prefix(prefix) == expected_output, (
+            "Incorrect match object"
+        )
     else:
         with pytest.raises(ValueError):
             FinnGenStudyIndex.validate_release_prefix(prefix)
