From 5e1a475a3bc46153cb8e74c4f5112e326aff83be Mon Sep 17 00:00:00 2001
From: vivienho <56025826+vivienho@users.noreply.github.com>
Date: Thu, 5 Dec 2024 14:20:32 +0000
Subject: [PATCH 1/3] refactor: rename all gene_index related files to
 target_index

---
 docs/python_api/datasets/{gene_index.md => target_index.md}       | 0
 docs/python_api/steps/{gene_index.md => target_index.md}          | 0
 .../assets/schemas/{gene_index.json => target_index.json}         | 0
 src/gentropy/dataset/{gene_index.py => target_index.py}           | 0
 src/gentropy/{gene_index.py => target_index.py}                   | 0
 .../gentropy/dataset/{test_gene_index.py => test_target_index.py} | 0
 6 files changed, 0 insertions(+), 0 deletions(-)
 rename docs/python_api/datasets/{gene_index.md => target_index.md} (100%)
 rename docs/python_api/steps/{gene_index.md => target_index.md} (100%)
 rename src/gentropy/assets/schemas/{gene_index.json => target_index.json} (100%)
 rename src/gentropy/dataset/{gene_index.py => target_index.py} (100%)
 rename src/gentropy/{gene_index.py => target_index.py} (100%)
 rename tests/gentropy/dataset/{test_gene_index.py => test_target_index.py} (100%)

diff --git a/docs/python_api/datasets/gene_index.md b/docs/python_api/datasets/target_index.md
similarity index 100%
rename from docs/python_api/datasets/gene_index.md
rename to docs/python_api/datasets/target_index.md
diff --git a/docs/python_api/steps/gene_index.md b/docs/python_api/steps/target_index.md
similarity index 100%
rename from docs/python_api/steps/gene_index.md
rename to docs/python_api/steps/target_index.md
diff --git a/src/gentropy/assets/schemas/gene_index.json b/src/gentropy/assets/schemas/target_index.json
similarity index 100%
rename from src/gentropy/assets/schemas/gene_index.json
rename to src/gentropy/assets/schemas/target_index.json
diff --git a/src/gentropy/dataset/gene_index.py b/src/gentropy/dataset/target_index.py
similarity index 100%
rename from src/gentropy/dataset/gene_index.py
rename to src/gentropy/dataset/target_index.py
diff --git a/src/gentropy/gene_index.py b/src/gentropy/target_index.py
similarity index 100%
rename from src/gentropy/gene_index.py
rename to src/gentropy/target_index.py
diff --git a/tests/gentropy/dataset/test_gene_index.py b/tests/gentropy/dataset/test_target_index.py
similarity index 100%
rename from tests/gentropy/dataset/test_gene_index.py
rename to tests/gentropy/dataset/test_target_index.py

From 26abb1b6f8f8acf82d65dfe61c149a941ea1aa31 Mon Sep 17 00:00:00 2001
From: vivienho <56025826+vivienho@users.noreply.github.com>
Date: Thu, 5 Dec 2024 17:08:56 +0000
Subject: [PATCH 2/3] refactor: rename gene_index to target_index in various
 files

---
 docs/python_api/datasets/target_index.md      |  6 ++--
 docs/python_api/steps/target_index.md         |  4 +--
 src/gentropy/biosample_index.py               |  2 +-
 src/gentropy/config.py                        | 12 +++----
 src/gentropy/dataset/intervals.py             |  8 ++---
 .../dataset/l2g_features/colocalisation.py    | 28 ++++++++--------
 src/gentropy/dataset/l2g_features/distance.py | 16 +++++-----
 src/gentropy/dataset/l2g_features/other.py    | 32 +++++++++----------
 src/gentropy/dataset/l2g_features/vep.py      | 12 +++----
 src/gentropy/dataset/study_index.py           |  6 ++--
 src/gentropy/dataset/target_index.py          | 22 ++++++-------
 .../datasource/intervals/andersson.py         | 10 +++---
 src/gentropy/datasource/intervals/javierre.py |  8 ++---
 src/gentropy/datasource/intervals/jung.py     |  8 ++---
 src/gentropy/datasource/intervals/thurman.py  |  8 ++---
 .../datasource/open_targets/target.py         | 16 +++++-----
 src/gentropy/l2g.py                           | 14 ++++----
 src/gentropy/study_validation.py              |  4 +--
 src/gentropy/target_index.py                  | 18 +++++------
 19 files changed, 117 insertions(+), 117 deletions(-)

diff --git a/docs/python_api/datasets/target_index.md b/docs/python_api/datasets/target_index.md
index d66d3a6bc..1995e8a4d 100644
--- a/docs/python_api/datasets/target_index.md
+++ b/docs/python_api/datasets/target_index.md
@@ -1,9 +1,9 @@
 ---
-title: Gene Index
+title: Target Index
 ---
 
-::: gentropy.dataset.gene_index.GeneIndex
+::: gentropy.dataset.target_index.TargetIndex
 
 ## Schema
 
---8<-- "assets/schemas/gene_index.md"
+--8<-- "assets/schemas/target_index.md"
diff --git a/docs/python_api/steps/target_index.md b/docs/python_api/steps/target_index.md
index a0808dcad..4a572eca7 100644
--- a/docs/python_api/steps/target_index.md
+++ b/docs/python_api/steps/target_index.md
@@ -1,5 +1,5 @@
 ---
-title: gene_index
+title: target_index
 ---
 
-::: gentropy.gene_index.GeneIndexStep
+::: gentropy.target_index.TargetIndexStep
diff --git a/src/gentropy/biosample_index.py b/src/gentropy/biosample_index.py
index a6e8b5223..846bb97ce 100644
--- a/src/gentropy/biosample_index.py
+++ b/src/gentropy/biosample_index.py
@@ -27,7 +27,7 @@ def __init__(
             cell_ontology_input_path (str): Input cell ontology dataset path.
             uberon_input_path (str): Input uberon dataset path.
             efo_input_path (str): Input efo dataset path.
-            biosample_index_path (str): Output gene index dataset path.
+            biosample_index_path (str): Output biosample index dataset path.
         """
         cell_ontology_index = extract_ontology_from_json(
             cell_ontology_input_path, session.spark
diff --git a/src/gentropy/config.py b/src/gentropy/config.py
index 65fdb5897..f5233100f 100644
--- a/src/gentropy/config.py
+++ b/src/gentropy/config.py
@@ -44,12 +44,12 @@ class ColocalisationConfig(StepConfig):
 
 
 @dataclass
-class GeneIndexConfig(StepConfig):
-    """Gene index step configuration."""
+class TargetIndexConfig(StepConfig):
+    """Target index step configuration."""
 
     target_path: str = MISSING
-    gene_index_path: str = MISSING
-    _target_: str = "gentropy.gene_index.GeneIndexStep"
+    target_index_path: str = MISSING
+    _target_: str = "gentropy.target_index.TargetIndexStep"
 
 
 @dataclass
@@ -298,7 +298,7 @@ class LocusToGeneFeatureMatrixConfig(StepConfig):
     variant_index_path: str | None = None
     colocalisation_path: str | None = None
     study_index_path: str | None = None
-    gene_index_path: str | None = None
+    target_index_path: str | None = None
     feature_matrix_path: str = MISSING
     features_list: list[str] = field(
         default_factory=lambda: [
@@ -688,7 +688,7 @@ def register_config() -> None:
     cs.store(group="step/session", name="base_session", node=SessionConfig)
     cs.store(group="step", name="colocalisation", node=ColocalisationConfig)
     cs.store(group="step", name="eqtl_catalogue", node=EqtlCatalogueConfig)
-    cs.store(group="step", name="gene_index", node=GeneIndexConfig)
+    cs.store(group="step", name="target_index", node=TargetIndexConfig)
     cs.store(group="step", name="biosample_index", node=BiosampleIndexConfig)
     cs.store(
         group="step",
diff --git a/src/gentropy/dataset/intervals.py b/src/gentropy/dataset/intervals.py
index 37158810b..e38960b27 100644
--- a/src/gentropy/dataset/intervals.py
+++ b/src/gentropy/dataset/intervals.py
@@ -8,7 +8,7 @@
 from gentropy.common.Liftover import LiftOverSpark
 from gentropy.common.schemas import parse_spark_schema
 from gentropy.dataset.dataset import Dataset
-from gentropy.dataset.gene_index import GeneIndex
+from gentropy.dataset.target_index import TargetIndex
 
 if TYPE_CHECKING:
     from pyspark.sql import SparkSession
@@ -35,7 +35,7 @@ def from_source(
         spark: SparkSession,
         source_name: str,
         source_path: str,
-        gene_index: GeneIndex,
+        target_index: TargetIndex,
         lift: LiftOverSpark,
     ) -> Intervals:
         """Collect interval data for a particular source.
@@ -44,7 +44,7 @@ def from_source(
             spark (SparkSession): Spark session
             source_name (str): Name of the interval source
             source_path (str): Path to the interval source file
-            gene_index (GeneIndex): Gene index
+            target_index (TargetIndex): Target index
             lift (LiftOverSpark): LiftOverSpark instance to convert coordinats from hg37 to hg38
 
         Returns:
@@ -70,4 +70,4 @@ def from_source(
 
         source_class = source_to_class[source_name]
         data = source_class.read(spark, source_path)  # type: ignore
-        return source_class.parse(data, gene_index, lift)  # type: ignore
+        return source_class.parse(data, target_index, lift)  # type: ignore
diff --git a/src/gentropy/dataset/l2g_features/colocalisation.py b/src/gentropy/dataset/l2g_features/colocalisation.py
index 68509ca79..39f35835d 100644
--- a/src/gentropy/dataset/l2g_features/colocalisation.py
+++ b/src/gentropy/dataset/l2g_features/colocalisation.py
@@ -9,11 +9,11 @@
 
 from gentropy.common.spark_helpers import convert_from_wide_to_long
 from gentropy.dataset.colocalisation import Colocalisation
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.l2g_features.l2g_feature import L2GFeature
 from gentropy.dataset.l2g_gold_standard import L2GGoldStandard
 from gentropy.dataset.study_index import StudyIndex
 from gentropy.dataset.study_locus import StudyLocus
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.dataset.variant_index import VariantIndex
 
 if TYPE_CHECKING:
@@ -74,7 +74,7 @@ def extend_missing_colocalisation_to_neighbourhood_genes(
     feature_name: str,
     local_features: DataFrame,
     variant_index: VariantIndex,
-    gene_index: GeneIndex,
+    target_index: TargetIndex,
     study_locus: StudyLocus,
 ) -> DataFrame:
     """This function creates an artificial dataset of features that represents the missing colocalisation to the neighbourhood genes.
@@ -83,7 +83,7 @@ def extend_missing_colocalisation_to_neighbourhood_genes(
         feature_name (str): The name of the feature to extend
         local_features (DataFrame): The dataframe of features to extend
         variant_index (VariantIndex): Variant index containing all variant/gene relationships
-        gene_index (GeneIndex): Gene index to fetch the gene information
+        target_index (TargetIndex): Target index to fetch the gene information
         study_locus (StudyLocus): Study locus to traverse between colocalisation and variant index
 
     Returns:
@@ -94,7 +94,7 @@ def extend_missing_colocalisation_to_neighbourhood_genes(
             "variantId", f.explode("transcriptConsequences").alias("tc")
         )
         .select(f.col("tc.targetId").alias("geneId"), "variantId")
-        .join(gene_index.df.select("geneId", "biotype"), "geneId", "left")
+        .join(target_index.df.select("geneId", "biotype"), "geneId", "left")
         .filter(f.col("biotype") == "protein_coding")
         .drop("biotype")
         .distinct()
@@ -127,7 +127,7 @@ def common_neighbourhood_colocalisation_feature_logic(
     *,
     colocalisation: Colocalisation,
     study_index: StudyIndex,
-    gene_index: GeneIndex,
+    target_index: TargetIndex,
     study_locus: StudyLocus,
     variant_index: VariantIndex,
 ) -> DataFrame:
@@ -141,7 +141,7 @@ def common_neighbourhood_colocalisation_feature_logic(
         qtl_types (list[str] | str): The types of QTL to filter the data by
         colocalisation (Colocalisation): Dataset with the colocalisation results
         study_index (StudyIndex): Study index to fetch study type and gene
-        gene_index (GeneIndex): Gene index to add gene type
+        target_index (TargetIndex): Target index to add gene type
         study_locus (StudyLocus): Study locus to traverse between colocalisation and study index
         variant_index (VariantIndex): Variant index to annotate all overlapping genes
 
@@ -165,7 +165,7 @@ def common_neighbourhood_colocalisation_feature_logic(
             local_feature_name,
             local_max,
             variant_index,
-            gene_index,
+            target_index,
             study_locus,
         )
     )
@@ -173,7 +173,7 @@ def common_neighbourhood_colocalisation_feature_logic(
         extended_local_max.join(
             # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus)
             # (non protein coding genes in the vicinity are excluded see #3552)
-            gene_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"),
+            target_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"),
             "geneId",
             "inner",
         )
@@ -242,7 +242,7 @@ class EQtlColocClppMaximumNeighbourhoodFeature(L2GFeature):
     feature_dependency_type = [
         Colocalisation,
         StudyIndex,
-        GeneIndex,
+        TargetIndex,
         StudyLocus,
         VariantIndex,
     ]
@@ -333,7 +333,7 @@ class PQtlColocClppMaximumNeighbourhoodFeature(L2GFeature):
     feature_dependency_type = [
         Colocalisation,
         StudyIndex,
-        GeneIndex,
+        TargetIndex,
         StudyLocus,
         VariantIndex,
     ]
@@ -423,7 +423,7 @@ class SQtlColocClppMaximumNeighbourhoodFeature(L2GFeature):
     feature_dependency_type = [
         Colocalisation,
         StudyIndex,
-        GeneIndex,
+        TargetIndex,
         StudyLocus,
         VariantIndex,
     ]
@@ -513,7 +513,7 @@ class EQtlColocH4MaximumNeighbourhoodFeature(L2GFeature):
     feature_dependency_type = [
         Colocalisation,
         StudyIndex,
-        GeneIndex,
+        TargetIndex,
         StudyLocus,
         VariantIndex,
     ]
@@ -603,7 +603,7 @@ class PQtlColocH4MaximumNeighbourhoodFeature(L2GFeature):
     feature_dependency_type = [
         Colocalisation,
         StudyIndex,
-        GeneIndex,
+        TargetIndex,
         StudyLocus,
         VariantIndex,
     ]
@@ -693,7 +693,7 @@ class SQtlColocH4MaximumNeighbourhoodFeature(L2GFeature):
     feature_dependency_type = [
         Colocalisation,
         StudyIndex,
-        GeneIndex,
+        TargetIndex,
         StudyLocus,
         VariantIndex,
     ]
diff --git a/src/gentropy/dataset/l2g_features/distance.py b/src/gentropy/dataset/l2g_features/distance.py
index 40ad568ac..f95862ddd 100644
--- a/src/gentropy/dataset/l2g_features/distance.py
+++ b/src/gentropy/dataset/l2g_features/distance.py
@@ -8,10 +8,10 @@
 from pyspark.sql import Window
 
 from gentropy.common.spark_helpers import convert_from_wide_to_long
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.l2g_features.l2g_feature import L2GFeature
 from gentropy.dataset.l2g_gold_standard import L2GGoldStandard
 from gentropy.dataset.study_locus import StudyLocus
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.dataset.variant_index import VariantIndex
 
 if TYPE_CHECKING:
@@ -85,7 +85,7 @@ def common_neighbourhood_distance_feature_logic(
     variant_index: VariantIndex,
     feature_name: str,
     distance_type: str,
-    gene_index: GeneIndex,
+    target_index: TargetIndex,
     genomic_window: int = 500_000,
 ) -> DataFrame:
     """Calculate the distance feature that correlates any variant in a credible set with any protein coding gene nearby the locus. The distance is weighted by the posterior probability of the variant to factor in its contribution to the trait.
@@ -95,7 +95,7 @@ def common_neighbourhood_distance_feature_logic(
         variant_index (VariantIndex): The dataset containing distance to gene information
         feature_name (str): The name of the feature
         distance_type (str): The type of distance to gene
-        gene_index (GeneIndex): The dataset containing gene information
+        target_index (TargetIndex): The dataset containing gene information
         genomic_window (int): The maximum window size to consider
 
     Returns:
@@ -113,7 +113,7 @@ def common_neighbourhood_distance_feature_logic(
     return (
         # Then compute mean distance in the vicinity (feature will be the same for any gene associated with a studyLocus)
         local_metric.join(
-            gene_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"),
+            target_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"),
             "geneId",
             "inner",
         )
@@ -185,7 +185,7 @@ def compute(
 class DistanceTssMeanNeighbourhoodFeature(L2GFeature):
     """Minimum mean distance to TSS for all genes in the vicinity of a studyLocus."""
 
-    feature_dependency_type = [VariantIndex, GeneIndex]
+    feature_dependency_type = [VariantIndex, TargetIndex]
     feature_name = "distanceTssMeanNeighbourhood"
 
     @classmethod
@@ -261,7 +261,7 @@ def compute(
 class DistanceSentinelTssNeighbourhoodFeature(L2GFeature):
     """Distance between the sentinel variant and a gene TSS as a relation of the distnace with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability."""
 
-    feature_dependency_type = [VariantIndex, GeneIndex]
+    feature_dependency_type = [VariantIndex, TargetIndex]
     feature_name = "distanceSentinelTssNeighbourhood"
 
     @classmethod
@@ -342,7 +342,7 @@ def compute(
 class DistanceFootprintMeanNeighbourhoodFeature(L2GFeature):
     """Minimum mean distance to footprint for all genes in the vicinity of a studyLocus."""
 
-    feature_dependency_type = [VariantIndex, GeneIndex]
+    feature_dependency_type = [VariantIndex, TargetIndex]
     feature_name = "distanceFootprintMeanNeighbourhood"
 
     @classmethod
@@ -418,7 +418,7 @@ def compute(
 class DistanceSentinelFootprintNeighbourhoodFeature(L2GFeature):
     """Distance between the sentinel variant and a gene footprint as a relation of the distnace with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability."""
 
-    feature_dependency_type = [VariantIndex, GeneIndex]
+    feature_dependency_type = [VariantIndex, TargetIndex]
     feature_name = "distanceSentinelFootprintNeighbourhood"
 
     @classmethod
diff --git a/src/gentropy/dataset/l2g_features/other.py b/src/gentropy/dataset/l2g_features/other.py
index 2fc32592b..4c9b5520d 100644
--- a/src/gentropy/dataset/l2g_features/other.py
+++ b/src/gentropy/dataset/l2g_features/other.py
@@ -7,10 +7,10 @@
 import pyspark.sql.functions as f
 
 from gentropy.common.spark_helpers import convert_from_wide_to_long
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.l2g_features.l2g_feature import L2GFeature
 from gentropy.dataset.l2g_gold_standard import L2GGoldStandard
 from gentropy.dataset.study_locus import CredibleSetConfidenceClasses, StudyLocus
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.dataset.variant_index import VariantIndex
 
 if TYPE_CHECKING:
@@ -20,7 +20,7 @@
 def common_genecount_feature_logic(
     study_loci_to_annotate: StudyLocus | L2GGoldStandard,
     *,
-    gene_index: GeneIndex,
+    target_index: TargetIndex,
     feature_name: str,
     genomic_window: int,
     protein_coding_only: bool = False,
@@ -30,7 +30,7 @@ def common_genecount_feature_logic(
     Args:
         study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci
             that will be used for annotation
-        gene_index (GeneIndex): Dataset containing information related to all genes in release.
+        target_index (TargetIndex): Dataset containing information related to all genes in release.
         feature_name (str): The name of the feature
         genomic_window (int): The maximum window size to consider
         protein_coding_only (bool): Whether to only consider protein coding genes in calculation.
@@ -45,16 +45,16 @@ def common_genecount_feature_logic(
         .withColumn("window_end", f.col("position") + (genomic_window / 2))
         .withColumnRenamed("chromosome", "SL_chromosome")
     )
-    gene_index_filter = gene_index.df
+    target_index_filter = target_index.df
 
     if protein_coding_only:
-        gene_index_filter = gene_index_filter.filter(
+        target_index_filter = target_index_filter.filter(
             f.col("biotype") == "protein_coding"
         )
 
     distinct_gene_counts = (
         study_loci_window.join(
-            gene_index_filter.alias("genes"),
+            target_index_filter.alias("genes"),
             on=(
                 (f.col("SL_chromosome") == f.col("genes.chromosome"))
                 & (f.col("genes.tss") >= f.col("window_start"))
@@ -68,7 +68,7 @@ def common_genecount_feature_logic(
 
     return (
         study_loci_window.join(
-            gene_index_filter.alias("genes"),
+            target_index_filter.alias("genes"),
             on=(
                 (f.col("SL_chromosome") == f.col("genes.chromosome"))
                 & (f.col("genes.tss") >= f.col("window_start"))
@@ -85,7 +85,7 @@ def common_genecount_feature_logic(
 def is_protein_coding_feature_logic(
     study_loci_to_annotate: StudyLocus | L2GGoldStandard,
     *,
-    gene_index: GeneIndex,
+    target_index: TargetIndex,
     feature_name: str,
     genomic_window: int,
 ) -> DataFrame:
@@ -94,7 +94,7 @@ def is_protein_coding_feature_logic(
     Args:
         study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci
             that will be used for annotation
-        gene_index (GeneIndex): Dataset containing information related to all genes in release.
+        target_index (TargetIndex): Dataset containing information related to all genes in release.
         feature_name (str): The name of the feature
         genomic_window (int): The maximum window size to consider
 
@@ -110,7 +110,7 @@ def is_protein_coding_feature_logic(
     )
     return (
         study_loci_window.join(
-            gene_index.df.alias("genes"),
+            target_index.df.alias("genes"),
             on=(
                 (f.col("SL_chromosome") == f.col("genes.chromosome"))
                 & (f.col("genes.tss") >= f.col("window_start"))
@@ -130,7 +130,7 @@ def is_protein_coding_feature_logic(
 class GeneCountFeature(L2GFeature):
     """Counts the number of genes within a specified window size from the study locus."""
 
-    feature_dependency_type = GeneIndex
+    feature_dependency_type = TargetIndex
     feature_name = "geneCount500kb"
 
     @classmethod
@@ -143,7 +143,7 @@ def compute(
 
         Args:
             study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
-            feature_dependency (dict[str, Any]): Dictionary containing dependencies, with gene index and window size
+            feature_dependency (dict[str, Any]): Dictionary containing dependencies, with target index and window size
 
         Returns:
             GeneCountFeature: Feature dataset
@@ -170,7 +170,7 @@ def compute(
 class ProteinGeneCountFeature(L2GFeature):
     """Counts the number of protein coding genes within a specified window size from the study locus."""
 
-    feature_dependency_type = GeneIndex
+    feature_dependency_type = TargetIndex
     feature_name = "proteinGeneCount500kb"
 
     @classmethod
@@ -183,7 +183,7 @@ def compute(
 
         Args:
             study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
-            feature_dependency (dict[str, Any]): Dictionary containing dependencies, with gene index and window size
+            feature_dependency (dict[str, Any]): Dictionary containing dependencies, with target index and window size
 
         Returns:
             ProteinGeneCountFeature: Feature dataset
@@ -211,7 +211,7 @@ def compute(
 class ProteinCodingFeature(L2GFeature):
     """Indicates whether a gene is protein-coding within a specified window size from the study locus."""
 
-    feature_dependency_type = GeneIndex
+    feature_dependency_type = TargetIndex
     feature_name = "isProteinCoding"
 
     @classmethod
@@ -224,7 +224,7 @@ def compute(
 
         Args:
             study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
-            feature_dependency (dict[str, Any]): Dictionary containing dependencies, including gene index
+            feature_dependency (dict[str, Any]): Dictionary containing dependencies, including target index
 
         Returns:
             ProteinCodingFeature: Feature dataset with 1 if the gene is protein-coding, 0 otherwise
diff --git a/src/gentropy/dataset/l2g_features/vep.py b/src/gentropy/dataset/l2g_features/vep.py
index 4f8dd6779..11f056ec5 100644
--- a/src/gentropy/dataset/l2g_features/vep.py
+++ b/src/gentropy/dataset/l2g_features/vep.py
@@ -8,10 +8,10 @@
 from pyspark.sql import Window
 
 from gentropy.common.spark_helpers import convert_from_wide_to_long
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.l2g_features.l2g_feature import L2GFeature
 from gentropy.dataset.l2g_gold_standard import L2GGoldStandard
 from gentropy.dataset.study_locus import StudyLocus
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.dataset.variant_index import VariantIndex
 
 if TYPE_CHECKING:
@@ -77,7 +77,7 @@ def common_neighbourhood_vep_feature_logic(
     study_loci_to_annotate: StudyLocus | L2GGoldStandard,
     *,
     variant_index: VariantIndex,
-    gene_index: GeneIndex,
+    target_index: TargetIndex,
     feature_name: str,
 ) -> DataFrame:
     """Extracts variant severity score computed from VEP for any gene, based on what is the max score for protein coding genes that are nearby the locus.
@@ -85,7 +85,7 @@ def common_neighbourhood_vep_feature_logic(
     Args:
         study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation
         variant_index (VariantIndex): The dataset containing functional consequence information
-        gene_index (GeneIndex): The dataset containing the gene biotype
+        target_index (TargetIndex): The dataset containing the gene biotype
         feature_name (str): The name of the feature
 
     Returns:
@@ -102,7 +102,7 @@ def common_neighbourhood_vep_feature_logic(
         # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus)
         # (non protein coding genes in the vicinity are excluded see #3552)
         .join(
-            gene_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"),
+            target_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"),
             "geneId",
             "inner",
         )
@@ -161,7 +161,7 @@ def compute(
 class VepMaximumNeighbourhoodFeature(L2GFeature):
     """Maximum functional consequence score among all variants in a credible set for a studyLocus/gene relative to the mean VEP score across all protein coding genes in the vicinity."""
 
-    feature_dependency_type = [VariantIndex, GeneIndex]
+    feature_dependency_type = [VariantIndex, TargetIndex]
     feature_name = "vepMaximumNeighbourhood"
 
     @classmethod
@@ -239,7 +239,7 @@ class VepMeanNeighbourhoodFeature(L2GFeature):
     The mean severity score is weighted by the posterior probability of each variant.
     """
 
-    feature_dependency_type = [VariantIndex, GeneIndex]
+    feature_dependency_type = [VariantIndex, TargetIndex]
     feature_name = "vepMeanNeighbourhood"
 
     @classmethod
diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py
index da310f6f1..3f973bb9f 100644
--- a/src/gentropy/dataset/study_index.py
+++ b/src/gentropy/dataset/study_index.py
@@ -23,7 +23,7 @@
     from pyspark.sql.types import StructType
 
     from gentropy.dataset.biosample_index import BiosampleIndex
-    from gentropy.dataset.gene_index import GeneIndex
+    from gentropy.dataset.target_index import TargetIndex
 
 
 class StudyQualityCheck(Enum):
@@ -392,11 +392,11 @@ def validate_study_type(self: StudyIndex) -> StudyIndex:
         )
         return StudyIndex(_df=validated_df, _schema=StudyIndex.get_schema())
 
-    def validate_target(self: StudyIndex, target_index: GeneIndex) -> StudyIndex:
+    def validate_target(self: StudyIndex, target_index: TargetIndex) -> StudyIndex:
         """Validating gene identifiers in the study index against the provided target index.
 
         Args:
-            target_index (GeneIndex): gene index containing the reference gene identifiers (Ensembl gene identifiers).
+            target_index (TargetIndex): target index containing the reference gene identifiers (Ensembl gene identifiers).
 
         Returns:
             StudyIndex: with flagged studies if geneId could not be validated.
diff --git a/src/gentropy/dataset/target_index.py b/src/gentropy/dataset/target_index.py
index 31259d2d1..8248f5f44 100644
--- a/src/gentropy/dataset/target_index.py
+++ b/src/gentropy/dataset/target_index.py
@@ -1,4 +1,4 @@
-"""Gene index dataset."""
+"""Target index dataset."""
 from __future__ import annotations
 
 from dataclasses import dataclass
@@ -15,34 +15,34 @@
 
 
 @dataclass
-class GeneIndex(Dataset):
-    """Gene index dataset.
+class TargetIndex(Dataset):
+    """Target index dataset.
 
     Gene-based annotation.
     """
 
     @classmethod
-    def get_schema(cls: type[GeneIndex]) -> StructType:
-        """Provides the schema for the GeneIndex dataset.
+    def get_schema(cls: type[TargetIndex]) -> StructType:
+        """Provides the schema for the TargetIndex dataset.
 
         Returns:
-            StructType: Schema for the GeneIndex dataset
+            StructType: Schema for the TargetIndex dataset
         """
-        return parse_spark_schema("gene_index.json")
+        return parse_spark_schema("target_index.json")
 
-    def filter_by_biotypes(self: GeneIndex, biotypes: list[str]) -> GeneIndex:
+    def filter_by_biotypes(self: TargetIndex, biotypes: list[str]) -> TargetIndex:
         """Filter by approved biotypes.
 
         Args:
             biotypes (list[str]): List of Ensembl biotypes to keep.
 
         Returns:
-            GeneIndex: Gene index dataset filtered by biotypes.
+            TargetIndex: Target index dataset filtered by biotypes.
         """
         self.df = self._df.filter(f.col("biotype").isin(biotypes))
         return self
 
-    def locations_lut(self: GeneIndex) -> DataFrame:
+    def locations_lut(self: TargetIndex) -> DataFrame:
         """Gene location information.
 
         Returns:
@@ -57,7 +57,7 @@ def locations_lut(self: GeneIndex) -> DataFrame:
             "tss",
         )
 
-    def symbols_lut(self: GeneIndex) -> DataFrame:
+    def symbols_lut(self: TargetIndex) -> DataFrame:
         """Gene symbol lookup table.
 
         Pre-processess gene/target dataset to create lookup table of gene symbols, including
diff --git a/src/gentropy/datasource/intervals/andersson.py b/src/gentropy/datasource/intervals/andersson.py
index a6e92470c..254f334a8 100644
--- a/src/gentropy/datasource/intervals/andersson.py
+++ b/src/gentropy/datasource/intervals/andersson.py
@@ -15,7 +15,7 @@
     from pyspark.sql import DataFrame, SparkSession
 
     from gentropy.common.Liftover import LiftOverSpark
-    from gentropy.dataset.gene_index import GeneIndex
+    from gentropy.dataset.target_index import TargetIndex
 
 
 class IntervalsAndersson:
@@ -49,14 +49,14 @@ def read(spark: SparkSession, path: str) -> DataFrame:
     def parse(
         cls: type[IntervalsAndersson],
         raw_anderson_df: DataFrame,
-        gene_index: GeneIndex,
+        target_index: TargetIndex,
         lift: LiftOverSpark,
     ) -> Intervals:
         """Parse Andersson et al. 2014 dataset.
 
         Args:
             raw_anderson_df (DataFrame): Raw Andersson et al. dataset
-            gene_index (GeneIndex): Gene index
+            target_index (TargetIndex): Target index
             lift (LiftOverSpark): LiftOverSpark instance
 
         Returns:
@@ -108,10 +108,10 @@ def parse(
                 .withColumnRenamed("mapped_start", "start")
                 .withColumnRenamed("mapped_end", "end")
                 .distinct()
-                # Joining with the gene index
+                # Joining with the target index
                 .alias("intervals")
                 .join(
-                    gene_index.symbols_lut().alias("genes"),
+                    target_index.symbols_lut().alias("genes"),
                     on=[
                         f.col("intervals.gene_symbol") == f.col("genes.geneSymbol"),
                         # Drop rows where the TSS is far from the start of the region
diff --git a/src/gentropy/datasource/intervals/javierre.py b/src/gentropy/datasource/intervals/javierre.py
index a05fa9886..6ecd9351a 100644
--- a/src/gentropy/datasource/intervals/javierre.py
+++ b/src/gentropy/datasource/intervals/javierre.py
@@ -12,7 +12,7 @@
     from pyspark.sql import DataFrame, SparkSession
 
     from gentropy.common.Liftover import LiftOverSpark
-    from gentropy.dataset.gene_index import GeneIndex
+    from gentropy.dataset.target_index import TargetIndex
 
 
 class IntervalsJavierre:
@@ -35,14 +35,14 @@ def read(spark: SparkSession, path: str) -> DataFrame:
     def parse(
         cls: type[IntervalsJavierre],
         javierre_raw: DataFrame,
-        gene_index: GeneIndex,
+        target_index: TargetIndex,
         lift: LiftOverSpark,
     ) -> Intervals:
         """Parse Javierre et al. 2016 dataset.
 
         Args:
             javierre_raw (DataFrame): Raw Javierre data
-            gene_index (GeneIndex): Gene index
+            target_index (TargetIndex): Target index
             lift (LiftOverSpark): LiftOverSpark instance
 
         Returns:
@@ -115,7 +115,7 @@ def parse(
             .distinct()
             .alias("intervals")
             .join(
-                gene_index.locations_lut().alias("genes"),
+                target_index.locations_lut().alias("genes"),
                 on=[
                     f.col("intervals.chrom") == f.col("genes.chromosome"),
                     (
diff --git a/src/gentropy/datasource/intervals/jung.py b/src/gentropy/datasource/intervals/jung.py
index 64b56967a..62f9963a6 100644
--- a/src/gentropy/datasource/intervals/jung.py
+++ b/src/gentropy/datasource/intervals/jung.py
@@ -12,7 +12,7 @@
     from pyspark.sql import DataFrame, SparkSession
 
     from gentropy.common.Liftover import LiftOverSpark
-    from gentropy.dataset.gene_index import GeneIndex
+    from gentropy.dataset.target_index import TargetIndex
 
 
 class IntervalsJung:
@@ -35,14 +35,14 @@ def read(spark: SparkSession, path: str) -> DataFrame:
     def parse(
         cls: type[IntervalsJung],
         jung_raw: DataFrame,
-        gene_index: GeneIndex,
+        target_index: TargetIndex,
         lift: LiftOverSpark,
     ) -> Intervals:
         """Parse the Jung et al. 2019 dataset.
 
         Args:
             jung_raw (DataFrame): raw Jung et al. 2019 dataset
-            gene_index (GeneIndex): gene index
+            target_index (TargetIndex): Target index
             lift (LiftOverSpark): LiftOverSpark instance
 
         Returns:
@@ -81,7 +81,7 @@ def parse(
                 .alias("intervals")
                 # Joining with genes:
                 .join(
-                    gene_index.symbols_lut().alias("genes"),
+                    target_index.symbols_lut().alias("genes"),
                     on=[f.col("intervals.gene_name") == f.col("genes.geneSymbol")],
                     how="inner",
                 )
diff --git a/src/gentropy/datasource/intervals/thurman.py b/src/gentropy/datasource/intervals/thurman.py
index a8113e5a6..459e72f2d 100644
--- a/src/gentropy/datasource/intervals/thurman.py
+++ b/src/gentropy/datasource/intervals/thurman.py
@@ -12,7 +12,7 @@
     from pyspark.sql import DataFrame, SparkSession
 
     from gentropy.common.Liftover import LiftOverSpark
-    from gentropy.dataset.gene_index import GeneIndex
+    from gentropy.dataset.target_index import TargetIndex
 
 
 class IntervalsThurman:
@@ -47,14 +47,14 @@ def read(spark: SparkSession, path: str) -> DataFrame:
     def parse(
         cls: type[IntervalsThurman],
         thurman_raw: DataFrame,
-        gene_index: GeneIndex,
+        target_index: TargetIndex,
         lift: LiftOverSpark,
     ) -> Intervals:
         """Parse the Thurman et al. 2012 dataset.
 
         Args:
             thurman_raw (DataFrame): raw Thurman et al. 2019 dataset
-            gene_index (GeneIndex): gene index
+            target_index (TargetIndex): Target index
             lift (LiftOverSpark): LiftOverSpark instance
 
         Returns:
@@ -80,7 +80,7 @@ def parse(
                 .alias("intervals")
                 # Map gene names to gene IDs:
                 .join(
-                    gene_index.symbols_lut().alias("genes"),
+                    target_index.symbols_lut().alias("genes"),
                     on=[
                         f.col("intervals.gene_name") == f.col("genes.geneSymbol"),
                         f.col("intervals.chrom") == f.col("genes.chromosome"),
diff --git a/src/gentropy/datasource/open_targets/target.py b/src/gentropy/datasource/open_targets/target.py
index 5e795c19b..40c1cca7d 100644
--- a/src/gentropy/datasource/open_targets/target.py
+++ b/src/gentropy/datasource/open_targets/target.py
@@ -4,13 +4,13 @@
 import pyspark.sql.functions as f
 from pyspark.sql import Column, DataFrame
 
-from gentropy.dataset.gene_index import GeneIndex
+from gentropy.dataset.target_index import TargetIndex
 
 
 class OpenTargetsTarget:
     """Parser for OTPlatform target dataset.
 
-    Genomic data from Open Targets provides gene identification and genomic coordinates that are integrated into the gene index of our ETL pipeline.
+    Genomic data from Open Targets provides gene identification and genomic coordinates that are integrated into the target index of our ETL pipeline.
 
     The EMBL-EBI Ensembl database is used as a source for human targets in the Platform, with the Ensembl gene ID as the primary identifier. The criteria for target inclusion is:
     - Genes from all biotypes encoded in canonical chromosomes
@@ -44,18 +44,18 @@ def _get_gene_tss(strand_col: Column, start_col: Column, end_col: Column) -> Col
         return f.when(strand_col == 1, start_col).when(strand_col == -1, end_col)
 
     @classmethod
-    def as_gene_index(
+    def as_target_index(
         cls: type[OpenTargetsTarget], target_index: DataFrame
-    ) -> GeneIndex:
-        """Initialise GeneIndex from source dataset.
+    ) -> TargetIndex:
+        """Initialise TargetIndex from source dataset.
 
         Args:
             target_index (DataFrame): Target index dataframe
 
         Returns:
-            GeneIndex: Gene index dataset
+            TargetIndex: Target index dataset
         """
-        return GeneIndex(
+        return TargetIndex(
             _df=target_index.select(
                 f.coalesce(f.col("id"), f.lit("unknown")).alias("geneId"),
                 "approvedSymbol",
@@ -74,5 +74,5 @@ def as_gene_index(
                 f.col("genomicLocation.end").alias("end"),
                 f.col("genomicLocation.strand").alias("strand"),
             ),
-            _schema=GeneIndex.get_schema(),
+            _schema=TargetIndex.get_schema(),
         )
diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py
index 16922ef78..9c2da149b 100644
--- a/src/gentropy/l2g.py
+++ b/src/gentropy/l2g.py
@@ -14,12 +14,12 @@
 from gentropy.common.spark_helpers import calculate_harmonic_sum
 from gentropy.common.utils import access_gcp_secret
 from gentropy.dataset.colocalisation import Colocalisation
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix
 from gentropy.dataset.l2g_gold_standard import L2GGoldStandard
 from gentropy.dataset.l2g_prediction import L2GPrediction
 from gentropy.dataset.study_index import StudyIndex
 from gentropy.dataset.study_locus import StudyLocus
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.dataset.variant_index import VariantIndex
 from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader
 from gentropy.method.l2g.model import LocusToGeneModel
@@ -38,7 +38,7 @@ def __init__(
         variant_index_path: str | None = None,
         colocalisation_path: str | None = None,
         study_index_path: str | None = None,
-        gene_index_path: str | None = None,
+        target_index_path: str | None = None,
         feature_matrix_path: str,
     ) -> None:
         """Initialise the step and run the logic based on mode.
@@ -50,7 +50,7 @@ def __init__(
             variant_index_path (str | None): Path to the variant index dataset
             colocalisation_path (str | None): Path to the colocalisation dataset
             study_index_path (str | None): Path to the study index dataset
-            gene_index_path (str | None): Path to the gene index dataset
+            target_index_path (str | None): Path to the target index dataset
             feature_matrix_path (str): Path to the L2G feature matrix output dataset
         """
         credible_set = StudyLocus.from_parquet(
@@ -73,9 +73,9 @@ def __init__(
             if colocalisation_path
             else None
         )
-        gene_index = (
-            GeneIndex.from_parquet(session, gene_index_path, recursiveFileLookup=True)
-            if gene_index_path
+        target_index = (
+            TargetIndex.from_parquet(session, target_index_path, recursiveFileLookup=True)
+            if target_index_path
             else None
         )
         features_input_loader = L2GFeatureInputLoader(
@@ -83,7 +83,7 @@ def __init__(
             colocalisation=coloc,
             study_index=studies,
             study_locus=credible_set,
-            gene_index=gene_index,
+            target_index=target_index,
         )
 
         fm = credible_set.filter(f.col("studyType") == "gwas").build_feature_matrix(
diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py
index 3d2fdd060..1aecbabb4 100644
--- a/src/gentropy/study_validation.py
+++ b/src/gentropy/study_validation.py
@@ -6,8 +6,8 @@
 
 from gentropy.common.session import Session
 from gentropy.dataset.biosample_index import BiosampleIndex
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.study_index import StudyIndex
+from gentropy.dataset.target_index import TargetIndex
 
 
 class StudyValidationStep:
@@ -41,7 +41,7 @@ def __init__(
             invalid_qc_reasons (list[str]): List of invalid quality check reason names from `StudyQualityCheck` (e.g. ['DUPLICATED_STUDY']).
         """
         # Reading datasets:
-        target_index = GeneIndex.from_parquet(session, target_index_path)
+        target_index = TargetIndex.from_parquet(session, target_index_path)
         biosample_index = BiosampleIndex.from_parquet(session, biosample_index_path)
         # Reading disease index and pre-process.
         # This logic does not belong anywhere, but gentorpy has no disease dataset yet.
diff --git a/src/gentropy/target_index.py b/src/gentropy/target_index.py
index 0a317d077..b57b0e835 100644
--- a/src/gentropy/target_index.py
+++ b/src/gentropy/target_index.py
@@ -1,4 +1,4 @@
-"""Step to generate gene index dataset."""
+"""Step to generate target index dataset."""
 
 from __future__ import annotations
 
@@ -6,29 +6,29 @@
 from gentropy.datasource.open_targets.target import OpenTargetsTarget
 
 
-class GeneIndexStep:
-    """Gene index step.
+class TargetIndexStep:
+    """Target index step.
 
-    This step generates a gene index dataset from an Open Targets Platform target dataset.
+    This step generates a target index dataset from an Open Targets Platform target dataset.
     """
 
     def __init__(
         self,
         session: Session,
         target_path: str,
-        gene_index_path: str,
+        target_index_path: str,
     ) -> None:
         """Initialize step.
 
         Args:
             session (Session): Session object.
             target_path (str): Input Open Targets Platform target dataset path.
-            gene_index_path (str): Output gene index dataset path.
+            target_index_path (str): Output target index dataset path.
         """
         platform_target = session.spark.read.parquet(target_path)
         # Transform
-        gene_index = OpenTargetsTarget.as_gene_index(platform_target)
+        target_index = OpenTargetsTarget.as_target_index(platform_target)
         # Load
-        gene_index.df.coalesce(session.output_partitions).write.mode(
+        target_index.df.coalesce(session.output_partitions).write.mode(
             session.write_mode
-        ).parquet(gene_index_path)
+        ).parquet(target_index_path)

From 19bb7c98afcfeaa538fe926f93d58cf85d4f1b1d Mon Sep 17 00:00:00 2001
From: vivienho <56025826+vivienho@users.noreply.github.com>
Date: Fri, 6 Dec 2024 15:14:53 +0000
Subject: [PATCH 3/3] refactor: rename gene_index to target_index in tests

---
 tests/gentropy/conftest.py                    | 12 ++---
 tests/gentropy/dataset/test_l2g_feature.py    | 54 +++++++++----------
 .../dataset/test_l2g_feature_matrix.py        |  8 +--
 tests/gentropy/dataset/test_study_index.py    | 14 ++---
 .../dataset/test_summary_statistics.py        |  2 +-
 tests/gentropy/dataset/test_target_index.py   | 28 +++++-----
 tests/gentropy/dataset/test_variant_index.py  |  2 +-
 .../datasource/intervals/test_andersson.py    |  6 +--
 .../datasource/intervals/test_javierre.py     |  6 +--
 .../datasource/intervals/test_jung.py         |  6 +--
 .../datasource/intervals/test_thurman.py      |  6 +--
 .../open_targets/test_l2g_gold_standard.py    |  6 +--
 .../datasource/open_targets/test_target.py    | 10 ++--
 tests/gentropy/test_schemas.py                | 22 ++++----
 14 files changed, 92 insertions(+), 90 deletions(-)

diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py
index f19c28623..c4178ba37 100644
--- a/tests/gentropy/conftest.py
+++ b/tests/gentropy/conftest.py
@@ -15,7 +15,6 @@
 from gentropy.common.session import Session
 from gentropy.dataset.biosample_index import BiosampleIndex
 from gentropy.dataset.colocalisation import Colocalisation
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.intervals import Intervals
 from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix
 from gentropy.dataset.l2g_gold_standard import L2GGoldStandard
@@ -25,6 +24,7 @@
 from gentropy.dataset.study_locus import StudyLocus
 from gentropy.dataset.study_locus_overlap import StudyLocusOverlap
 from gentropy.dataset.summary_statistics import SummaryStatistics
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.dataset.variant_index import VariantIndex
 from gentropy.datasource.eqtl_catalogue.finemapping import EqtlCatalogueFinemapping
 from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex
@@ -379,7 +379,7 @@ def mock_summary_statistics(
 
 @pytest.fixture()
 def mock_ld_index(spark: SparkSession) -> LDIndex:
-    """Mock gene index."""
+    """Mock LD index."""
     ld_schema = LDIndex.get_schema()
 
     data_spec = (
@@ -519,9 +519,9 @@ def sample_target_index(spark: SparkSession) -> DataFrame:
 
 
 @pytest.fixture()
-def mock_gene_index(spark: SparkSession) -> GeneIndex:
-    """Mock gene index dataset."""
-    gi_schema = GeneIndex.get_schema()
+def mock_target_index(spark: SparkSession) -> TargetIndex:
+    """Mock target index dataset."""
+    gi_schema = TargetIndex.get_schema()
 
     data_spec = (
         dg.DataGenerator(
@@ -540,7 +540,7 @@ def mock_gene_index(spark: SparkSession) -> GeneIndex:
         .withColumnSpec("strand", percentNulls=0.1)
     )
 
-    return GeneIndex(_df=data_spec.build(), _schema=gi_schema)
+    return TargetIndex(_df=data_spec.build(), _schema=gi_schema)
 
 
 @pytest.fixture()
diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py
index feb8e449a..0ae9fea85 100644
--- a/tests/gentropy/dataset/test_l2g_feature.py
+++ b/tests/gentropy/dataset/test_l2g_feature.py
@@ -21,7 +21,7 @@
 )
 
 from gentropy.dataset.colocalisation import Colocalisation
-from gentropy.dataset.gene_index import GeneIndex
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.dataset.l2g_features.colocalisation import (
     EQtlColocClppMaximumFeature,
     EQtlColocClppMaximumNeighbourhoodFeature,
@@ -116,7 +116,7 @@ def test_feature_factory_return_type(
     mock_colocalisation: Colocalisation,
     mock_study_index: StudyIndex,
     mock_variant_index: VariantIndex,
-    mock_gene_index: GeneIndex,
+    mock_target_index: TargetIndex,
 ) -> None:
     """Test that every feature factory returns a L2GFeature dataset."""
     loader = L2GFeatureInputLoader(
@@ -124,7 +124,7 @@ def test_feature_factory_return_type(
         study_index=mock_study_index,
         variant_index=mock_variant_index,
         study_locus=mock_study_locus,
-        gene_index=mock_gene_index,
+        target_index=mock_target_index,
     )
     feature_dataset = feature_class.compute(
         study_loci_to_annotate=mock_study_locus,
@@ -136,9 +136,9 @@ def test_feature_factory_return_type(
 
 
 @pytest.fixture(scope="module")
-def sample_gene_index(spark: SparkSession) -> GeneIndex:
-    """Create a sample gene index for testing."""
-    return GeneIndex(
+def sample_target_index(spark: SparkSession) -> TargetIndex:
+    """Create a sample target index for testing."""
+    return TargetIndex(
         _df=spark.createDataFrame(
             [
                 {
@@ -157,9 +157,9 @@ def sample_gene_index(spark: SparkSession) -> GeneIndex:
                     "chromosome": "1",
                 },
             ],
-            GeneIndex.get_schema(),
+            TargetIndex.get_schema(),
         ),
-        _schema=GeneIndex.get_schema(),
+        _schema=TargetIndex.get_schema(),
     )
 
 
@@ -294,7 +294,7 @@ def test__common_colocalisation_feature_logic(
     def test_extend_missing_colocalisation_to_neighbourhood_genes(
         self: TestCommonColocalisationFeatureLogic,
         spark: SparkSession,
-        sample_gene_index: GeneIndex,
+        sample_target_index: TargetIndex,
         sample_variant_index: VariantIndex,
     ) -> None:
         """Test the extend_missing_colocalisation_to_neighbourhood_genes function."""
@@ -316,7 +316,7 @@ def test_extend_missing_colocalisation_to_neighbourhood_genes(
             feature_name="eQtlColocH4Maximum",
             local_features=local_features,
             variant_index=sample_variant_index,
-            gene_index=sample_gene_index,
+            target_index=sample_target_index,
             study_locus=self.sample_study_locus,
         ).select("studyLocusId", "geneId", "eQtlColocH4Maximum")
         expected_df = spark.createDataFrame(
@@ -329,7 +329,7 @@ def test_extend_missing_colocalisation_to_neighbourhood_genes(
     def test_common_neighbourhood_colocalisation_feature_logic(
         self: TestCommonColocalisationFeatureLogic,
         spark: SparkSession,
-        sample_gene_index: GeneIndex,
+        sample_target_index: TargetIndex,
         sample_variant_index: VariantIndex,
     ) -> None:
         """Test the common logic of the neighbourhood colocalisation features."""
@@ -343,7 +343,7 @@ def test_common_neighbourhood_colocalisation_feature_logic(
             colocalisation=self.sample_colocalisation,
             study_index=self.sample_studies,
             study_locus=self.sample_study_locus,
-            gene_index=sample_gene_index,
+            target_index=sample_target_index,
             variant_index=sample_variant_index,
         ).withColumn(feature_name, f.round(f.col(feature_name), 3))
         # expected max is 0.81
@@ -561,7 +561,7 @@ def test_common_neighbourhood_distance_feature_logic(
             common_neighbourhood_distance_feature_logic(
                 self.sample_study_locus,
                 variant_index=self.sample_variant_index,
-                gene_index=self.sample_gene_index,
+                target_index=self.sample_target_index,
                 feature_name=feature_name,
                 distance_type=self.distance_type,
                 genomic_window=10,
@@ -653,7 +653,7 @@ def _setup(
             ),
             _schema=VariantIndex.get_schema(),
         )
-        self.sample_gene_index = GeneIndex(
+        self.sample_target_index = TargetIndex(
             _df=spark.createDataFrame(
                 [
                     {
@@ -675,9 +675,9 @@ def _setup(
                         "biotype": "non_coding",
                     },
                 ],
-                GeneIndex.get_schema(),
+                TargetIndex.get_schema(),
             ),
-            _schema=GeneIndex.get_schema(),
+            _schema=TargetIndex.get_schema(),
         )
 
 
@@ -760,7 +760,7 @@ def test_common_vep_feature_logic(
         def test_common_neighbourhood_vep_feature_logic(
             self: TestCommonVepFeatureLogic,
             spark: SparkSession,
-            sample_gene_index: GeneIndex,
+            sample_target_index: TargetIndex,
             sample_variant_index: VariantIndex,
         ) -> None:
             """Test the logic of the function that extracts the maximum severity score for a gene given the maximum of the maximum scores for all protein coding genes in the vicinity."""
@@ -769,7 +769,7 @@ def test_common_neighbourhood_vep_feature_logic(
                 common_neighbourhood_vep_feature_logic(
                     self.sample_study_locus,
                     variant_index=sample_variant_index,
-                    gene_index=sample_gene_index,
+                    target_index=sample_target_index,
                     feature_name=feature_name,
                 )
                 .withColumn(feature_name, f.round(f.col(feature_name), 2))
@@ -859,7 +859,7 @@ def test_common_genecount_feature_logic(
         """Test the common logic of the gene count features."""
         observed_df = common_genecount_feature_logic(
             study_loci_to_annotate=self.sample_study_locus,
-            gene_index=self.sample_gene_index,
+            target_index=self.sample_target_index,
             feature_name=feature_name,
             genomic_window=500000,
             protein_coding_only=protein_coding_only,
@@ -892,7 +892,7 @@ def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None:
             ),
             _schema=StudyLocus.get_schema(),
         )
-        self.sample_gene_index = GeneIndex(
+        self.sample_target_index = TargetIndex(
             _df=spark.createDataFrame(
                 [
                     {
@@ -914,9 +914,9 @@ def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None:
                         "biotype": "non_coding",
                     },
                 ],
-                GeneIndex.get_schema(),
+                TargetIndex.get_schema(),
             ),
-            _schema=GeneIndex.get_schema(),
+            _schema=TargetIndex.get_schema(),
         )
 
 
@@ -944,7 +944,7 @@ def test_is_protein_coding_feature_logic(
         observed_df = (
             is_protein_coding_feature_logic(
                 study_loci_to_annotate=self.sample_study_locus,
-                gene_index=self.sample_gene_index,
+                target_index=self.sample_target_index,
                 feature_name="isProteinCoding500kb",
                 genomic_window=500000,
             )
@@ -981,8 +981,8 @@ def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> No
             _schema=StudyLocus.get_schema(),
         )
 
-        # Sample gene index data with biotype
-        self.sample_gene_index = GeneIndex(
+        # Sample target index data with biotype
+        self.sample_target_index = TargetIndex(
             _df=spark.createDataFrame(
                 [
                     {
@@ -1004,9 +1004,9 @@ def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> No
                         "biotype": "non_coding",
                     },
                 ],
-                GeneIndex.get_schema(),
+                TargetIndex.get_schema(),
             ),
-            _schema=GeneIndex.get_schema(),
+            _schema=TargetIndex.get_schema(),
         )
 
 
diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py
index 6677d123e..8d63bc5ee 100644
--- a/tests/gentropy/dataset/test_l2g_feature_matrix.py
+++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py
@@ -16,11 +16,11 @@
 )
 
 from gentropy.dataset.colocalisation import Colocalisation
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix
 from gentropy.dataset.l2g_gold_standard import L2GGoldStandard
 from gentropy.dataset.study_index import StudyIndex
 from gentropy.dataset.study_locus import StudyLocus
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader
 
 if TYPE_CHECKING:
@@ -54,7 +54,7 @@ def test_study_locus(
             colocalisation=self.sample_colocalisation,
             study_index=self.sample_study_index,
             study_locus=self.sample_study_locus,
-            gene_index=self.sample_gene_index,
+            target_index=self.sample_target_index,
         )
         fm = L2GFeatureMatrix.from_features_list(
             self.sample_study_locus, features_list, loader
@@ -170,7 +170,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None:
             ),
             _schema=Colocalisation.get_schema(),
         )
-        self.sample_gene_index = GeneIndex(
+        self.sample_target_index = TargetIndex(
             _df=spark.createDataFrame(
                 [
                     ("g1", "X", "protein_coding", 200),
@@ -183,7 +183,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None:
                     "tss",
                 ],
             ),
-            _schema=GeneIndex.get_schema(),
+            _schema=TargetIndex.get_schema(),
         )
 
 
diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py
index 05b652752..22391e8ea 100644
--- a/tests/gentropy/dataset/test_study_index.py
+++ b/tests/gentropy/dataset/test_study_index.py
@@ -7,8 +7,8 @@
 from pyspark.sql import functions as f
 
 from gentropy.dataset.biosample_index import BiosampleIndex
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.study_index import StudyIndex
+from gentropy.dataset.target_index import TargetIndex
 
 
 def test_study_index_creation(mock_study_index: StudyIndex) -> None:
@@ -188,9 +188,9 @@ def create_study_index(drop_column: str) -> StudyIndex:
         self.study_index_no_gene = create_study_index("geneId")
         self.study_index_no_biosample_id = create_study_index("biosampleFromSourceId")
 
-        self.gene_index = GeneIndex(
+        self.target_index = TargetIndex(
             _df=spark.createDataFrame(self.GENE_DATA, self.GENE_COLUMNS),
-            _schema=GeneIndex.get_schema(),
+            _schema=TargetIndex.get_schema(),
         )
         self.biosample_index = BiosampleIndex(
             _df=spark.createDataFrame(self.BIOSAMPLE_DATA, self.BIOSAMPLE_COLUMNS),
@@ -199,7 +199,7 @@ def create_study_index(drop_column: str) -> StudyIndex:
 
     def test_gene_validation_type(self: TestQTLValidation) -> None:
         """Testing if the target validation runs and returns the expected type."""
-        validated = self.study_index.validate_target(self.gene_index)
+        validated = self.study_index.validate_target(self.target_index)
         assert isinstance(validated, StudyIndex)
 
     def test_biosample_validation_type(self: TestQTLValidation) -> None:
@@ -211,7 +211,7 @@ def test_biosample_validation_type(self: TestQTLValidation) -> None:
     def test_qtl_validation_correctness(self: TestQTLValidation, test: str) -> None:
         """Testing if the QTL validation only flags the expected studies."""
         if test == "gene":
-            validated = self.study_index.validate_target(self.gene_index).persist()
+            validated = self.study_index.validate_target(self.target_index).persist()
             bad_study = "s2"
         if test == "biosample":
             validated = self.study_index.validate_biosample(
@@ -252,7 +252,7 @@ def test_qtl_validation_drop_relevant_column(
         """Testing what happens if an expected column is not present."""
         if drop == "gene":
             if test == "gene":
-                validated = self.study_index_no_gene.validate_target(self.gene_index)
+                validated = self.study_index_no_gene.validate_target(self.target_index)
             if test == "biosample":
                 validated = self.study_index_no_gene.validate_biosample(
                     self.biosample_index
@@ -260,7 +260,7 @@ def test_qtl_validation_drop_relevant_column(
         if drop == "biosample":
             if test == "gene":
                 validated = self.study_index_no_biosample_id.validate_target(
-                    self.gene_index
+                    self.target_index
                 )
             if test == "biosample":
                 validated = self.study_index_no_biosample_id.validate_biosample(
diff --git a/tests/gentropy/dataset/test_summary_statistics.py b/tests/gentropy/dataset/test_summary_statistics.py
index b1b06442b..033fba663 100644
--- a/tests/gentropy/dataset/test_summary_statistics.py
+++ b/tests/gentropy/dataset/test_summary_statistics.py
@@ -17,7 +17,7 @@
 def test_summary_statistics__creation(
     mock_summary_statistics: SummaryStatistics,
 ) -> None:
-    """Test gene index creation with mock gene index."""
+    """Test summary statistics creation with mock summary statistics."""
     assert isinstance(mock_summary_statistics, SummaryStatistics)
 
 
diff --git a/tests/gentropy/dataset/test_target_index.py b/tests/gentropy/dataset/test_target_index.py
index e4ae8e581..070bf7d8a 100644
--- a/tests/gentropy/dataset/test_target_index.py
+++ b/tests/gentropy/dataset/test_target_index.py
@@ -4,29 +4,29 @@
 
 from pyspark.sql import DataFrame
 
-from gentropy.dataset.gene_index import GeneIndex
+from gentropy.dataset.target_index import TargetIndex
 
 
-def test_gene_index_creation(mock_gene_index: GeneIndex) -> None:
-    """Test gene index creation with mock gene index."""
-    assert isinstance(mock_gene_index, GeneIndex)
+def test_target_index_creation(mock_target_index: TargetIndex) -> None:
+    """Test target index creation with mock target index."""
+    assert isinstance(mock_target_index, TargetIndex)
 
 
-def test_gene_index_location_lut(mock_gene_index: GeneIndex) -> None:
-    """Test gene index location lut."""
-    assert isinstance(mock_gene_index.locations_lut(), DataFrame)
+def test_target_index_location_lut(mock_target_index: TargetIndex) -> None:
+    """Test target index location lut."""
+    assert isinstance(mock_target_index.locations_lut(), DataFrame)
 
 
-def test_gene_index_symbols_lut(mock_gene_index: GeneIndex) -> None:
-    """Test gene index symbols lut."""
-    assert isinstance(mock_gene_index.symbols_lut(), DataFrame)
+def test_target_index_symbols_lut(mock_target_index: TargetIndex) -> None:
+    """Test target index symbols lut."""
+    assert isinstance(mock_target_index.symbols_lut(), DataFrame)
 
 
-def test_gene_index_filter_by_biotypes(mock_gene_index: GeneIndex) -> None:
-    """Test gene index filter by biotypes."""
+def test_target_index_filter_by_biotypes(mock_target_index: TargetIndex) -> None:
+    """Test target index filter by biotypes."""
     assert isinstance(
-        mock_gene_index.filter_by_biotypes(
+        mock_target_index.filter_by_biotypes(
             biotypes=["protein_coding", "3prime_overlapping_ncRNA", "antisense"]
         ),
-        GeneIndex,
+        TargetIndex,
     )
diff --git a/tests/gentropy/dataset/test_variant_index.py b/tests/gentropy/dataset/test_variant_index.py
index 43c409ea6..11f1c966f 100644
--- a/tests/gentropy/dataset/test_variant_index.py
+++ b/tests/gentropy/dataset/test_variant_index.py
@@ -15,7 +15,7 @@
 
 
 def test_variant_index_creation(mock_variant_index: VariantIndex) -> None:
-    """Test gene index creation with mock gene index."""
+    """Test variant index creation with mock variant index."""
     assert isinstance(mock_variant_index, VariantIndex)
 
 
diff --git a/tests/gentropy/datasource/intervals/test_andersson.py b/tests/gentropy/datasource/intervals/test_andersson.py
index 69575b7c3..1820c8322 100644
--- a/tests/gentropy/datasource/intervals/test_andersson.py
+++ b/tests/gentropy/datasource/intervals/test_andersson.py
@@ -6,8 +6,8 @@
 from pyspark.sql import DataFrame, SparkSession
 
 from gentropy.common.Liftover import LiftOverSpark
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.intervals import Intervals
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.datasource.intervals.andersson import IntervalsAndersson
 
 
@@ -26,13 +26,13 @@ def test_read_andersson(sample_intervals_andersson: DataFrame) -> None:
 
 def test_andersson_intervals_from_source(
     sample_intervals_andersson: DataFrame,
-    mock_gene_index: GeneIndex,
+    mock_target_index: TargetIndex,
     liftover_chain_37_to_38: LiftOverSpark,
 ) -> None:
     """Test AnderssonIntervals creation with mock data."""
     assert isinstance(
         IntervalsAndersson.parse(
-            sample_intervals_andersson, mock_gene_index, liftover_chain_37_to_38
+            sample_intervals_andersson, mock_target_index, liftover_chain_37_to_38
         ),
         Intervals,
     )
diff --git a/tests/gentropy/datasource/intervals/test_javierre.py b/tests/gentropy/datasource/intervals/test_javierre.py
index 886a28c52..4fdd9db7a 100644
--- a/tests/gentropy/datasource/intervals/test_javierre.py
+++ b/tests/gentropy/datasource/intervals/test_javierre.py
@@ -6,8 +6,8 @@
 from pyspark.sql import DataFrame, SparkSession
 
 from gentropy.common.Liftover import LiftOverSpark
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.intervals import Intervals
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.datasource.intervals.javierre import IntervalsJavierre
 
 
@@ -26,13 +26,13 @@ def test_read_javierre(sample_intervals_javierre: DataFrame) -> None:
 
 def test_javierre_intervals_from_source(
     sample_intervals_javierre: DataFrame,
-    mock_gene_index: GeneIndex,
+    mock_target_index: TargetIndex,
     liftover_chain_37_to_38: LiftOverSpark,
 ) -> None:
     """Test JavierreIntervals creation with mock data."""
     assert isinstance(
         IntervalsJavierre.parse(
-            sample_intervals_javierre, mock_gene_index, liftover_chain_37_to_38
+            sample_intervals_javierre, mock_target_index, liftover_chain_37_to_38
         ),
         Intervals,
     )
diff --git a/tests/gentropy/datasource/intervals/test_jung.py b/tests/gentropy/datasource/intervals/test_jung.py
index e391b8f96..bac6918b8 100644
--- a/tests/gentropy/datasource/intervals/test_jung.py
+++ b/tests/gentropy/datasource/intervals/test_jung.py
@@ -6,8 +6,8 @@
 from pyspark.sql import DataFrame, SparkSession
 
 from gentropy.common.Liftover import LiftOverSpark
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.intervals import Intervals
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.datasource.intervals.jung import IntervalsJung
 
 
@@ -24,13 +24,13 @@ def test_read_jung(sample_intervals_jung: DataFrame) -> None:
 
 def test_jung_intervals_from_source(
     sample_intervals_jung: DataFrame,
-    mock_gene_index: GeneIndex,
+    mock_target_index: TargetIndex,
     liftover_chain_37_to_38: LiftOverSpark,
 ) -> None:
     """Test JungIntervals creation with mock data."""
     assert isinstance(
         IntervalsJung.parse(
-            sample_intervals_jung, mock_gene_index, liftover_chain_37_to_38
+            sample_intervals_jung, mock_target_index, liftover_chain_37_to_38
         ),
         Intervals,
     )
diff --git a/tests/gentropy/datasource/intervals/test_thurman.py b/tests/gentropy/datasource/intervals/test_thurman.py
index 616e1abec..a6f4074b0 100644
--- a/tests/gentropy/datasource/intervals/test_thurman.py
+++ b/tests/gentropy/datasource/intervals/test_thurman.py
@@ -6,8 +6,8 @@
 from pyspark.sql import DataFrame, SparkSession
 
 from gentropy.common.Liftover import LiftOverSpark
-from gentropy.dataset.gene_index import GeneIndex
 from gentropy.dataset.intervals import Intervals
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.datasource.intervals.thurman import IntervalsThurman
 
 
@@ -26,13 +26,13 @@ def test_read_thurman(sample_intervals_thurman: DataFrame) -> None:
 
 def test_thurman_intervals_from_source(
     sample_intervals_thurman: DataFrame,
-    mock_gene_index: GeneIndex,
+    mock_target_index: TargetIndex,
     liftover_chain_37_to_38: LiftOverSpark,
 ) -> None:
     """Test IntervalsThurman creation with mock data."""
     assert isinstance(
         IntervalsThurman.parse(
-            sample_intervals_thurman, mock_gene_index, liftover_chain_37_to_38
+            sample_intervals_thurman, mock_target_index, liftover_chain_37_to_38
         ),
         Intervals,
     )
diff --git a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py
index 79f9d925a..3c8ff1aed 100644
--- a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py
+++ b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py
@@ -29,8 +29,8 @@
     from pyspark.sql.session import SparkSession
 
     from gentropy.dataset.colocalisation import Colocalisation
-    from gentropy.dataset.gene_index import GeneIndex
     from gentropy.dataset.study_locus import StudyLocus
+    from gentropy.dataset.target_index import TargetIndex
 
 
 def test_open_targets_as_l2g_gold_standard(
@@ -162,7 +162,7 @@ def test_build_feature_matrix(
     mock_study_locus: StudyLocus,
     mock_colocalisation: Colocalisation,
     mock_study_index: StudyIndex,
-    mock_gene_index: GeneIndex,
+    mock_target_index: TargetIndex,
 ) -> None:
     """Test building feature matrix with the eQtlColocH4Maximum feature."""
     features_list = ["eQtlColocH4Maximum", "isProteinCoding"]
@@ -170,7 +170,7 @@ def test_build_feature_matrix(
         colocalisation=mock_colocalisation,
         study_index=mock_study_index,
         study_locus=mock_study_locus,
-        gene_index=mock_gene_index,
+        target_index=mock_target_index,
     )
     fm = mock_study_locus.build_feature_matrix(features_list, loader)
     assert isinstance(
diff --git a/tests/gentropy/datasource/open_targets/test_target.py b/tests/gentropy/datasource/open_targets/test_target.py
index 091dcea53..b32886a4b 100644
--- a/tests/gentropy/datasource/open_targets/test_target.py
+++ b/tests/gentropy/datasource/open_targets/test_target.py
@@ -4,10 +4,12 @@
 
 from pyspark.sql import DataFrame
 
-from gentropy.dataset.gene_index import GeneIndex
+from gentropy.dataset.target_index import TargetIndex
 from gentropy.datasource.open_targets.target import OpenTargetsTarget
 
 
-def test_open_targets_as_gene_index(sample_target_index: DataFrame) -> None:
-    """Test gene index from source."""
-    assert isinstance(OpenTargetsTarget.as_gene_index(sample_target_index), GeneIndex)
+def test_open_targets_as_target_index(sample_target_index: DataFrame) -> None:
+    """Test target index from source."""
+    assert isinstance(
+        OpenTargetsTarget.as_target_index(sample_target_index), TargetIndex
+    )
diff --git a/tests/gentropy/test_schemas.py b/tests/gentropy/test_schemas.py
index 1b06076d0..500fbcd69 100644
--- a/tests/gentropy/test_schemas.py
+++ b/tests/gentropy/test_schemas.py
@@ -17,8 +17,8 @@
 if TYPE_CHECKING:
     from _pytest.fixtures import FixtureRequest
 
-    from gentropy.dataset.gene_index import GeneIndex
     from gentropy.dataset.l2g_prediction import L2GPrediction
+    from gentropy.dataset.target_index import TargetIndex
 
 SCHEMA_DIR = "src/gentropy/assets/schemas"
 
@@ -75,23 +75,23 @@ def test_schema_columns_camelcase(schema_json: str) -> None:
 
 
 class TestValidateSchema:
-    """Test validate_schema method using L2GPrediction (unnested) and GeneIndex (nested) as a testing dataset."""
+    """Test validate_schema method using L2GPrediction (unnested) and TargetIndex (nested) as testing datasets."""
 
     @pytest.fixture()
     def mock_dataset_instance(
         self: TestValidateSchema, request: FixtureRequest
-    ) -> L2GPrediction | GeneIndex:
+    ) -> L2GPrediction | TargetIndex:
         """Meta fixture to return the value of any requested fixture."""
         return request.getfixturevalue(request.param)
 
     @pytest.mark.parametrize(
         "mock_dataset_instance",
-        ["mock_l2g_predictions", "mock_gene_index"],
+        ["mock_l2g_predictions", "mock_target_index"],
         indirect=True,
     )
     def test_validate_schema_extra_field(
         self: TestValidateSchema,
-        mock_dataset_instance: L2GPrediction | GeneIndex,
+        mock_dataset_instance: L2GPrediction | TargetIndex,
     ) -> None:
         """Test that validate_schema raises an error if the observed schema has an extra field."""
         with pytest.raises(SchemaValidationError, match="extraField"):
@@ -101,12 +101,12 @@ def test_validate_schema_extra_field(
 
     @pytest.mark.parametrize(
         "mock_dataset_instance",
-        ["mock_l2g_predictions", "mock_gene_index"],
+        ["mock_l2g_predictions", "mock_target_index"],
         indirect=True,
     )
     def test_validate_schema_missing_field(
         self: TestValidateSchema,
-        mock_dataset_instance: L2GPrediction | GeneIndex,
+        mock_dataset_instance: L2GPrediction | TargetIndex,
     ) -> None:
         """Test that validate_schema raises an error if the observed schema is missing a required field, geneId in this case."""
         with pytest.raises(SchemaValidationError, match="geneId"):
@@ -114,12 +114,12 @@ def test_validate_schema_missing_field(
 
     @pytest.mark.parametrize(
         "mock_dataset_instance",
-        ["mock_l2g_predictions", "mock_gene_index"],
+        ["mock_l2g_predictions", "mock_target_index"],
         indirect=True,
     )
     def test_validate_schema_duplicated_field(
         self: TestValidateSchema,
-        mock_dataset_instance: L2GPrediction | GeneIndex,
+        mock_dataset_instance: L2GPrediction | TargetIndex,
     ) -> None:
         """Test that validate_schema raises an error if the observed schema has a duplicated field, geneId in this case."""
         with pytest.raises(SchemaValidationError, match="geneId"):
@@ -129,12 +129,12 @@ def test_validate_schema_duplicated_field(
 
     @pytest.mark.parametrize(
         "mock_dataset_instance",
-        ["mock_l2g_predictions", "mock_gene_index"],
+        ["mock_l2g_predictions", "mock_target_index"],
         indirect=True,
     )
     def test_validate_schema_different_datatype(
         self: TestValidateSchema,
-        mock_dataset_instance: L2GPrediction | GeneIndex,
+        mock_dataset_instance: L2GPrediction | TargetIndex,
     ) -> None:
         """Test that validate_schema raises an error if any field in the observed schema has a different type than expected."""
         with pytest.raises(SchemaValidationError, match="geneId"):