Skip to content

Commit

Permalink
Merge pull request #189 from opentargets/il-geneindex-missing-cols
Browse files: browse the repository at this point in the history
Add missing columns in gene index
  • Loading branch information
DSuveges authored Oct 19, 2023
2 parents e26f2c4 + ec23766 commit 6389ff1
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 24 deletions.
2 changes: 1 addition & 1 deletion config/datasets/gcp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ld_index_raw_template: gs://gcp-public-data--gnomad/release/2.1.1/ld/gnomad.geno
ld_matrix_template: gs://gcp-public-data--gnomad/release/2.1.1/ld/gnomad.genomes.r2.1.1.{POP}.common.adj.ld.bm

# Output datasets
- gene_index: ${datasets.outputs}/gene_index/gene_index
+ gene_index: ${datasets.outputs}/gene_index
variant_annotation: ${datasets.outputs}/variant_annotation
variant_index: ${datasets.outputs}/variant_index
study_locus: ${datasets.outputs}/study_locus
Expand Down
28 changes: 6 additions & 22 deletions src/otg/assets/schemas/gene_index.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,30 +32,14 @@
"metadata": {}
},
  {
+ "metadata": {},
  "name": "obsoleteSymbols",
- "type": {
- "type": "array",
- "elementType": {
- "type": "struct",
- "fields": [
- {
- "name": "label",
- "type": "string",
- "nullable": true,
- "metadata": {}
- },
- {
- "name": "source",
- "type": "string",
- "nullable": true,
- "metadata": {}
- }
- ]
- },
- "containsNull": true
- },
  "nullable": true,
- "metadata": {}
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
  },
{
"name": "tss",
Expand Down
2 changes: 1 addition & 1 deletion src/otg/dataset/gene_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def symbols_lut(self: GeneIndex) -> DataFrame:
"""
return self.df.select(
f.explode(
- f.array_union(f.array("approvedSymbol"), f.col("obsoleteSymbols.label"))
+ f.array_union(f.array("approvedSymbol"), f.col("obsoleteSymbols"))
).alias("geneSymbol"),
"*",
)
4 changes: 4 additions & 0 deletions src/otg/datasource/open_targets/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ def as_gene_index(cls: type[GeneIndex], target_index: DataFrame) -> GeneIndex:
return GeneIndex(
_df=target_index.select(
  f.coalesce(f.col("id"), f.lit("unknown")).alias("geneId"),
+ "approvedSymbol",
+ "approvedName",
+ "biotype",
+ f.col("obsoleteSymbols.label").alias("obsoleteSymbols"),
f.coalesce(f.col("genomicLocation.chromosome"), f.lit("unknown")).alias(
"chromosome"
),
Expand Down

0 comments on commit 6389ff1

Please sign in to comment.