Skip to content

Commit

Permalink
Merge branch 'dev' into yt_out_sample
Browse files Browse the repository at this point in the history
  • Loading branch information
project-defiant authored Jan 14, 2025
2 parents d3ea92d + 45ff1be commit 4354927
Show file tree
Hide file tree
Showing 55 changed files with 3,264 additions and 2,178 deletions.
15 changes: 9 additions & 6 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ jobs:
release:
runs-on: ubuntu-latest
concurrency: release
outputs:
released: ${{ steps.semrelease.outputs.released }}
permissions:
# NOTE: this enables trusted publishing.
# See https://github.com/pypa/gh-action-pypi-publish/tree/release/v1#trusted-publishing
Expand Down Expand Up @@ -46,7 +48,8 @@ jobs:

- name: Publish package to GitHub Release
uses: python-semantic-release/upload-to-gh-release@main
if: ${{ steps.semrelease.outputs.released }} == 'true'
# NOTE: semrelease output is a string, so we need to compare it to a string
if: steps.semrelease.outputs.released == 'true'
with:
# NOTE: allows the workflow to start when the push action on a tag gets executed;
# requires using GH_APP to authenticate, otherwise push authorised with
Expand All @@ -56,16 +59,16 @@ jobs:
tag: ${{ steps.semrelease.outputs.tag }}

- name: Store the distribution packages
if: steps.semrelease.outputs.released == 'true'
uses: actions/upload-artifact@v4
with:
name: python-package-distributions
path: dist/

publish-to-pypi:
needs: release
name: >-
Publish 📦 in PyPI
if: github.ref == 'refs/heads/main'
name: Publish 📦 in PyPI
if: github.ref == 'refs/heads/main' && needs.release.outputs.released == 'true'
runs-on: ubuntu-latest
environment:
name: pypi
Expand All @@ -84,7 +87,7 @@ jobs:
publish-to-testpypi:
name: Publish 📦 in TestPyPI
needs: release
if: github.ref != 'refs/heads/main'
if: github.ref == 'refs/heads/main' && needs.release.outputs.released == 'true'
runs-on: ubuntu-latest

environment:
Expand All @@ -108,7 +111,7 @@ jobs:
documentation:
needs: release
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main'
if: github.ref == 'refs/heads/main' && needs.release.outputs.released == 'true'
steps:
- uses: actions/checkout@v4
with:
Expand Down
5 changes: 2 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ci:
skip: [poetry-lock]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.1
rev: v0.7.4
hooks:
- id: ruff
args:
Expand Down Expand Up @@ -103,8 +103,7 @@ repos:
- id: pydoclint

- repo: https://github.com/python-poetry/poetry
rev: "1.8.0"
rev: "2.0.0"
hooks:
- id: poetry-check
# NOTE: with Poetry >= 2.0 `poetry lock` keeps already-locked versions by
# default and the `--no-update` option is gone, so no extra args are passed.
- id: poetry-lock
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ENV POETRY_NO_INTERACTION=1 \
POETRY_CACHE_DIR=/tmp/poetry_cache \
JAVA_HOME=/usr

RUN pip install poetry==1.7.1
# Quote the requirement: in shell form an unquoted `>` is an output redirect,
# so pip would install an unconstrained `poetry` and write to a file `=2.0.0`.
RUN pip install "poetry>=2.0.0"
WORKDIR /app

COPY pyproject.toml poetry.lock ./
Expand Down
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
PROJECT_ID ?= open-targets-genetics-dev
REGION ?= europe-west1
APP_NAME ?= $$(cat pyproject.toml | grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g')
REF ?= $$(git rev-parse --abbrev-ref HEAD)
PACKAGE_VERSION ?= $$(poetry version --short)
# NOTE: on a detached HEAD (e.g. a tag checkout) `git rev-parse --abbrev-ref HEAD`
# prints the literal string HEAD; in that case REF is set to the exact tag name, otherwise to the branch name
ifeq ($(shell git rev-parse --abbrev-ref HEAD),HEAD)
REF := $(shell git describe --exact-match --tags)
else
REF := $(shell git rev-parse --abbrev-ref HEAD)
endif

CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]')
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF}

Expand Down
1 change: 0 additions & 1 deletion docs/python_api/common/_common.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@ title: Common
Common utilities used in gentropy package.

- [**Genomic Region**](genomic_region.md): class to represent genomic regions
- [**Version Engine**](version_engine.md): class to extract version from datasource input paths
- [**Types**](types.md): Literal types used in the gentropy
12 changes: 0 additions & 12 deletions docs/python_api/common/version_engine.md

This file was deleted.

3,067 changes: 1,739 additions & 1,328 deletions poetry.lock

Large diffs are not rendered by default.

107 changes: 54 additions & 53 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,82 +1,83 @@
[tool.poetry]
[project]
name = "gentropy"
# !! version is managed by semantic_release
version = "0.0.0"
description = "Open Targets python framework for post-GWAS analysis"
authors = ["Open Targets core team"]
authors = [
{ name = "Open Targets core team", email = "[email protected]" }
]
license = "Apache-2.0"
readme = "README.md"
documentation = "https://opentargets.github.io/gentropy/"
repository = "https://github.com/opentargets/gentropy"
packages = [{ include = "gentropy", from = "src" }]
requires-poetry = ">=2.0"
requires-python = ">=3.10, <3.11"
dependencies = [
"pyspark (==3.3.4)",
"scipy (>=1.11.4, <1.12.0)",
"hydra-core (>=1.3.2, <1.4.0)",
"pyliftover (>=0.4, <0.5)",
"numpy (>=1.26.2, <1.27.0)",
"hail (==0.2.127)",
"wandb (>=0.19.0, <0.20.0)",
"google (>=3.0.0, <3.1.0)",
"omegaconf (>=2.3.0, <2.4.0)",
"typing-extensions (>=4.9.0, <4.13.0)",
"scikit-learn (>=1.6.0, <1.7.0)",
"pandas[gcp,parquet] (>=2.2.2, <2.3.0)",
"skops (>=0.11, <0.12)",
"google-cloud-secret-manager (>=2.20.0, <2.22.0)",
"shap (>=0.46.0, <0.47.0)",
"matplotlib (>=3.7.3, <3.8.0)"
]

[tool.poetry.urls]
[project.urls]
Source = "https://github.com/opentargets/gentropy"
"Bug Tracker" = "http://github.com/opentargets/issues"
"Funding" = "https://www.opentargets.org"
"Documentation" = "https://opentargets.github.io/gentropy/"
Funding = "https://www.opentargets.org"
Documentation = "https://opentargets.github.io/gentropy/"

[tool.poetry.scripts]
[project.scripts]
gentropy = "gentropy.cli:main"

[tool.poetry.dependencies]
python = "^3.10, <3.11"
pyspark = "3.3.4"
scipy = "^1.11.4"
hydra-core = "^1.3.2"
pyliftover = "^0.4"
numpy = "^1.26.2"
hail = "0.2.127"
wandb = ">=0.16.2,<0.19.0"
google = "^3.0.0"
omegaconf = "^2.3.0"
typing-extensions = "^4.9.0"
scikit-learn = "^1.3.2"
pandas = { extras = ["gcp", "parquet"], version = "^2.2.2" }
skops = ">=0.9,<0.11"
google-cloud-secret-manager = "^2.20.0"
shap = "^0.46.0"
matplotlib = "3.7.3"

[tool.poetry.dev-dependencies]
pre-commit = "^4.0.0"
mypy = "^1.13"
pep8-naming = "^0.14.1"
interrogate = "^1.7.0"
isort = "^5.13.2"
darglint = "^1.8.1"
ruff = "^0.7.0"

[tool.poetry.group.docs.dependencies]
mkdocs = "^1.5.3"
mkdocstrings-python = "^1.8.0"
mkdocs = ">=1.5.3, <1.6.0"
mkdocstrings-python = ">=1.12.2, <1.13.0"
mkdocs-material = "*"
mkdocs-section-index = "^0.3.4"
mkdocs-git-revision-date-localized-plugin = "^1.2.2"
mkdocs-autolinks-plugin = "^0.7.1"
mkdocs-awesome-pages-plugin = "^2.9.2"
mkdocs-exclude = "^1.0.2"
mkdocs-git-committers-plugin-2 = "^2.2.3"
lxml = "^5.1.0"
pymdown-extensions = "^10.7"
mkdocs-section-index = ">=0.3.4, <0.4.0"
mkdocs-git-revision-date-localized-plugin = ">=1.2.2, <1.4.0"
mkdocs-autolinks-plugin = ">=0.7.1, <0.8.0"
mkdocs-awesome-pages-plugin = ">=2.9.2, <3.0.0"
mkdocs-exclude = ">=1.0.2, <1.1.0"
mkdocs-git-committers-plugin-2 = ">=2.2.3, <2.5.0"
lxml = ">=5.1.0, <5.4.0"
pymdown-extensions = ">=10.7, <10.13"


[tool.poetry.group.tests.dependencies]
pytest-cov = ">=4.1,<7.0"
pytest-sugar = ">=0.9.5,<1.1.0"
dbldatagen = ">=0.3.1,<0.5.0"
pyparsing = "^3.1.1"
pyparsing = ">=3.1.1, <3.3.0"
pytest = ">=7.4.4,<9.0.0"
pytest-xdist = "^3.5.0"
pytest-xdist = ">=3.5.0, <3.7.0"


[tool.poetry.group.dev.dependencies]
ipython = "^8.19.0"
ipykernel = "^6.28.0"
google-cloud-dataproc = "^5.8.0"
ipython = ">=8.19.0, <8.31.0"
ipykernel = ">=6.28.0, <6.30.0"
google-cloud-dataproc = ">=5.8.0, <5.16.0"
pydoclint = ">=0.3.8,<0.6.0"
prettier = "^0.0.7"
deptry = ">=0.12,<0.21"
yamllint = "^1.33.0"
prettier = ">=0.0.7, <0.1.0"
deptry = ">=0.22.0, <0.23.0"
yamllint = ">=1.33.0, <1.36.0"
pre-commit = ">=4.0.0, <4.1.0"
mypy = ">=1.13, <1.14"
pep8-naming = ">=0.14.1, <0.15.0"
interrogate = ">=1.7.0, <1.8.0"
isort = ">=5.13.2, <5.14.0"
darglint = ">=1.8.1, <1.9.0"
ruff = ">=0.8.1, <0.9.0"

[tool.semantic_release]
logging_use_named_masks = true
Expand Down
6 changes: 6 additions & 0 deletions src/gentropy/assets/schemas/l2g_gold_standard.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
"nullable": false,
"metadata": {}
},
{
"name": "traitFromSourceMappedId",
"type": "string",
"nullable": true,
"metadata": {}
},
{
"name": "goldStandardSet",
"type": "string",
Expand Down
24 changes: 24 additions & 0 deletions src/gentropy/assets/schemas/variant_index.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@
"name": "targetId",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "normalisedScore",
"nullable": true,
"type": "double"
}
],
"type": "struct"
Expand Down Expand Up @@ -192,6 +198,18 @@
"nullable": true,
"type": "integer"
},
{
"metadata": {},
"name": "approvedSymbol",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "biotype",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "transcriptId",
Expand Down Expand Up @@ -271,6 +289,12 @@
},
"type": "array"
}
},
{
"metadata": {},
"name": "variantDescription",
"nullable": true,
"type": "string"
}
],
"type": "struct"
Expand Down
24 changes: 24 additions & 0 deletions src/gentropy/assets/schemas/vep_json_output.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
"containsNull": true,
"elementType": {
"fields": [
{
"metadata": {},
"name": "conservation",
"nullable": true,
"type": "double"
},
{
"metadata": {},
"name": "hgvsg",
Expand Down Expand Up @@ -294,6 +300,12 @@
"containsNull": true,
"elementType": {
"fields": [
{
"metadata": {},
"name": "conservation",
"nullable": true,
"type": "double"
},
{
"metadata": {},
"name": "alphamissense",
Expand Down Expand Up @@ -340,6 +352,18 @@
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "gene_symbol",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "biotype",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "appris",
Expand Down
13 changes: 10 additions & 3 deletions src/gentropy/biosample_index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Step to generate biosample index dataset."""

from __future__ import annotations

from gentropy.common.session import Session
Expand Down Expand Up @@ -28,10 +29,16 @@ def __init__(
efo_input_path (str): Input efo dataset path.
biosample_index_path (str): Output biosample index dataset path.
"""
cell_ontology_index = extract_ontology_from_json(cell_ontology_input_path, session.spark)
cell_ontology_index = extract_ontology_from_json(
cell_ontology_input_path, session.spark
)
uberon_index = extract_ontology_from_json(uberon_input_path, session.spark)
efo_index = extract_ontology_from_json(efo_input_path, session.spark).retain_rows_with_ancestor_id(["CL_0000000"])
efo_index = extract_ontology_from_json(
efo_input_path, session.spark
).retain_rows_with_ancestor_id(["CL_0000000"])

biosample_index = cell_ontology_index.merge_indices([uberon_index, efo_index])

biosample_index.df.write.mode(session.write_mode).parquet(biosample_index_path)
biosample_index.df.coalesce(session.output_partitions).write.mode(
session.write_mode
).parquet(biosample_index_path)
Loading

0 comments on commit 4354927

Please sign in to comment.