Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/artifact.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:

- id: checkout
name: Check out repo
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
python-version: ["3.11", "3.12", "3.13"]
os: [ubuntu-latest, ubuntu-22.04-arm] # macos-latest is arm64
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
with:
fetch-depth: 1
- name: Set up Python
Expand All @@ -39,7 +39,7 @@ jobs:
- name: Check dependencies
run: uv run deptry .
- name: Run tests
run: uv run pytest
run: make test
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_release_trigger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
pull-request:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v6
- name: pull-request
uses: diillson/auto-pull-request@v1.0.1
with:
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
with:
app-id: ${{ vars.TRIGGER_WORKFLOW_GH_APP_ID}}
private-key: ${{ secrets.TRIGGER_WORKFLOW_GH_APP_KEY }}
- uses: actions/checkout@v4
- uses: actions/checkout@v6
with:
fetch-depth: 0
ref: ${{ github.ref_name }}
Expand All @@ -46,7 +46,7 @@ jobs:
with:
github_token: ${{ steps.trigger-token.outputs.token }}
tag: ${{ steps.semrelease.outputs.tag }}
- uses: actions/upload-artifact@v5
- uses: actions/upload-artifact@v7
if: steps.semrelease.outputs.released == 'true'
with:
name: python-package-distributions
Expand All @@ -63,7 +63,7 @@ jobs:
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing
steps:
- uses: actions/download-artifact@v6
- uses: actions/download-artifact@v7
with:
name: python-package-distributions
path: dist/
Expand All @@ -84,7 +84,7 @@ jobs:
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing
steps:
- uses: actions/download-artifact@v6
- uses: actions/download-artifact@v7
with:
name: python-package-distributions
path: dist/
Expand All @@ -95,7 +95,7 @@ jobs:
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main' && needs.release.outputs.released == 'true'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
with:
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ wandb/
hail*.log
.python-version
.idea
.venv/
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ci:
skip: [uv-lock]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.4
rev: v0.15.2
hooks:
- id: ruff
args:
Expand Down Expand Up @@ -35,7 +35,7 @@ repos:
- id: debug-statements
- id: check-docstring-first
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.37.1
rev: v1.38.0
hooks:
- id: yamllint

Expand All @@ -59,14 +59,14 @@ repos:
exclude: "CHANGELOG.md"

- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: v9.23.0
rev: v9.24.0
hooks:
- id: commitlint
additional_dependencies: ["@commitlint/config-conventional@18.6.3"]
stages: [commit-msg]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.18.2"
rev: "v1.19.1"
hooks:
- id: mypy
args:
Expand Down Expand Up @@ -94,16 +94,16 @@ repos:
args: [--convention=google, --add-ignore=D107]

- repo: https://github.com/lovesegfault/beautysh
rev: v6.4.1
rev: v6.4.2
hooks:
- id: beautysh

- repo: https://github.com/jsh9/pydoclint
rev: 0.8.1
rev: 0.8.3
hooks:
- id: pydoclint
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.9.8
rev: 0.10.4
hooks:
- id: uv-lock
args: [--check]
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
"bgzip",
"biobank",
"biosample",
"biosamples",
"colocalisation",
"contig",
"diffpval",
"eqtl",
"finngen",
Expand All @@ -47,6 +49,10 @@
"harmonised",
"Harmonises",
"Harmonising",
"iend",
"INTRAGENIC",
"istart",
"itype",
"liftover",
"logpval",
"logsum",
Expand Down
19 changes: 16 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,22 @@ check: ## Lint and format code
@uv run pydoclint --config=pyproject.toml src
@uv run pydoclint --config=pyproject.toml --skip-checking-short-docstrings=true tests

test: ## Run tests
@echo "Running Tests..."
@uv run pytest
test-no-shared-spark-session: ## Run tests that can not rely on shared SparkSession.
@echo "Running tests that can not rely on shared SparkSession fixture..."
@COVERAGE_FILE=.coverage.no_shared_spark uv run pytest -m "no_shared_spark and not download_jars_from_web" -n0 --cov-report=

test-shared-spark-session: ## Run tests that can use shared SparkSession fixture.
@echo "Running tests that can share SparkSession fixture..."
@COVERAGE_FILE=.coverage.shared_spark uv run pytest --cov-report=

test-no-shared-spark-session-web-dependencies: ## Run tests that require downloading spark dependency jars from the web (not run by default).
@echo "Running tests that can not rely on shared SparkSession and require downloading jar dependencies from web..."
@COVERAGE_FILE=.coverage.no_shared_spark_web_deps uv run pytest -n0 -m "download_jars_from_web" --cov-report=

test: test-no-shared-spark-session test-shared-spark-session ## Run default test suite
@uv run coverage combine .coverage.shared_spark .coverage.no_shared_spark
@uv run coverage xml
@rm -f .coverage.shared_spark .coverage.no_shared_spark

build-documentation: ## Create local server with documentation
@echo "Building Documentation..."
Expand Down
2 changes: 1 addition & 1 deletion docs/python_api/common/session.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ title: session
## Spark Session wrapper for gentropy

:::gentropy.common.session.Session
:::gentropy.common.session.Log4j
:::gentropy.common.session.SparkWriteMode
13 changes: 13 additions & 0 deletions docs/python_api/datasets/l2g_features/intervals.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
title: Epigenetic regulatory region features
---

## List of features

::: gentropy.dataset.l2g_features.intervals.E2gMeanFeature
::: gentropy.dataset.l2g_features.intervals.E2gMeanNeighbourhoodFeature

## Common logic

::: gentropy.dataset.l2g_features.intervals.e2g_interval_feature_wide_logic
::: gentropy.dataset.l2g_features.intervals.get_or_make_e2g_wide
8 changes: 2 additions & 6 deletions docs/python_api/datasources/intervals/_intervals.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,10 @@ In this section, we provide a list of studies that focus on interaction and inte

1. **E2G (Gschwind et al., Nov 2023):**
_Title:_ "An encyclopedia of enhancer-gene regulatory interactions in the human genome".
This study comprises a large, curated compendium of enhancer→gene links built by integrating multiple evidence types (epigenomic signals, 3D contacts, expression correlations, and CRISPR perturbations) across many biosamples. The resource reports confidence/score per enhancer–gene pair and is organised by biosample/cell type.

DOI: 10.1101/2023.11.09.563812
This study comprises a large, curated compendium of enhancer→gene links built by integrating multiple evidence types (epigenomic signals, 3D contacts, expression correlations, and CRISPR perturbations) across many biosamples from ENCODE. The resource reports confidence/score per enhancer–gene pair and is organised by biosample/cell type. DOI: 10.1101/2023.11.09.563812

2. **EPIraction (Nurtdinov et al., Feb 2025):**
_Title:_ "EPIraction - an atlas of candidate enhancer-gene interactions in human tissues and cell lines".
This study is a genome-wide atlas of candidate enhancer–gene links inferred primarily from H3K27ac ChIP-seq (enhancer activity) integrated with Hi-C contact probabilities, scored per tissue/cell line—methodologically similar in spirit to ABC-style scoring.

DOI: 10.1101/2025.02.18.638885
This study is a genome-wide atlas of candidate enhancer–gene links inferred primarily from H3K27ac ChIP-seq (enhancer activity) integrated with Hi-C contact probabilities, scored per tissue/cell line—methodologically similar in spirit to ABC-style scoring. A UCSC track hub is available. DOI: 10.1101/2025.02.18.638885

For in-depth details on each study, you may refer to the respective publications.
21 changes: 11 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dependencies = [
"xgboost-cpu>=3.0.4 ; (platform_machine == 'amd64' and sys_platform != 'darwin') or (platform_machine == 'x86_64' and sys_platform != 'darwin')",
"xgboost>=3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'darwin'",
"huggingface-hub>=0.27.1",
"wandb (>=0.19.4, <0.23.0)",
"wandb (>=0.19.4, <0.26.0)",
]
classifiers = [
"Programming Language :: Python :: 3.11",
Expand Down Expand Up @@ -67,19 +67,19 @@ test = [
]

dev = [
"ipython >=8.19.0, <8.38.0",
"ipython >=8.19.0, <9.9.0",
"pydoclint >=0.3.8,<0.9.0",
"ipykernel >=6.28.0, <6.31.0",
"ipykernel >=6.28.0, <7.3.0",
"prettier >=0.0.7, <0.1.0",
"deptry >=0.22.0, <0.25.0",
"yamllint >=1.33.0, <1.38.0",
"pre-commit >=4.0.0, <4.4.0",
"pre-commit >=4.0.0, <4.6.0",
"mypy >=1.13, <1.19",
"pep8-naming >=0.14.1, <0.16.0",
"interrogate >=1.7.0, <1.8.0",
"isort >=5.13.2, <6.1.0",
"isort >=5.13.2, <7.1.0",
"darglint >=1.8.1, <1.9.0",
"ruff >=0.8.1, <0.15.0",
"ruff >=0.8.1, <0.16.0",
]
[tool.semantic_release]
logging_use_named_masks = true
Expand Down Expand Up @@ -130,13 +130,12 @@ color = true
exclude = ["dist"]

[tool.pytest.ini_options]
addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml --cache-clear"
addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml --cache-clear -m 'not download_jars_from_web and not no_shared_spark'"
pythonpath = ["."]
testpaths = ["tests/gentropy", "src/gentropy"]
markers = ["step_test", "long_test"]
markers = ["step_test", "download_jars_from_web", "no_shared_spark"]
filterwarnings = [
"ignore:.*it is preferred to specify type hints for pandas UDF.*:UserWarning"

]

# Semi-strict mode for mypy
Expand Down Expand Up @@ -252,9 +251,11 @@ ignore = [
"PLW2901", # Outer {outer_kind} variable {name} overwritten by inner {inner_kind} target
"UP006", # keep type annotation style as is
"UP007", # keep type annotation style as is
"UP042", # use of the str and Enum (double inheritance) for strEnum
"PLW0108", # Use of lambda expression assigned to variable. Use a def instead.
# Ignored due to performance: https://github.com/charliermarsh/ruff/issues/2923
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)`
"G004", # f-string used in logging function
"G004" # f-string used in logging function

]

Expand Down
8 changes: 7 additions & 1 deletion src/gentropy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,18 @@

import warnings

# NOTE: Suppress DeprecationWarnings from pyspark related to pandas API on Spark due to LooseVersion being deprecated in Python 3.12+
# NOTE: Suppress DeprecationWarnings and UserWarnings from pyspark related to pandas API on Spark due to LooseVersion being deprecated in Python 3.12+
warnings.filterwarnings(
"ignore",
category=DeprecationWarning,
module="pyspark.sql.pandas.utils",
)
warnings.filterwarnings(
"ignore",
category=UserWarning,
module="pyspark.sql.pandas.functions",
)


from gentropy.common.session import Session
from gentropy.dataset.biosample_index import BiosampleIndex
Expand Down
12 changes: 12 additions & 0 deletions src/gentropy/assets/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Source - https://stackoverflow.com/a/76196464

# Set everything to be logged to the console
log4j.rootCategory=ERROR, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Set the log level to ERROR for everything
log4j.logger.org.apache=ERROR
log4j.logger.org.apache.spark=ERROR
23 changes: 23 additions & 0 deletions src/gentropy/assets/schemas/contig_index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"fields": [
{
"metadata": {},
"name": "id",
"nullable": false,
"type": "string"
},
{
"metadata": {},
"name": "start",
"nullable": false,
"type": "long"
},
{
"metadata": {},
"name": "end",
"nullable": false,
"type": "long"
}
],
"type": "struct"
}
20 changes: 15 additions & 5 deletions src/gentropy/assets/schemas/intervals.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
"metadata": {},
"name": "start",
"nullable": false,
"type": "string"
"type": "long"
},
{
"metadata": {},
"name": "end",
"nullable": false,
"type": "string"
"type": "long"
},
{
"metadata": {},
Expand Down Expand Up @@ -82,13 +82,13 @@
},
{
"metadata": {},
"name": "biofeature",
"name": "biosampleName",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "biosampleName",
"name": "biosampleFromSourceId",
"nullable": true,
"type": "string"
},
Expand All @@ -107,8 +107,18 @@
{
"metadata": {},
"name": "intervalId",
"nullable": true,
"nullable": false,
"type": "string"
},
{
"metadata": {},
"name": "qualityControls",
"type": {
"type": "array",
"elementType": "string",
"containsNull": true
},
"nullable": true
}
],
"type": "struct"
Expand Down
Loading