From a35e77bcef1a0c2300611097d3837071ae031987 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Feb 2026 18:41:54 +0000 Subject: [PATCH 1/2] Initial plan From d87ddb5d8ab8cf35c088ee09789b8a1feadfb912 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Feb 2026 18:47:32 +0000 Subject: [PATCH 2/2] Add Python examples, How to Use, and Data License to FastAPI docs Co-authored-by: ckouder <20468259+ckouder@users.noreply.github.com> --- app/core/config.py | 9 ++- app/main.py | 153 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/app/core/config.py b/app/core/config.py index a643920..3eca3b2 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -7,7 +7,14 @@ class Settings(BaseSettings): # API information PROJECT_NAME: str = "CLEAN Data API" - DESCRIPTION: str = "API for accessing enzyme kinetic data from the CLEAN database" + DESCRIPTION: str = ( + "API for accessing enzyme EC number predictions from the CLEAN database. " + "CLEAN (Contrastive Learning-Enabled Enzyme ANnotation) is a machine-learning " + "tool that predicts enzyme function using contrastive learning on protein sequences. " + "This API provides access to CLEAN-predicted EC numbers combined with UniProt protein " + "annotations, enabling researchers to explore and download enzyme function predictions " + "at scale." + ) VERSION: str = "0.1.0" # API behavior configuration diff --git a/app/main.py b/app/main.py index 54ab19f..6e336e6 100644 --- a/app/main.py +++ b/app/main.py @@ -33,6 +33,159 @@ async def lifespan(app: FastAPI): to next and previous pages. This threshold can be configured using the AUTO_PAGINATION_THRESHOLD environment variable. + +## How to Use + +The CLEAN Data API provides programmatic access to enzyme EC number predictions generated +by the [CLEAN tool](https://github.com/tttianhao/CLEAN). 
You can query the database by +organism, protein name, gene name, UniProt accession, EC number, curation status, EC +confidence score, and sequence length. + +**Base URL:** `https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1` + +**Available Endpoints:** +- `GET /search` — Search and filter enzyme records +- `GET /typeahead` — Retrieve typeahead suggestions for a given field and search term +- `GET /ec_lookup` — Look up EC numbers or enzyme class names +- `GET /curation-statuses` — List available curation status options + +Use the interactive documentation below to explore query parameters and response schemas, +or refer to the Python examples in the next section to get started quickly. + +## Python Examples + +The following examples use the [requests](https://docs.python-requests.org/) library. +Install it with `pip install requests` if needed. + +### Search by organism name + +```python +import requests + +BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1" + +response = requests.get( + f"{{BASE_URL}}/search", + params={{"organism": "Homo sapiens"}}, +) +response.raise_for_status() +data = response.json() +print(f"Total results: {{data['total']}}") +for record in data["data"]: + print(record["uniprot"], record["predicted_ec"]) +``` + +### Search by EC number + +```python +import requests + +BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1" + +response = requests.get( + f"{{BASE_URL}}/search", + params={{"ec_number": "3.5.1.18"}}, +) +response.raise_for_status() +data = response.json() +print(f"Total results: {{data['total']}}") +for record in data["data"]: + print(record["accession"], record["organism"]) +``` + +### Filter by EC confidence and curation status + +```python +import requests + +BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1" + +response = requests.get( + f"{{BASE_URL}}/search", + params={{ + "clean_ec_confidence_min": 0.9, + "curation_status": "reviewed", + "limit": 100, + "offset": 0, + }}, 
+) +response.raise_for_status() +data = response.json() +print(f"Total results: {{data['total']}}") +for record in data["data"]: + print(record["accession"], record["predicted_ec"]) +``` + +### Download results as CSV + +```python +import requests + +BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1" + +response = requests.get( + f"{{BASE_URL}}/search", + params={{"organism": "Escherichia coli", "format": "csv"}}, +) +response.raise_for_status() +with open("cleandb_results.csv", "wb") as f: + f.write(response.content) +print("Results saved to cleandb_results.csv") +``` + +### Typeahead suggestions for the organism field + +```python +import requests + +BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1" + +response = requests.get( + f"{{BASE_URL}}/typeahead", + params={{"field_name": "organism", "search": "Homo"}}, +) +response.raise_for_status() +data = response.json() +print(data["matches"]) +``` + +### Look up EC numbers by name or number + +```python +import requests + +BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1" + +response = requests.get( + f"{{BASE_URL}}/ec_lookup", + params={{"search": "hydrolase"}}, +) +response.raise_for_status() +data = response.json() +for match in data["matches"]: + print(match["ec_number"], match["ec_name"]) +``` + +## Data License + +The CLEAN Data API provides access to enzyme EC number predictions produced by the CLEAN +machine-learning tool, combined with protein annotations sourced from +[UniProt](https://www.uniprot.org/). + +**UniProt data** is made available under the +[Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/) +license. See the [UniProt license page](https://www.uniprot.org/help/license) for details. + +**CLEAN predictions** are provided for research and educational use. If you use this data +in your research, please cite: + +> Tianhao Yu, Haiyang Cui, Jianan Canal Li, Yunan Luo, Guangde Jiang, Huimin Zhao. 
+> *Enzyme function prediction using contrastive learning.* +> **Science**, 379(6639), 1358-1363 (2023). +> [https://doi.org/10.1126/science.adf2465](https://doi.org/10.1126/science.adf2465) + +This API and its source code are released under the +[MIT License](https://opensource.org/licenses/MIT). """, version=settings.VERSION, lifespan=lifespan,