moleculemaker · Copilot · Feb 24, 2026 · Feb 24, 2026
diff --git a/app/core/config.py b/app/core/config.py
@@ -7,7 +7,14 @@
 class Settings(BaseSettings):
     # API information
     PROJECT_NAME: str = "CLEAN Data API"
-    DESCRIPTION: str = "API for accessing enzyme kinetic data from the CLEAN database"
+    DESCRIPTION: str = (
+        "API for accessing enzyme EC number predictions from the CLEAN database. "
+        "CLEAN (Contrastive Learning-Enabled Enzyme ANnotation) is a machine-learning "
+        "tool that predicts enzyme function using contrastive learning on protein sequences. "
+        "This API provides access to CLEAN-predicted EC numbers combined with UniProt protein "
+        "annotations, enabling researchers to explore and download enzyme function predictions "
+        "at scale."
+    )
     VERSION: str = "0.1.0"
 
     # API behavior configuration

diff --git a/app/main.py b/app/main.py
@@ -33,6 +33,159 @@ async def lifespan(app: FastAPI):
 to next and previous pages.
 
 This threshold can be configured using the AUTO_PAGINATION_THRESHOLD environment variable.
+
+## How to Use
+
+The CLEAN Data API provides programmatic access to enzyme EC number predictions generated
+by the [CLEAN tool](https://github.com/tttianhao/CLEAN). You can query the database by
+organism, protein name, gene name, UniProt accession, EC number, curation status, EC
+confidence score, and sequence length.
+
+**Base URL:** `https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1`
+
+**Available Endpoints:**
+- `GET /search` — Search and filter enzyme records
+- `GET /typeahead` — Retrieve typeahead suggestions for a given field and search term
+- `GET /ec_lookup` — Look up EC numbers or enzyme class names
+- `GET /curation-statuses` — List available curation status options
+
+Use the interactive documentation below to explore query parameters and response schemas,
+or refer to the Python examples in the next section to get started quickly.
+
+## Python Examples
+
+The following examples use the [requests](https://docs.python-requests.org/) library.
+Install it with `pip install requests` if needed.
+
+### Search by organism name
+
+```python
+import requests
+
+BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1"
+
+response = requests.get(
+    f"{{BASE_URL}}/search",
+    params={{"organism": "Homo sapiens"}},
+)
+response.raise_for_status()
+data = response.json()
+print(f"Total results: {{data['total']}}")
+for record in data["data"]:
+    print(record["uniprot"], record["predicted_ec"])
+```
+
+### Search by EC number
+
+```python
+import requests
+
+BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1"
+
+response = requests.get(
+    f"{{BASE_URL}}/search",
+    params={{"ec_number": "3.5.1.18"}},
+)
+response.raise_for_status()
+data = response.json()
+print(f"Total results: {{data['total']}}")
+for record in data["data"]:
+    print(record["accession"], record["organism"])
+```
+
+### Filter by EC confidence and curation status
+
+```python
+import requests
+
+BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1"
+
+response = requests.get(
+    f"{{BASE_URL}}/search",
+    params={{
+        "clean_ec_confidence_min": 0.9,
+        "curation_status": "reviewed",
+        "limit": 100,
+        "offset": 0,
+    }},
+)
+response.raise_for_status()
+data = response.json()
+print(f"Total results: {{data['total']}}")
+for record in data["data"]:
+    print(record["accession"], record["predicted_ec"])
+```
+
+### Download results as CSV
+
+```python
+import requests
+
+BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1"
+
+response = requests.get(
+    f"{{BASE_URL}}/search",
+    params={{"organism": "Escherichia coli", "format": "csv"}},
+)
+response.raise_for_status()
+with open("cleandb_results.csv", "wb") as f:
+    f.write(response.content)
+print("Results saved to cleandb_results.csv")
+```
+
+### Typeahead suggestions for organism field
+
+```python
+import requests
+
+BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1"
+
+response = requests.get(
+    f"{{BASE_URL}}/typeahead",
+    params={{"field_name": "organism", "search": "Homo"}},
+)
+response.raise_for_status()
+data = response.json()
+print(data["matches"])
+```
+
+### Look up EC numbers by name or number
+
+```python
+import requests
+
+BASE_URL = "https://fastapi.cleandb.mmli2.ncsa.illinois.edu/api/v1"
+
+response = requests.get(
+    f"{{BASE_URL}}/ec_lookup",
+    params={{"search": "hydrolase"}},
+)
+response.raise_for_status()
+data = response.json()
+for match in data["matches"]:
+    print(match["ec_number"], match["ec_name"])
+```
+
+## Data License
+
+The CLEAN Data API provides access to enzyme EC number predictions produced by the CLEAN
+machine-learning tool, combined with protein annotations sourced from
+[UniProt](https://www.uniprot.org/).
+
+**UniProt data** is made available under the
+[Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/)
+license. See the [UniProt license page](https://www.uniprot.org/help/license) for details.
+
+**CLEAN predictions** are provided for research and educational use. If you use this data
+in your research, please cite:
+
+> Tianhao Yu, Haiyang Cui, Jianan Canal Li, Yunan Luo, Guangde Jiang, Huimin Zhao.
+> *Enzyme function prediction using contrastive learning.*
+> **Science**, 379(6639), 1358-1363 (2023).
+> [https://doi.org/10.1126/science.adf2465](https://doi.org/10.1126/science.adf2465)
+
+This API and its source code are released under the
+[MIT License](https://opensource.org/licenses/MIT).
 """,
     version=settings.VERSION,
     lifespan=lifespan,