-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathelicit.py
More file actions
executable file
·385 lines (315 loc) · 13.7 KB
/
elicit.py
File metadata and controls
executable file
·385 lines (315 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
#!/usr/bin/env python3
"""
Elicit API — Command-line interface
Search 125M+ academic papers and create AI-powered research reports
from your terminal. No dependencies beyond the Python standard library.
Setup:
export ELICIT_API_KEY="your-key"
Usage:
python elicit.py search "effects of sleep deprivation on cognition"
python elicit.py search "CBT for insomnia" --type RCT --min-year 2020
python elicit.py report "What is the evidence for melatonin improving sleep quality?"
python elicit.py reports --status completed --limit 5
Get your API key at https://elicit.com/settings
"""
import argparse
import json
import os
import sys
import textwrap
import time
import urllib.error
import urllib.parse
import urllib.request
BASE_URL = "https://elicit.com/api/v1"
# ── Helpers ──────────────────────────────────────────────────────────────────
def get_api_key():
    """Return the Elicit API key from the environment, exiting with a hint if unset."""
    api_key = os.environ.get("ELICIT_API_KEY")
    if api_key:
        return api_key
    # No key: tell the user where to get one and bail out.
    print("Error: ELICIT_API_KEY environment variable is not set.", file=sys.stderr)
    print("Get your API key at https://elicit.com/settings", file=sys.stderr)
    sys.exit(1)
def api_request(method, path, body=None, query=None):
    """Make an authenticated request to the Elicit API and return parsed JSON.

    Args:
        method: HTTP method ("GET", "POST", ...).
        path: API path appended to BASE_URL (e.g. "/search").
        body: Optional dict, serialized as the JSON request body.
        query: Optional dict of query parameters; None values are dropped.

    Exits the process with status 1 on HTTP or connection errors, printing
    the API's error message (when available) to stderr.
    """
    url = BASE_URL + path
    if query:
        # Drop unset parameters, then let urlencode escape both keys and
        # values. (The previous urllib.request.quote is an undocumented
        # re-export of urllib.parse.quote and never encoded the keys.)
        params = {k: v for k, v in query.items() if v is not None}
        if params:
            url += "?" + urllib.parse.urlencode(params)
    data = json.dumps(body).encode() if body else None
    req = urllib.request.Request(url, data=data, method=method)
    req.add_header("Authorization", f"Bearer {get_api_key()}")
    req.add_header("Content-Type", "application/json")
    req.add_header("Accept", "application/json")
    # urllib's default User-Agent is blocked by some CDNs
    req.add_header("User-Agent", "elicit-cli/1.0")
    try:
        with urllib.request.urlopen(req) as resp:
            return json.loads(resp.read().decode())
    except urllib.error.HTTPError as e:
        # Surface the API's error message when available — it's usually more
        # helpful than the generic HTTP status text.
        error_body = e.read().decode() if e.fp else ""
        try:
            detail = json.loads(error_body).get("message", error_body)
        except (json.JSONDecodeError, AttributeError):
            # Not JSON, or JSON that isn't an object — show the raw body.
            detail = error_body
        print(f"API error {e.code}: {detail}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"Connection error: {e.reason}", file=sys.stderr)
        sys.exit(1)
def truncate(text, width):
    """Trim *text* to at most *width* characters, appending '…' when shortened."""
    if not text:
        return ""
    # Collapse every run of whitespace (including newlines) to one space.
    collapsed = " ".join(text.split())
    if len(collapsed) > width:
        return collapsed[: width - 1] + "\u2026"
    return collapsed
def format_authors(authors, max_len=40):
    """Condense an author list into a readable short form."""
    if not authors:
        return ""
    # The API returns authors as a flat array of strings; skip anything else.
    names = [author for author in authors if author and isinstance(author, str)]
    if not names:
        return ""
    full = ", ".join(names)
    if len(full) > max_len:
        # Too long: fall back to "First Author et al." within the budget.
        return truncate(names[0], max_len - 7) + " et al."
    return full
# ── Pretty printers ─────────────────────────────────────────────────────────
def print_papers(papers):
    """Render a list of paper dicts as a numbered, ANSI-formatted listing."""
    if not papers:
        print("No papers found.")
        return
    # Size the output to the attached terminal; fall back when there is none.
    try:
        term_width = os.get_terminal_size().columns
    except OSError:
        term_width = 100
    for idx, paper in enumerate(papers, 1):
        title = paper.get("title") or "(untitled)"
        year = paper.get("year") or "n/a"
        citations = paper.get("citedByCount")
        authors = format_authors(paper.get("authors"), max_len=60)
        venue = paper.get("venue") or ""
        doi = paper.get("doi") or ""
        # Header line: number, title (bold), year.
        header = f" {idx}. {title}"
        print(f"\033[1m{truncate(header, term_width - 8)}\033[0m ({year})")
        # Metadata line: authors | venue | citations — whichever are present.
        cite_str = f"{citations} citations" if citations is not None else ""
        meta = " | ".join(part for part in (authors, venue, cite_str) if part)
        if meta:
            print(f" {truncate(meta, term_width - 6)}")
        # Dimmed single-line abstract preview.
        abstract = paper.get("abstract") or ""
        if abstract:
            print(f" \033[2m{truncate(abstract, term_width - 6)}\033[0m")
        # Resolvable DOI link when the paper has one.
        if doi:
            print(f" https://doi.org/{doi}")
        print()
    print(f" {len(papers)} paper(s) returned.")
def print_report_status(report, verbose=True):
    """Print a single report's status in a human-friendly way.

    With verbose=True, a completed report also gets its wrapped summary and a
    note about any available downloads.
    """
    status = report.get("status", "unknown")
    # The list endpoint puts title at the top level; the detail endpoint
    # nests it under result.
    title = (
        report.get("title")
        or report.get("result", {}).get("title")
        or report.get("reportId", "")
    )
    badges = {
        "completed": "\033[32m done \033[0m",
        "processing": "\033[33m running \033[0m",
        "failed": "\033[31m failed \033[0m",
    }
    print(f" [{badges.get(status, f' {status} ')}] {truncate(title, 70)}")
    url = report.get("url", "")
    if url:
        print(f" {url}")
    if verbose and status == "completed":
        summary = report.get("result", {}).get("summary")
        if summary:
            # Wrap the summary to the terminal width, dimmed and indented.
            try:
                wrap_width = os.get_terminal_size().columns - 12
            except OSError:
                wrap_width = 88
            body = textwrap.indent(textwrap.fill(summary, width=wrap_width), " ")
            print(f"\n\033[2m{body}\033[0m")
        downloads = [
            label
            for key, label in (("pdfUrl", "PDF"), ("docxUrl", "DOCX"))
            if report.get(key)
        ]
        if downloads:
            print(f"\n Downloads available: {', '.join(downloads)} (use --json for URLs)")
    print()
def print_reports_list(reports):
    """Print a compact, non-verbose status line per report, then a count."""
    if not reports:
        print("No reports found.")
        return
    for report in reports:
        print_report_status(report, verbose=False)
    print(f" {len(reports)} report(s).")
# ── Subcommands ──────────────────────────────────────────────────────────────
def cmd_search(args):
    """Search 125M+ academic papers."""
    body = {"query": args.query}
    if args.max_results is not None:
        body["maxResults"] = args.max_results
    # Attach a filters object only when the user supplied a filter flag.
    filters = {
        key: value
        for key, value in (("minYear", args.min_year), ("maxYear", args.max_year))
        if value is not None
    }
    if args.type:
        filters["typeTags"] = [args.type]
    if args.pubmed_only:
        filters["pubmedOnly"] = True
    if filters:
        body["filters"] = filters
    data = api_request("POST", "/search", body=body)
    if args.json:
        print(json.dumps(data, indent=2))
        return
    print()
    print_papers(data.get("papers", []))
def cmd_report(args):
    """Create an AI-powered research report, optionally waiting for completion."""
    body = {"researchQuestion": args.question}
    if args.search_papers is not None:
        body["maxSearchPapers"] = args.search_papers
    if args.extract_papers is not None:
        body["maxExtractPapers"] = args.extract_papers
    created = api_request("POST", "/reports", body=body)
    report_id = created.get("reportId")
    url = created.get("url", "")

    if args.no_wait:
        # Fire-and-forget: print the ID/URL and let the user poll later.
        if args.json:
            print(json.dumps(created, indent=2))
            return
        print(f"\n Report created: {report_id}")
        if url:
            print(f" View at: {url}")
        print(" Poll status with: python elicit.py reports")
        return

    # Poll until the report completes or fails (timeout after 20 minutes).
    print("\n Creating report\u2026", end="", flush=True)
    poll_interval = 15  # seconds between polls
    max_wait = 20 * 60  # give up after 20 minutes
    elapsed = 0
    while elapsed < max_wait:
        time.sleep(poll_interval)
        elapsed += poll_interval
        report = api_request("GET", f"/reports/{report_id}")
        status = report.get("status", "unknown")
        if status == "completed":
            print(" done.\n")
            if args.json:
                print(json.dumps(report, indent=2))
            else:
                print_report_status(report)
            return
        if status == "failed":
            print(" failed.\n", file=sys.stderr)
            if args.json:
                print(json.dumps(report, indent=2), file=sys.stderr)
            else:
                print_report_status(report)
            sys.exit(1)
        # Still processing: show progress and keep waiting.
        print(".", end="", flush=True)
        # Back off gently so we don't hammer the API on long reports.
        poll_interval = min(poll_interval + 5, 30)
    print(f"\n\n Report is still processing after {max_wait // 60} minutes.")
    print(f" Check status at: {url}")
    print(" Or run: python elicit.py reports")
def cmd_reports(args):
    """List your reports."""
    params = {}
    if args.limit is not None:
        params["limit"] = args.limit
    if args.status:
        params["status"] = args.status
    data = api_request("GET", "/reports", query=params or None)
    # The response may be a bare list, or an object wrapping the list under
    # "reports" (or "items").
    if isinstance(data, list):
        reports = data
    else:
        reports = data.get("reports", data.get("items", []))
    if args.json:
        print(json.dumps(data, indent=2))
    else:
        print()
        print_reports_list(reports)
# ── CLI definition ───────────────────────────────────────────────────────────
def build_parser():
    """Construct the argparse CLI: `search`, `report`, and `reports` subcommands."""
    parser = argparse.ArgumentParser(
        prog="elicit",
        description="Search 125M+ academic papers and create research reports from your terminal.",
        epilog="Get your API key at https://elicit.com/settings",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # `search`: query papers with optional year/type/source filters.
    search = commands.add_parser(
        "search",
        help="Search academic papers",
        description="Search 125M+ papers by keyword, topic, or research question.",
    )
    search.add_argument("query", help="Search query (e.g. 'CRISPR gene editing efficiency')")
    search.add_argument("--max-results", type=int, metavar="N", help="Maximum papers to return")
    search.add_argument("--min-year", type=int, metavar="YYYY", help="Earliest publication year")
    search.add_argument("--max-year", type=int, metavar="YYYY", help="Latest publication year")
    search.add_argument(
        "--type",
        metavar="TYPE",
        help="Study type filter (e.g. RCT, Meta-analysis, Review, CaseReport)",
    )
    search.add_argument("--pubmed-only", action="store_true", help="Restrict to PubMed-indexed papers")
    search.add_argument("--json", action="store_true", help="Output raw JSON instead of formatted table")
    search.set_defaults(func=cmd_search)

    # `report`: create a single AI-generated report from a question.
    report = commands.add_parser(
        "report",
        help="Create a research report",
        description="Generate an AI-powered research report from a question.",
    )
    report.add_argument("question", help="Research question for the report")
    report.add_argument(
        "--search-papers", type=int, metavar="N", help="Max papers to search (default: API default)"
    )
    report.add_argument(
        "--extract-papers", type=int, metavar="N", help="Max papers to extract data from (default: API default)"
    )
    report.add_argument(
        "--no-wait",
        action="store_true",
        help="Don't wait for completion — just create the report and print its URL",
    )
    report.add_argument("--json", action="store_true", help="Output raw JSON instead of formatted text")
    report.set_defaults(func=cmd_report)

    # `reports`: list previously created reports.
    reports = commands.add_parser(
        "reports",
        help="List your reports",
        description="List reports you've created via the API.",
    )
    reports.add_argument("--limit", type=int, metavar="N", help="Number of reports to return (default: 20)")
    reports.add_argument(
        "--status",
        metavar="STATUS",
        help="Filter by status (e.g. completed, processing, failed)",
    )
    reports.add_argument("--json", action="store_true", help="Output raw JSON instead of formatted table")
    reports.set_defaults(func=cmd_reports)
    return parser
def main():
    """Entry point: parse CLI arguments and dispatch to the chosen subcommand."""
    cli = build_parser()
    parsed = cli.parse_args()
    parsed.func(parsed)


if __name__ == "__main__":
    main()