-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathelicit.py
More file actions
executable file
·385 lines (315 loc) · 13.7 KB
/
elicit.py
File metadata and controls
executable file
·385 lines (315 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
#!/usr/bin/env python3
"""
Elicit API — Command-line interface
Search 125M+ academic papers and create AI-powered research reports
from your terminal. No dependencies beyond the Python standard library.
Setup:
export ELICIT_API_KEY="your-key"
Usage:
python elicit.py search "effects of sleep deprivation on cognition"
python elicit.py search "CBT for insomnia" --type RCT --min-year 2020
python elicit.py report "What is the evidence for melatonin improving sleep quality?"
python elicit.py reports --status completed --limit 5
Get your API key at https://elicit.com/settings
"""
import argparse
import json
import os
import sys
import textwrap
import time
import urllib.error
import urllib.parse
import urllib.request
BASE_URL = "https://elicit.com/api/v1"
# ── Helpers ──────────────────────────────────────────────────────────────────
def get_api_key():
    """Return the Elicit API key from the environment, exiting with a hint if unset."""
    api_key = os.environ.get("ELICIT_API_KEY")
    if api_key:
        return api_key
    # No key: tell the user where to get one and bail out.
    print("Error: ELICIT_API_KEY environment variable is not set.", file=sys.stderr)
    print("Get your API key at https://elicit.com/settings", file=sys.stderr)
    sys.exit(1)
def api_request(method, path, body=None, query=None):
    """Make an authenticated request to the Elicit API and return parsed JSON.

    Args:
        method: HTTP method ("GET", "POST", ...).
        path: API path appended to BASE_URL (e.g. "/search").
        body: Optional dict, serialized as the JSON request body.
        query: Optional dict of query parameters; None values are dropped.

    Exits the process with status 1 on HTTP or connection errors, printing
    the API's error message (when available) to stderr.
    """
    url = BASE_URL + path
    if query:
        # Drop unset parameters, then let urlencode escape both keys and
        # values. (The previous urllib.request.quote is an undocumented
        # re-export of urllib.parse.quote and never encoded the keys.)
        params = {k: v for k, v in query.items() if v is not None}
        if params:
            url += "?" + urllib.parse.urlencode(params)
    data = json.dumps(body).encode() if body else None
    req = urllib.request.Request(url, data=data, method=method)
    req.add_header("Authorization", f"Bearer {get_api_key()}")
    req.add_header("Content-Type", "application/json")
    req.add_header("Accept", "application/json")
    # urllib's default User-Agent is blocked by some CDNs
    req.add_header("User-Agent", "elicit-cli/1.0")
    try:
        with urllib.request.urlopen(req) as resp:
            return json.loads(resp.read().decode())
    except urllib.error.HTTPError as e:
        # Surface the API's error message when available — it's usually more
        # helpful than the generic HTTP status text.
        error_body = e.read().decode() if e.fp else ""
        try:
            detail = json.loads(error_body).get("message", error_body)
        except (json.JSONDecodeError, AttributeError):
            # Not JSON, or JSON that isn't an object — show the raw body.
            detail = error_body
        print(f"API error {e.code}: {detail}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"Connection error: {e.reason}", file=sys.stderr)
        sys.exit(1)
def truncate(text, width):
    """Trim *text* to at most *width* characters, appending '…' when shortened."""
    if not text:
        return ""
    # Collapse every run of whitespace (including newlines) to one space.
    collapsed = " ".join(text.split())
    if len(collapsed) > width:
        return collapsed[: width - 1] + "\u2026"
    return collapsed
def format_authors(authors, max_len=40):
    """Condense an author list into a readable short form."""
    if not authors:
        return ""
    # The API returns authors as a flat array of strings; skip anything else.
    names = [author for author in authors if author and isinstance(author, str)]
    if not names:
        return ""
    full = ", ".join(names)
    if len(full) > max_len:
        # Too long: fall back to "First Author et al." within the budget.
        return truncate(names[0], max_len - 7) + " et al."
    return full
# ── Pretty printers ─────────────────────────────────────────────────────────
def print_papers(papers):
    """Render a list of paper dicts as a numbered, ANSI-formatted listing."""
    if not papers:
        print("No papers found.")
        return
    # Size the output to the attached terminal; fall back when there is none.
    try:
        term_width = os.get_terminal_size().columns
    except OSError:
        term_width = 100
    for idx, paper in enumerate(papers, 1):
        title = paper.get("title") or "(untitled)"
        year = paper.get("year") or "n/a"
        citations = paper.get("citedByCount")
        authors = format_authors(paper.get("authors"), max_len=60)
        venue = paper.get("venue") or ""
        doi = paper.get("doi") or ""
        # Header line: number, title (bold), year.
        header = f" {idx}. {title}"
        print(f"\033[1m{truncate(header, term_width - 8)}\033[0m ({year})")
        # Metadata line: authors | venue | citations — whichever are present.
        cite_str = f"{citations} citations" if citations is not None else ""
        meta = " | ".join(part for part in (authors, venue, cite_str) if part)
        if meta:
            print(f" {truncate(meta, term_width - 6)}")
        # Dimmed single-line abstract preview.
        abstract = paper.get("abstract") or ""
        if abstract:
            print(f" \033[2m{truncate(abstract, term_width - 6)}\033[0m")
        # Resolvable DOI link when the paper has one.
        if doi:
            print(f" https://doi.org/{doi}")
        print()
    print(f" {len(papers)} paper(s) returned.")
def print_report_status(report, verbose=True):
    """Print a single report's status in a human-friendly way.

    With verbose=True, a completed report also gets its wrapped summary and a
    note about any available downloads.
    """
    status = report.get("status", "unknown")
    # The list endpoint puts title at the top level; the detail endpoint
    # nests it under result.
    title = (
        report.get("title")
        or report.get("result", {}).get("title")
        or report.get("reportId", "")
    )
    badges = {
        "completed": "\033[32m done \033[0m",
        "processing": "\033[33m running \033[0m",
        "failed": "\033[31m failed \033[0m",
    }
    print(f" [{badges.get(status, f' {status} ')}] {truncate(title, 70)}")
    url = report.get("url", "")
    if url:
        print(f" {url}")
    if verbose and status == "completed":
        summary = report.get("result", {}).get("summary")
        if summary:
            # Wrap the summary to the terminal width, dimmed and indented.
            try:
                wrap_width = os.get_terminal_size().columns - 12
            except OSError:
                wrap_width = 88
            body = textwrap.indent(textwrap.fill(summary, width=wrap_width), " ")
            print(f"\n\033[2m{body}\033[0m")
        downloads = [
            label
            for key, label in (("pdfUrl", "PDF"), ("docxUrl", "DOCX"))
            if report.get(key)
        ]
        if downloads:
            print(f"\n Downloads available: {', '.join(downloads)} (use --json for URLs)")
    print()
def print_reports_list(reports):
    """Print a compact, non-verbose status line per report, then a count."""
    if not reports:
        print("No reports found.")
        return
    for report in reports:
        print_report_status(report, verbose=False)
    print(f" {len(reports)} report(s).")
# ── Subcommands ──────────────────────────────────────────────────────────────
def cmd_search(args):
    """Search 125M+ academic papers."""
    body = {"query": args.query}
    if args.max_results is not None:
        body["maxResults"] = args.max_results
    # Attach a filters object only when the user supplied a filter flag.
    filters = {
        key: value
        for key, value in (("minYear", args.min_year), ("maxYear", args.max_year))
        if value is not None
    }
    if args.type:
        filters["typeTags"] = [args.type]
    if args.pubmed_only:
        filters["pubmedOnly"] = True
    if filters:
        body["filters"] = filters
    data = api_request("POST", "/search", body=body)
    if args.json:
        print(json.dumps(data, indent=2))
        return
    print()
    print_papers(data.get("papers", []))
def cmd_report(args):
    """Create an AI-powered research report, optionally waiting for completion."""
    body = {"researchQuestion": args.question}
    if args.search_papers is not None:
        body["maxSearchPapers"] = args.search_papers
    if args.extract_papers is not None:
        body["maxExtractPapers"] = args.extract_papers
    created = api_request("POST", "/reports", body=body)
    report_id = created.get("reportId")
    url = created.get("url", "")

    if args.no_wait:
        # Fire-and-forget: print the ID/URL and let the user poll later.
        if args.json:
            print(json.dumps(created, indent=2))
            return
        print(f"\n Report created: {report_id}")
        if url:
            print(f" View at: {url}")
        print(" Poll status with: python elicit.py reports")
        return

    # Poll until the report completes or fails (timeout after 20 minutes).
    print("\n Creating report\u2026", end="", flush=True)
    poll_interval = 15  # seconds between polls
    max_wait = 20 * 60  # give up after 20 minutes
    elapsed = 0
    while elapsed < max_wait:
        time.sleep(poll_interval)
        elapsed += poll_interval
        report = api_request("GET", f"/reports/{report_id}")
        status = report.get("status", "unknown")
        if status == "completed":
            print(" done.\n")
            if args.json:
                print(json.dumps(report, indent=2))
            else:
                print_report_status(report)
            return
        if status == "failed":
            print(" failed.\n", file=sys.stderr)
            if args.json:
                print(json.dumps(report, indent=2), file=sys.stderr)
            else:
                print_report_status(report)
            sys.exit(1)
        # Still processing: show progress and keep waiting.
        print(".", end="", flush=True)
        # Back off gently so we don't hammer the API on long reports.
        poll_interval = min(poll_interval + 5, 30)
    print(f"\n\n Report is still processing after {max_wait // 60} minutes.")
    print(f" Check status at: {url}")
    print(" Or run: python elicit.py reports")
def cmd_reports(args):
    """List your reports."""
    params = {}
    if args.limit is not None:
        params["limit"] = args.limit
    if args.status:
        params["status"] = args.status
    data = api_request("GET", "/reports", query=params or None)
    # The response may be a bare list, or an object wrapping the list under
    # "reports" (or "items").
    if isinstance(data, list):
        reports = data
    else:
        reports = data.get("reports", data.get("items", []))
    if args.json:
        print(json.dumps(data, indent=2))
    else:
        print()
        print_reports_list(reports)
# ── CLI definition ───────────────────────────────────────────────────────────
def build_parser():
    """Construct the argparse CLI: `search`, `report`, and `reports` subcommands."""
    parser = argparse.ArgumentParser(
        prog="elicit",
        description="Search 125M+ academic papers and create research reports from your terminal.",
        epilog="Get your API key at https://elicit.com/settings",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # `search`: query papers with optional year/type/source filters.
    search = commands.add_parser(
        "search",
        help="Search academic papers",
        description="Search 125M+ papers by keyword, topic, or research question.",
    )
    search.add_argument("query", help="Search query (e.g. 'CRISPR gene editing efficiency')")
    search.add_argument("--max-results", type=int, metavar="N", help="Maximum papers to return")
    search.add_argument("--min-year", type=int, metavar="YYYY", help="Earliest publication year")
    search.add_argument("--max-year", type=int, metavar="YYYY", help="Latest publication year")
    search.add_argument(
        "--type",
        metavar="TYPE",
        help="Study type filter (e.g. RCT, Meta-analysis, Review, CaseReport)",
    )
    search.add_argument("--pubmed-only", action="store_true", help="Restrict to PubMed-indexed papers")
    search.add_argument("--json", action="store_true", help="Output raw JSON instead of formatted table")
    search.set_defaults(func=cmd_search)

    # `report`: create a single AI-generated report from a question.
    report = commands.add_parser(
        "report",
        help="Create a research report",
        description="Generate an AI-powered research report from a question.",
    )
    report.add_argument("question", help="Research question for the report")
    report.add_argument(
        "--search-papers", type=int, metavar="N", help="Max papers to search (default: API default)"
    )
    report.add_argument(
        "--extract-papers", type=int, metavar="N", help="Max papers to extract data from (default: API default)"
    )
    report.add_argument(
        "--no-wait",
        action="store_true",
        help="Don't wait for completion — just create the report and print its URL",
    )
    report.add_argument("--json", action="store_true", help="Output raw JSON instead of formatted text")
    report.set_defaults(func=cmd_report)

    # `reports`: list previously created reports.
    reports = commands.add_parser(
        "reports",
        help="List your reports",
        description="List reports you've created via the API.",
    )
    reports.add_argument("--limit", type=int, metavar="N", help="Number of reports to return (default: 20)")
    reports.add_argument(
        "--status",
        metavar="STATUS",
        help="Filter by status (e.g. completed, processing, failed)",
    )
    reports.add_argument("--json", action="store_true", help="Output raw JSON instead of formatted table")
    reports.set_defaults(func=cmd_reports)
    return parser
def main():
    """Entry point: parse CLI arguments and dispatch to the chosen subcommand."""
    cli = build_parser()
    parsed = cli.parse_args()
    parsed.func(parsed)


if __name__ == "__main__":
    main()