-
Notifications
You must be signed in to change notification settings - Fork 33
Expand file tree
/
Copy pathhandler.ts
More file actions
105 lines (92 loc) · 2.6 KB
/
handler.ts
File metadata and controls
105 lines (92 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/**
* AWS Lambda Handler for Reader
*
* NOTE: Running a full browser in Lambda requires special configuration:
* - Use Lambda container images (not zip packages)
* - Include Chrome/Chromium in the container
* - Configure sufficient memory (2GB+)
* - Set longer timeout (30-60 seconds)
*
* Consider using AWS ECS/Fargate for production browser workloads.
*/
import { ReaderClient } from "@vakra-dev/reader";
import type { APIGatewayProxyEvent, APIGatewayProxyResult } from "aws-lambda";
/**
 * JSON payload expected in the body of a scrape request.
 */
interface ScrapeRequest {
  /**
   * Pages to scrape. The handler rejects requests where this is missing,
   * empty, or longer than five entries.
   */
  urls: Array<string>;
  /** Output formats to produce; the handler falls back to `["markdown"]` when omitted. */
  formats?: Array<string>;
}
// Reuse client across warm Lambda invocations.
// `reader` only ever holds a client whose start() has completed successfully.
let reader: ReaderClient | null = null;
// In-flight startup, shared so overlapping callers do not start a second client.
let readerStartup: Promise<ReaderClient> | null = null;

/**
 * Returns the shared ReaderClient, creating and starting it on first use.
 *
 * The client is cached only after `start()` resolves. If startup fails, the
 * error is propagated to the caller and nothing is cached, so the next
 * invocation retries with a fresh client instead of reusing one that never
 * started.
 *
 * @returns The started, module-level ReaderClient instance.
 * @throws Whatever `ReaderClient.start()` rejects with.
 */
async function getReader(): Promise<ReaderClient> {
  if (reader) {
    return reader;
  }

  if (!readerStartup) {
    readerStartup = (async () => {
      const candidate = new ReaderClient();
      await candidate.start();
      return candidate;
    })();
  }

  try {
    reader = await readerStartup;
    return reader;
  } finally {
    // Success: `reader` now holds the client. Failure: allow a clean retry.
    readerStartup = null;
  }
}
/** Upper bound on URLs accepted in a single request. */
const MAX_URLS_PER_REQUEST = 5;

/** Content-type and CORS headers attached to every response. */
const RESPONSE_HEADERS = {
  "Content-Type": "application/json",
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Methods": "POST, OPTIONS",
  "Access-Control-Allow-Headers": "Content-Type",
};

/** Outcome of validating a request body: a usable request or a client-facing error. */
type ParsedScrapeRequest =
  | { ok: true; request: ScrapeRequest }
  | { ok: false; error: string };

/** Builds a JSON response carrying the shared headers. */
function jsonResponse(statusCode: number, payload: unknown): APIGatewayProxyResult {
  return { statusCode, headers: RESPONSE_HEADERS, body: JSON.stringify(payload) };
}

/**
 * Parses and validates the raw request body without throwing.
 *
 * Malformed JSON, non-object payloads, and invalid `urls` are reported as
 * validation errors so the caller can answer 400 rather than 500.
 */
function parseScrapeRequest(rawBody: string | null | undefined): ParsedScrapeRequest {
  let payload: unknown;
  try {
    // `||` (not `??`) on purpose: an empty-string body is treated like a missing one.
    payload = JSON.parse(rawBody || "{}");
  } catch {
    return { ok: false, error: "Request body must be valid JSON" };
  }

  // JSON such as `null`, `42` or `"x"` parses fine but has no fields to read.
  const fields: Record<string, unknown> =
    typeof payload === "object" && payload !== null
      ? (payload as Record<string, unknown>)
      : {};

  const urls = fields.urls;
  if (!Array.isArray(urls) || urls.length === 0) {
    return { ok: false, error: "urls is required and must be a non-empty array" };
  }
  if (urls.length > MAX_URLS_PER_REQUEST) {
    return { ok: false, error: `Maximum ${MAX_URLS_PER_REQUEST} URLs per request` };
  }
  if (!urls.every((entry) => typeof entry === "string")) {
    return { ok: false, error: "urls must contain only strings" };
  }

  // `formats` is forwarded as received; it is not validated here.
  return {
    ok: true,
    request: { urls, formats: fields.formats as ScrapeRequest["formats"] },
  };
}

/**
 * Lambda entry point: scrapes the URLs listed in the JSON request body.
 *
 * Responses:
 * - `OPTIONS` requests get an empty 200 (CORS preflight).
 * - Invalid bodies (malformed JSON, missing/empty/oversized/non-string `urls`)
 *   get a 400 with `{ success: false, error }`.
 * - Successful scrapes get a 200 with `{ success: true, data, batchMetadata }`.
 * - Any error raised while obtaining the client or scraping gets a 500 with
 *   `{ success: false, error }`.
 *
 * @param event API Gateway proxy event; only `httpMethod` and `body` are read.
 * @returns The API Gateway proxy response; this function does not reject.
 */
export async function handler(event: APIGatewayProxyEvent): Promise<APIGatewayProxyResult> {
  // Handle preflight
  if (event.httpMethod === "OPTIONS") {
    return { statusCode: 200, headers: RESPONSE_HEADERS, body: "" };
  }

  const parsed = parseScrapeRequest(event.body);
  if (!parsed.ok) {
    return jsonResponse(400, { success: false, error: parsed.error });
  }

  try {
    // Get or initialize reader client
    const client = await getReader();

    const result = await client.scrape({
      urls: parsed.request.urls,
      // The library's format type is not visible here, hence the cast.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      formats: (parsed.request.formats as any) || ["markdown"],
      batchConcurrency: 1, // Sequential in Lambda
      timeoutMs: 25000, // Leave buffer for Lambda timeout
    });

    return jsonResponse(200, {
      success: true,
      data: result.data,
      batchMetadata: result.batchMetadata,
    });
  } catch (error: unknown) {
    console.error("Lambda error:", error);
    // Narrow before reading `.message`; a thrown non-Error must not crash the handler.
    const message =
      error instanceof Error && error.message ? error.message : "Internal server error";
    return jsonResponse(500, { success: false, error: message });
  }
}