@@ -6,13 +6,19 @@ x-crawl is a Nodejs multifunctional crawler library.
66
77## Feature
88
9- - Crawl HTML, JSON, file resources, etc. with simple configuration
10- - Use puppeteer to crawl HTML, and use JSDOM library to parse HTML, or parse HTML by yourself
11- - Support asynchronous/synchronous way to crawl data
12- - Support Promise/Callback way to get the result
13- - Polling function
14- - Anthropomorphic request interval
15- - Written in TypeScript, provides generics
9+ - Crawl HTML, JSON, file resources, etc. with simple configuration.
10+ - Crawls HTML with the built-in puppeteer and parses it with the JSDOM library.
11+ - Supports asynchronous/synchronous ways to crawl data.
12+ - Supports Promise/Callback ways to get the result.
13+ - Polling function.
14+ - Anthropomorphic request interval.
15+ - Written in TypeScript, provides generics.
16+
17+ ## Benefits provided by using puppeteer
18+
19+ - Generate screenshots and PDFs of pages.
20+ - Crawl a SPA (Single-Page Application) and generate pre-rendered content (i.e. "SSR" (Server-Side Rendering)).
21+ - Automate form submission, UI testing, keyboard input, etc.
1622
1723# Table of Contents
1824
@@ -41,14 +47,15 @@ x-crawl is a Nodejs multifunctional crawler library.
4147 * [ Method] ( #Method )
4248 * [ RequestConfig] ( #RequestConfig )
4349 * [ IntervalTime] ( #IntervalTime )
44- * [ FetchBaseConifg] ( #FetchBaseConifg )
4550 * [ XCrawlBaseConifg] ( #XCrawlBaseConifg )
51+ * [ FetchBaseConifgV1] ( #FetchBaseConifgV1 )
52+ * [ FetchBaseConifgV2] ( #FetchBaseConifgV2 )
4653 * [ FetchHTMLConfig] ( #FetchHTMLConfig )
47- * [ FetchDataConfig] ( #FetchDataConfig )
54+ * [ FetchDataConfig] ( #FetchDataConfig )
4855 * [ FetchFileConfig] ( #FetchFileConfig )
4956 * [ StartPollingConfig] ( #StartPollingConfig )
50- * [ FetchCommon ] ( #FetchCommon )
51- * [ FetchCommonArr ] ( #FetchCommonArr )
57+ * [ FetchResCommonV1 ] ( #FetchResCommonV1 )
58+ * [ FetchResCommonArrV1 ] ( #FetchResCommonArrV1 )
5259 * [ FileInfo] ( #FileInfo )
5360 * [ FetchHTML] ( #FetchHTML )
5461- [ More] ( #More )
@@ -318,7 +325,6 @@ interface FetchBaseConifgV1 {
318325` ` ` ts
319326interface FetchBaseConifgV2 {
320327 url: string
321- header?: AnyObject
322328 timeout?: number
323329 proxy?: string
324330}
@@ -364,7 +370,7 @@ interface StartPollingConfig {
364370interface FetchCommon< T > {
365371 id: number
366372 statusCode: number | undefined
367- headers: IncomingHttpHeaders // node : http type
373+ headers: IncomingHttpHeaders // The type of IncomingHttpHeaders in the Node.js http module
368374 data: T
369375}
370376` ` `
@@ -392,8 +398,7 @@ interface FileInfo {
392398interface FetchHTML {
393399 httpResponse: HTTPResponse | null // The type of HTTPResponse in the puppeteer library
394400 data: {
395- page: Page
396- content: string
401+ page: Page // The type of Page in the puppeteer library
397402 jsdom: JSDOM // The type of JSDOM in the jsdom library
398403 }
399404}
0 commit comments