@@ -41,20 +41,20 @@ The crawlPage API internally uses the [puppeteer](https://github.com/puppeteer/p
   * [Multiple ways of writing requestConfig options](#Multiple-ways-of-writing-requestConfig-options)
   * [Multiple ways to get results](#Multiple-ways-to-get-results)
 - [API](#API)
-  * [x-crawl](#x-crawl-2)
-    + [Type](#Type-1)
+  * [xCrawl](#xCrawl)
+    + [Type](#Type)
     + [Example](#Example-1)
   * [crawlPage](#crawlPage)
-    + [Type](#Type-2)
+    + [Type](#Type-1)
     + [Example](#Example-2)
   * [crawlData](#crawlData)
-    + [Type](#Type-3)
+    + [Type](#Type-2)
     + [Example](#Example-3)
   * [crawlFile](#crawlFile)
-    + [Type](#Type-4)
+    + [Type](#Type-3)
     + [Example](#Example-4)
   * [crawlPolling](#crawlPolling)
-    + [Type](#Type-5)
+    + [Type](#Type-4)
     + [Example](#Example-5)
 - [Types](#Types)
   * [AnyObject](#AnyObject)
@@ -64,14 +64,14 @@ The crawlPage API internally uses the [puppeteer](https://github.com/puppeteer/p
   * [RequestConfig](#RequestConfig)
   * [IntervalTime](#IntervalTime)
   * [XCrawlBaseConfig](#XCrawlBaseConfig)
-  * [CrawlPageConfig](#CrawlPageConfig)
+  * [CrawlPageConfig](#CrawlPageConfig)
   * [CrawlBaseConfigV1](#CrawlBaseConfigV1)
   * [CrawlDataConfig](#CrawlDataConfig)
   * [CrawlFileConfig](#CrawlFileConfig)
   * [StartPollingConfig](#StartPollingConfig)
   * [CrawlResCommonV1](#CrawlResCommonV1)
   * [CrawlResCommonArrV1](#CrawlResCommonArrV1)
-  * [CrawlPage](#CrawlPage-2)
+  * [CrawlPage](#CrawlPage-1)
   * [FileInfo](#FileInfo)
 - [More](#More)
 
@@ -98,23 +98,25 @@ const myXCrawl = xCrawl({
 })
 
 // 3.Set the crawling task
-// Call the startPolling API to start the polling function, and the callback function will be called every other day
-myXCrawl.startPolling({ d: 1 }, (count, stopPolling) => {
-  myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes').then((res) => {
-    const { jsdom } = res // By default, the JSDOM library is used to parse Page
-
-    // Get the cover image elements for Plus listings
-    const imgEls = jsdom.window.document
-      .querySelector('.a1stauiv')
-      ?.querySelectorAll('picture img')
-
-    // set request configuration
-    const requestConfig: string[] = []
-    imgEls?.forEach((item) => requestConfig.push(item.src))
-
-    // Call the crawlFile API to crawl pictures
-    myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
-  })
+/*
+  Call the startPolling API to start the polling function,
+  and the callback function will be called every other day
+*/
+myXCrawl.startPolling({ d: 1 }, async (count, stopPolling) => {
+  // Call crawlPage API to crawl Page
+  const { jsdom } = await myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes')
+
+  // Get the cover image elements for Plus listings
+  const imgEls = jsdom.window.document
+    .querySelector('.a1stauiv')
+    ?.querySelectorAll('picture img')
+
+  // set request configuration
+  const requestConfig: string[] = []
+  imgEls?.forEach((item) => requestConfig.push(item.src))
+
+  // Call the crawlFile API to crawl pictures
+  myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
 })
 ```
 
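Read straight through, the added (`+`) side of this hunk becomes the standalone script below. The `xCrawl()` options are an illustrative assumption (only `intervalTime` appears elsewhere in this diff), and the TypeScript annotation on `requestConfig` is dropped so the sketch runs as plain JavaScript; everything inside `startPolling` follows the added lines above.

```js
import xCrawl from 'x-crawl'

// Create a crawler instance (assumed options; only intervalTime is shown elsewhere in this diff)
const myXCrawl = xCrawl({ intervalTime: { max: 3000, min: 1000 } })

/*
  Call the startPolling API to start the polling function,
  and the callback function will be called every other day
*/
myXCrawl.startPolling({ d: 1 }, async (count, stopPolling) => {
  // Call crawlPage API to crawl the Plus listings page
  const { jsdom } = await myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes')

  // Get the cover image elements for Plus listings
  const imgEls = jsdom.window.document
    .querySelector('.a1stauiv')
    ?.querySelectorAll('picture img')

  // Collect the image URLs as the request configuration
  const requestConfig = []
  imgEls?.forEach((item) => requestConfig.push(item.src))

  // Call the crawlFile API to download the pictures into ./upload
  myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
})
```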
@@ -136,7 +138,7 @@ running result:
 
 #### An example of a crawler application
 
-Create a new **application instance** via [xCrawl()](#x-crawl-2):
+Create a new **application instance** via [xCrawl()](#xCrawl):
 
 ```js
 import xCrawl from 'x-crawl'
@@ -321,13 +323,10 @@ const myXCrawl = xCrawl({
   intervalTime: { max: 3000, min: 1000 }
 })
 
-myXCrawl.startPolling({ h: 2, m: 30 }, (count, stopPolling) => {
+myXCrawl.startPolling({ h: 2, m: 30 }, async (count, stopPolling) => {
   // will be executed every two and a half hours
   // crawlPage/crawlData/crawlFile
-  myXCrawl.crawlPage('https://xxx.com').then(res => {
-    const { jsdom, browser, page } = res
-
-  })
+  const { jsdom, browser, page } = await myXCrawl.crawlPage('https://xxx.com')
 })
 ```
 
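Assembled with the unchanged `xCrawl()` config above the hunk, the new `async` form reads as below. The hunk only shows that the callback receives `count` and `stopPolling`; using `stopPolling()` to end the polling after a number of runs is an assumption based on its name, not something this diff states.

```js
import xCrawl from 'x-crawl'

const myXCrawl = xCrawl({ intervalTime: { max: 3000, min: 1000 } })

myXCrawl.startPolling({ h: 2, m: 30 }, async (count, stopPolling) => {
  // will be executed every two and a half hours
  // crawlPage also exposes the underlying browser and page objects
  const { jsdom, browser, page } = await myXCrawl.crawlPage('https://xxx.com')

  // e.g. log which polling run this is and the crawled page title
  console.log(count, jsdom.window.document.title)

  // assumption: stopPolling() ends the polling loop
  if (count >= 10) stopPolling()
})
```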
@@ -476,7 +475,7 @@ It can be selected according to the actual situation.
 
 ## API
 
-### x-crawl
+### xCrawl
 
 Create a crawler instance via call xCrawl. The request queue is maintained by the instance method itself, not by the instance itself.
 
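A minimal sketch of what this renamed section describes, using only calls that appear elsewhere in this diff: `xCrawl()` returns a crawler application instance, and its methods (such as `crawlPage`) each maintain their own request queue.

```js
import xCrawl from 'x-crawl'

// Create an independent crawler application instance
const myXCrawl = xCrawl({ intervalTime: { max: 3000, min: 1000 } })

// The request queue is maintained per method call (per the section above),
// so these two crawlPage calls do not share a queue
myXCrawl.crawlPage('https://xxx.com/page/1')
myXCrawl.crawlPage('https://xxx.com/page/2')
```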
@@ -515,7 +514,7 @@ crawlPage is the method of the crawler instance, usually used to crawl page.
 #### Type
 
 - Look at the [CrawlPageConfig](#CrawlPageConfig) type
-- Look at the [CrawlPage](#CrawlPage-2) type
+- Look at the [CrawlPage](#CrawlPage-1) type
 
 ```
 function crawlPage: (