1+ import enum
12import json
23import logging
4+ import multiprocessing
35import os
46import shutil
57import time
1012from tqdm import tqdm
1113
1214
13- class ExportType :
15+ class ExportType ( enum . StrEnum ) :
1416 MARKDOWN = "markdown"
1517 HTML = "html"
1618 PDF = "pdf"
1719
1820
19- class ViewExportType :
21+ class ViewExportType ( enum . StrEnum ) :
2022 CURRENT_VIEW = "currentView"
2123 ALL = "all"
2224
@@ -33,6 +35,7 @@ def __init__(
3335 current_view_export_type = ViewExportType .CURRENT_VIEW ,
3436 include_files = False ,
3537 recursive = True ,
38+ workers = multiprocessing .cpu_count (),
3639 ):
3740 self .export_name = f"export-{ datetime .now ().strftime ('%Y-%m-%d-%H-%M-%S' )} "
3841 self .token_v2 = token_v2
@@ -52,14 +55,15 @@ def __init__(
5255 "content-type" : "application/json" ,
5356 "cookie" : f"token_v2={ self .token_v2 } ;" ,
5457 }
58+ self .workers = workers
5559 os .makedirs (f"{ self .export_directory } { self .export_name } " , exist_ok = True )
5660
def to_uuid_format(self, s):
    """Return ``s`` laid out in dashed 8-4-4-4-12 form.

    Args:
        s: An identifier string, either already carrying dashes at offsets
            8, 13, 18 and 23, or a bare 32-character string.

    Returns:
        ``s`` unchanged when the four dashes are already in place; otherwise
        the 32 characters regrouped with dashes inserted.

    Raises:
        ValueError: If ``s`` is not already dashed and is not exactly 32
            characters long, so it cannot be regrouped into a valid layout.
    """
    # One-character slices (not indexing) so input shorter than 24
    # characters falls through to the length check instead of raising
    # IndexError.
    if all(s[pos:pos + 1] == "-" for pos in (8, 13, 18, 23)):
        return s
    if len(s) != 32:
        raise ValueError(
            f"expected a 32-character id or a dashed UUID, got {s!r}"
        )
    return f"{s[:8]}-{s[8:12]}-{s[12:16]}-{s[16:20]}-{s[20:]}"
6165
62- def get_format_options (self , export_type : str , include_files = False ):
66+ def get_format_options (self , export_type : ExportType , include_files = False ):
6367 format_options = {}
6468 if export_type == ExportType .PDF :
6569 format_options ["pdfFormat" ] = "Letter"
@@ -73,10 +77,10 @@ def export(self, id):
7377 url = "https://www.notion.so/api/v3/enqueueTask"
7478 id = self .to_uuid_format (s = id )
7579 export_options = {
76- "exportType" : self .export_type ,
80+ "exportType" : self .export_type . value ,
7781 "locale" : "en" ,
7882 "timeZone" : "Europe/London" ,
79- "collectionViewExportType" : self .current_view_export_type ,
83+ "collectionViewExportType" : self .current_view_export_type . value ,
8084 "flattenExportFiletree" : self .flatten_export_file_tree ,
8185 }
8286
@@ -117,7 +121,7 @@ def get_status(self, task_id):
117121 ).json ()["results" ]
118122 return response [0 ]
119123
120- def download (self , url , name ):
124+ def download (self , url ):
121125 response = requests .request ("GET" , url , headers = self .download_headers )
122126 file_name = url .split ("/" )[- 1 ][100 :]
123127 with open (
@@ -139,7 +143,7 @@ def process_page(self, page_details):
139143
140144 export_url = status .get ("status" , {}).get ("exportURL" )
141145 if export_url :
142- self .download (export_url , name )
146+ self .download (export_url )
143147 else :
144148 logging .warning (f"Failed to get exportURL for { name } " )
145149
@@ -179,7 +183,8 @@ def unpack(self):
179183 os .remove (full_file_path )
180184
181185 def process (self ):
182- with Pool () as pool :
186+ logging .info (f"Exporting { len (self .pages )} pages..." )
187+ with Pool (processes = self .workers ) as pool :
183188 with tqdm (total = len (self .pages ), dynamic_ncols = True ) as pbar :
184189 for result in pool .imap_unordered (
185190 self .process_page , self .pages .items ()
@@ -192,18 +197,6 @@ def process(self):
192197 pbar .set_postfix_str (
193198 f"Exporting { name } ... { pagesExported } pages already exported"
194199 )
195- pbar .update (1 ) # Update the bar by one task completion
200+ pbar .update (1 )
196201
197202 self .unpack ()
198-
199-
200- if __name__ == "__main__" :
201- export = NotionExporter (
202- token_v2 = TOKEN_V2 ,
203- file_token = FILE_TOKEN ,
204- pages = PAGES ,
205- export_directory = "test" ,
206- export_type = ExportType .PDF ,
207- include_files = True ,
208- )
209- export .process ()
0 commit comments