1212from tqdm import tqdm
1313
1414
class ExportType:
    """Namespace of string constants identifying the supported export formats.

    The members are plain ``str`` values, so they can be compared with ``==``
    and embedded directly in serialized payloads.
    """

    MARKDOWN = "markdown"
    HTML = "html"
    PDF = "pdf"
1919
2020
class ViewExportType:
    """Namespace of string constants for the view-export setting.

    The members are plain ``str`` values. How each value is interpreted is
    decided by whatever consumes the export options (not visible here).
    """

    CURRENT_VIEW = "currentView"
    ALL = "all"
2424
@@ -37,6 +37,22 @@ def __init__(
3737 recursive = True ,
3838 workers = multiprocessing .cpu_count (),
3939 ):
40+ """
41+ Initializes the NotionExporter class.
42+
43+ Args:
44+ token_v2 (str): The user's Notion V2 token.
45+ file_token (str): The user's file token for Notion.
46+ pages (dict): Dictionary of pages to be exported.
47+ export_directory (str, optional): Directory where exports will be saved. Defaults to the current directory.
48+ flatten_export_file_tree (bool, optional): If True, flattens the export file tree. Defaults to True.
49+ export_type (ExportType, optional): Type of export (e.g., MARKDOWN, HTML, PDF). Defaults to MARKDOWN.
50+ current_view_export_type (ViewExportType, optional): Type of view export (e.g., CURRENT_VIEW, ALL). Defaults to CURRENT_VIEW.
51+ include_files (bool, optional): If True, includes files in the export. Defaults to False.
52+ recursive (bool, optional): If True, exports will be recursive. Defaults to True.
53+ workers (int, optional): Number of worker threads for exporting. Defaults to the number of CPUs available.
54+ """
55+
4056 self .export_name = f"export-{ datetime .now ().strftime ('%Y-%m-%d-%H-%M-%S' )} "
4157 self .token_v2 = token_v2
4258 self .file_token = file_token
@@ -59,11 +75,30 @@ def __init__(
5975 os .makedirs (f"{ self .export_directory } { self .export_name } " , exist_ok = True )
6076
6177 def _to_uuid_format (self , s ):
78+ """
79+ Converts a string to UUID format.
80+
81+ Args:
82+ s (str): The input string.
83+
84+ Returns:
85+ str: The string in UUID format.
86+ """
6287 if "-" == s [8 ] and "-" == s [13 ] and "-" == s [18 ] and "-" == s [23 ]:
6388 return s
6489 return f"{ s [:8 ]} -{ s [8 :12 ]} -{ s [12 :16 ]} -{ s [16 :20 ]} -{ s [20 :]} "
6590
6691 def _get_format_options (self , export_type : ExportType , include_files = False ):
92+ """
93+ Retrieves format options based on the export type and whether to include files.
94+
95+ Args:
96+ export_type (ExportType): Type of export (e.g., MARKDOWN, HTML, PDF).
97+ include_files (bool, optional): If True, includes files in the export. Defaults to False.
98+
99+ Returns:
100+ dict: A dictionary containing format options.
101+ """
67102 format_options = {}
68103 if export_type == ExportType .PDF :
69104 format_options ["pdfFormat" ] = "Letter"
@@ -74,6 +109,15 @@ def _get_format_options(self, export_type: ExportType, include_files=False):
74109 return format_options
75110
76111 def _export (self , id ):
112+ """
113+ Initiates the export of a Notion page.
114+
115+ Args:
116+ id (str): The ID of the Notion page.
117+
118+ Returns:
119+ str: The task ID of the initiated export.
120+ """
77121 url = "https://www.notion.so/api/v3/enqueueTask"
78122 id = self ._to_uuid_format (s = id )
79123 export_options = {
@@ -112,6 +156,15 @@ def _export(self, id):
112156 return response ["taskId" ]
113157
114158 def _get_status (self , task_id ):
159+ """
160+ Fetches the status of an export task.
161+
162+ Args:
163+ task_id (str): The ID of the export task.
164+
165+ Returns:
166+ dict: A dictionary containing details about the task status.
167+ """
115168 url = "https://www.notion.so/api/v3/getTasks"
116169
117170 payload = json .dumps ({"taskIds" : [task_id ]})
@@ -122,6 +175,12 @@ def _get_status(self, task_id):
122175 return response [0 ]
123176
124177 def _download (self , url ):
178+ """
179+ Downloads an exported file from a given URL.
180+
181+ Args:
182+ url (str): The URL of the exported file.
183+ """
125184 response = requests .request ("GET" , url , headers = self .download_headers )
126185 file_name = url .split ("/" )[- 1 ][100 :]
127186 with open (
@@ -131,6 +190,15 @@ def _download(self, url):
131190 f .write (response .content )
132191
133192 def _process_page (self , page_details ):
193+ """
194+ Processes an individual Notion page for export.
195+
196+ Args:
197+ page_details (tuple): Tuple containing the name and ID of the Notion page.
198+
199+ Returns:
200+ dict: Details about the export status and any errors.
201+ """
134202 name , id = page_details
135203 task_id = self ._export (id )
136204
@@ -155,10 +223,17 @@ def _process_page(self, page_details):
155223 }
156224
157225 def _wait_for_export_completion (self , task_id ):
158- """Helper method to wait until the export is complete or failed."""
226+ """
227+ Waits until a given export task completes or fails.
228+
229+ Args:
230+ task_id (str): The ID of the export task.
231+
232+ Returns:
233+ tuple: A tuple containing the status, state, error, and number of pages exported.
234+ """
159235 while True :
160236 status = self ._get_status (task_id )
161- # print(status)
162237
163238 if not status :
164239 time .sleep (1 )
@@ -175,6 +250,9 @@ def _wait_for_export_completion(self, task_id):
175250 time .sleep (1 )
176251
177252 def _unpack (self ):
253+ """
254+ Unpacks and saves exported content from zip archives.
255+ """
178256 directory_path = f"{ self .export_directory } { self .export_name } "
179257 for file in os .listdir (directory_path ):
180258 if file .endswith (".zip" ):
@@ -183,11 +261,17 @@ def _unpack(self):
183261 os .remove (full_file_path )
184262
185263 def process (self ):
264+ """
265+ Processes and exports all provided Notion pages.
266+ """
186267 logging .info (f"Exporting { len (self .pages )} pages..." )
187268
188269 with ThreadPoolExecutor (max_workers = self .workers ) as executor :
189270 with tqdm (total = len (self .pages ), dynamic_ncols = True ) as pbar :
190- futures = {executor .submit (self ._process_page , item ): item for item in self .pages .items ()}
271+ futures = {
272+ executor .submit (self ._process_page , item ): item
273+ for item in self .pages .items ()
274+ }
191275 for future in concurrent .futures .as_completed (futures ):
192276 result = future .result ()
193277 if result ["state" ] == "failure" :
0 commit comments