22import logging
33from typing import Any
44
5- from pydantic import BaseModel , TypeAdapter
5+ from pydantic import BaseModel , Field , TypeAdapter
66from requests import Session
77
88from ..notion_schemas .notion_block import (
@@ -135,7 +135,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
135135 {
136136 "type" : "link" ,
137137 "content" : [convert_rich_text (rich_text )],
138- "href" : rich_text .href ,
138+ "href" : rich_text .href , # FIXME: if it was a notion link, we should convert it to a link to the document
139139 }
140140 )
141141 else :
@@ -156,6 +156,11 @@ class ImportedAttachment(BaseModel):
156156 file : NotionFileHosted
157157
158158
class ImportedChildPage(BaseModel):
    """Pairs a Notion child-page block with the converted block that stands in for it.

    Instances are created while converting a page's blocks, at the point
    where a child-page block is turned into a placeholder link paragraph.
    """

    # The original Notion block whose specific type is a child page.
    child_page_block: NotionBlock
    # The converted placeholder block (a plain dict) emitted for that child page.
    # NOTE(review): presumably left as ``Any`` so the very same dict object is
    # kept and can be patched once the real link target is known -- confirm
    # before narrowing the type, since a stricter annotation may make pydantic
    # store a copy instead.
    block_to_update: Any
162+
163+
159164def convert_image (
160165 image : NotionImage , attachments : list [ImportedAttachment ]
161166) -> list [dict [str , Any ]]:
@@ -185,17 +190,21 @@ def convert_image(
185190
186191
187192def convert_block (
188- block : NotionBlock , attachments : list [ImportedAttachment ]
193+ block : NotionBlock ,
194+ attachments : list [ImportedAttachment ],
195+ child_page_blocks : list [ImportedChildPage ],
189196) -> list [dict [str , Any ]]:
190197 match block .specific :
191198 case NotionColumnList ():
192199 columns_content = []
193200 for column in block .children :
194- columns_content .extend (convert_block (column , attachments ))
201+ columns_content .extend (
202+ convert_block (column , attachments , child_page_blocks )
203+ )
195204 return columns_content
196205 case NotionColumn ():
197206 return [
198- convert_block (child_content , attachments )[0 ]
207+ convert_block (child_content , attachments , child_page_blocks )[0 ]
199208 for child_content in block .children
200209 ]
201210
@@ -222,7 +231,7 @@ def convert_block(
222231 }
223232 ]
224233 # case NotionDivider():
225- # return {"type": "divider", "properties": {}}
234+ # return [ {"type": "divider"}]
226235 case NotionCallout ():
227236 return [
228237 {
@@ -289,15 +298,23 @@ def convert_block(
289298 {
290299 "type" : "bulletListItem" ,
291300 "content" : convert_rich_texts (block .specific .rich_text ),
292- "children" : convert_block_list (block .children , attachments ),
301+ "children" : convert_block_list (
302+ block .children ,
303+ attachments ,
304+ child_page_blocks ,
305+ ),
293306 }
294307 ]
295308 case NotionNumberedListItem ():
296309 return [
297310 {
298311 "type" : "numberedListItem" ,
299312 "content" : convert_rich_texts (block .specific .rich_text ),
300- "children" : convert_block_list (block .children , attachments ),
313+ "children" : convert_block_list (
314+ block .children ,
315+ attachments ,
316+ child_page_blocks ,
317+ ),
301318 }
302319 ]
303320 case NotionToDo ():
@@ -306,7 +323,11 @@ def convert_block(
306323 "type" : "checkListItem" ,
307324 "content" : convert_rich_texts (block .specific .rich_text ),
308325 "checked" : block .specific .checked ,
309- "children" : convert_block_list (block .children , attachments ),
326+ "children" : convert_block_list (
327+ block .children ,
328+ attachments ,
329+ child_page_blocks ,
330+ ),
310331 }
311332 ]
312333 case NotionCode ():
@@ -333,6 +354,22 @@ def convert_block(
333354 ],
334355 }
335356 ]
357+ case NotionChildPage ():
358+ # TODO: convert to a link
359+ res = {
360+ "type" : "paragraph" ,
361+ "content" : [
362+ {
363+ "type" : "link" ,
364+ "content" : f"Child page: { block .specific .title } " ,
365+ "href" : "about:blank" , # populated later on
366+ },
367+ ],
368+ }
369+ child_page_blocks .append (
370+ ImportedChildPage (child_page_block = block , block_to_update = res )
371+ )
372+ return [res ]
336373 case NotionUnsupported ():
337374 return [
338375 {
@@ -368,19 +405,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
368405
369406
def convert_block_list(
    blocks: list[NotionBlock],
    attachments: list[ImportedAttachment],
    child_page_blocks: list[ImportedChildPage],
) -> list[dict[str, Any]]:
    """Convert a sequence of Notion blocks into one flat list of converted blocks.

    Each Notion block may convert to zero, one or several output blocks; the
    results are concatenated in input order.

    Args:
        blocks: Notion blocks to convert, in document order.
        attachments: Accumulator forwarded unchanged to ``convert_block``.
        child_page_blocks: Accumulator forwarded unchanged to
            ``convert_block``, which records child-page placeholders in it.

    Returns:
        The converted blocks, flattened into a single list.
    """
    return [
        converted
        for notion_block in blocks
        for converted in convert_block(notion_block, attachments, child_page_blocks)
    ]
377416
378417
class ImportedDocument(BaseModel):
    """Result of importing a single Notion page.

    Holds the source page, its converted content, and the documents imported
    from its child pages, forming a tree of documents.
    """

    # The Notion page this document was built from.
    page: NotionPage
    # Converted content blocks of the page, in document order.
    blocks: list[dict[str, Any]] = Field(default_factory=list)
    # Documents imported from pages whose parent is this page.
    children: list["ImportedDocument"] = Field(default_factory=list)
    # Attachments collected while converting the page's blocks.
    attachments: list[ImportedAttachment] = Field(default_factory=list)
    # Child-page placeholders collected while converting the page's blocks.
    child_page_blocks: list[ImportedChildPage] = Field(default_factory=list)
384424
385425
386426def find_block_child_page (block_id : str , all_pages : list [NotionPage ]):
@@ -393,57 +433,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
393433 return None
394434
395435
396- def convert_child_pages (
397- session : Session ,
398- parent : NotionPage ,
399- blocks : list [NotionBlock ],
400- all_pages : list [NotionPage ],
401- ) -> list [ImportedDocument ]:
402- children = []
403-
404- for page in all_pages :
405- if (
406- isinstance (page .parent , NotionParentPage )
407- and page .parent .page_id == parent .id
408- ):
409- children .append (import_page (session , page , all_pages ))
410-
411- for block in blocks :
412- if not isinstance (block .specific , NotionChildPage ):
413- continue
414-
415- # TODO: doesn't work, never finds the child
416- child_page = find_block_child_page (block .id , all_pages )
417- if child_page == None :
418- logger .warning (f"Cannot find child page of block { block .id } " )
419- continue
420- children .append (import_page (session , child_page , all_pages ))
421-
422- return children
423-
424-
def import_page(
    session: Session,
    page: NotionPage,
    child_page_blocs_ids_to_parent_page_ids: dict[str, str],
) -> ImportedDocument:
    """Fetch and convert the blocks of one Notion page.

    Child pages are not imported here: the returned document has no
    ``children``. Instead, every child-page block found on this page is
    recorded in ``child_page_blocs_ids_to_parent_page_ids`` so the caller can
    attach the documents to their parents afterwards.

    Args:
        session: HTTP session used to fetch the page's blocks.
        page: The Notion page to import.
        child_page_blocs_ids_to_parent_page_ids: Mapping updated in place;
            each child-page block id found on this page is mapped to
            ``page.id``.

    Returns:
        The imported document for ``page``, without children.
    """
    notion_blocks = fetch_block_children(session, page.id)
    logger.info(f"Page {page.get_title()} (id {page.id})")
    logger.info(notion_blocks)

    # Accumulators handed down through the block conversion.
    found_attachments: list[ImportedAttachment] = []
    found_child_pages: list[ImportedChildPage] = []
    converted = convert_block_list(
        notion_blocks, found_attachments, found_child_pages
    )

    # Remember which page each child-page block was found on.
    child_page_blocs_ids_to_parent_page_ids.update(
        {entry.child_page_block.id: page.id for entry in found_child_pages}
    )

    return ImportedDocument(
        page=page,
        blocks=converted,
        attachments=found_attachments,
        child_page_blocks=found_child_pages,
    )
439461
440462
def import_notion(token: str) -> list[ImportedDocument]:
    """Import all Notion pages accessible with ``token`` as a tree of documents.

    The import runs in two passes:

    1. Every accessible page is imported on its own (no recursion), and the
       child-page blocks seen on each page are recorded.
    2. The resulting documents are linked into a tree using each page's
       ``parent``.

    Pages whose parent cannot be resolved to an imported page are logged and
    left out of the tree rather than aborting the whole import. Pages whose
    parent is neither a page, a block nor the workspace are not attached
    anywhere.

    Args:
        token: Notion API token used to build the session.

    Returns:
        The documents for pages that sit directly in the workspace, each with
        its descendants attached through ``children``.
    """
    session = build_notion_session(token)
    all_pages = fetch_all_pages(session)

    # Pass 1: import each page in isolation.
    docs_by_page_id: dict[str, ImportedDocument] = {}
    child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {}
    for page in all_pages:
        docs_by_page_id[page.id] = import_page(
            session, page, child_page_blocs_ids_to_parent_page_ids
        )

    # Pass 2: wire the documents together.
    root_pages: list[ImportedDocument] = []
    for page in all_pages:
        doc = docs_by_page_id[page.id]
        if isinstance(page.parent, NotionParentPage):
            # The parent may not be in all_pages (e.g. the token can only see
            # a sub-page), so a plain index lookup here could raise KeyError.
            parent_doc = docs_by_page_id.get(page.parent.page_id)
            if parent_doc is None:
                logger.warning(
                    f"Page {page.id} has parent page {page.parent.page_id}, "
                    "but that page was not imported."
                )
            else:
                parent_doc.children.append(doc)
        elif isinstance(page.parent, NotionParentBlock):
            # NOTE(review): this looks the page id up among child-page *block*
            # ids, i.e. it assumes a child-page block shares its id with the
            # page it represents -- confirm against the Notion API.
            parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id)
            if parent_page_id:
                docs_by_page_id[parent_page_id].children.append(doc)
            else:
                logger.warning(
                    f"Page {page.id} has a parent block, but no parent page found."
                )
        elif isinstance(page.parent, NotionParentWorkspace):
            # This is a root page, not a child of another page
            root_pages.append(doc)

    return root_pages
0 commit comments