22import logging
33from typing import Any
44
5- from pydantic import BaseModel , TypeAdapter
5+ from pydantic import BaseModel , Field , TypeAdapter
66from requests import Session
77
88from ..notion_schemas .notion_block import (
@@ -138,7 +138,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]
138138 {
139139 "type" : "link" ,
140140 "content" : [convert_rich_text (rich_text )],
141- "href" : rich_text .href ,
141+ "href" : rich_text .href , # FIXME: if it was a notion link, we should convert it to a link to the document
142142 }
143143 )
144144 else :
@@ -159,6 +159,11 @@ class ImportedAttachment(BaseModel):
159159 file : NotionFileHosted
160160
161161
162+ class ImportedChildPage (BaseModel ):
163+ child_page_block : NotionBlock
164+ block_to_update : Any
165+
166+
162167def convert_image (
163168 image : NotionImage , attachments : list [ImportedAttachment ]
164169) -> list [dict [str , Any ]]:
@@ -188,17 +193,21 @@ def convert_image(
188193
189194
190195def convert_block (
191- block : NotionBlock , attachments : list [ImportedAttachment ]
196+ block : NotionBlock ,
197+ attachments : list [ImportedAttachment ],
198+ child_page_blocks : list [ImportedChildPage ],
192199) -> list [dict [str , Any ]]:
193200 match block .specific :
194201 case NotionColumnList ():
195202 columns_content = []
196203 for column in block .children :
197- columns_content .extend (convert_block (column , attachments ))
204+ columns_content .extend (
205+ convert_block (column , attachments , child_page_blocks )
206+ )
198207 return columns_content
199208 case NotionColumn ():
200209 return [
201- convert_block (child_content , attachments )[0 ]
210+ convert_block (child_content , attachments , child_page_blocks )[0 ]
202211 for child_content in block .children
203212 ]
204213
@@ -225,7 +234,7 @@ def convert_block(
225234 }
226235 ]
227236 # case NotionDivider():
228- # return {"type": "divider", "properties": {}}
237+ # return [ {"type": "divider"}]
229238 case NotionCallout ():
230239 return [
231240 {
@@ -292,15 +301,23 @@ def convert_block(
292301 {
293302 "type" : "bulletListItem" ,
294303 "content" : convert_rich_texts (block .specific .rich_text ),
295- "children" : convert_block_list (block .children , attachments ),
304+ "children" : convert_block_list (
305+ block .children ,
306+ attachments ,
307+ child_page_blocks ,
308+ ),
296309 }
297310 ]
298311 case NotionNumberedListItem ():
299312 return [
300313 {
301314 "type" : "numberedListItem" ,
302315 "content" : convert_rich_texts (block .specific .rich_text ),
303- "children" : convert_block_list (block .children , attachments ),
316+ "children" : convert_block_list (
317+ block .children ,
318+ attachments ,
319+ child_page_blocks ,
320+ ),
304321 }
305322 ]
306323 case NotionToDo ():
@@ -309,7 +326,11 @@ def convert_block(
309326 "type" : "checkListItem" ,
310327 "content" : convert_rich_texts (block .specific .rich_text ),
311328 "checked" : block .specific .checked ,
312- "children" : convert_block_list (block .children , attachments ),
329+ "children" : convert_block_list (
330+ block .children ,
331+ attachments ,
332+ child_page_blocks ,
333+ ),
313334 }
314335 ]
315336 case NotionCode ():
@@ -336,6 +357,22 @@ def convert_block(
336357 ],
337358 }
338359 ]
360+ case NotionChildPage ():
361+ # TODO: convert to a link
362+ res = {
363+ "type" : "paragraph" ,
364+ "content" : [
365+ {
366+ "type" : "link" ,
367+ "content" : f"Child page: { block .specific .title } " ,
368+ "href" : "about:blank" , # populated later on
369+ },
370+ ],
371+ }
372+ child_page_blocks .append (
373+ ImportedChildPage (child_page_block = block , block_to_update = res )
374+ )
375+ return [res ]
339376 case NotionUnsupported ():
340377 return [
341378 {
@@ -375,19 +412,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]
375412
376413
377414def convert_block_list (
378- blocks : list [NotionBlock ], attachments : list [ImportedAttachment ]
415+ blocks : list [NotionBlock ],
416+ attachments : list [ImportedAttachment ],
417+ child_page_blocks : list [ImportedChildPage ],
379418) -> list [dict [str , Any ]]:
380419 converted_blocks = []
381420 for block in blocks :
382- converted_blocks .extend (convert_block (block , attachments ))
421+ converted_blocks .extend (convert_block (block , attachments , child_page_blocks ))
383422 return converted_blocks
384423
385424
386425class ImportedDocument (BaseModel ):
387426 page : NotionPage
388- blocks : list [dict [str , Any ]] = []
389- children : list ["ImportedDocument" ] = []
390- attachments : list [ImportedAttachment ] = []
427+ blocks : list [dict [str , Any ]] = Field (default_factory = list )
428+ children : list ["ImportedDocument" ] = Field (default_factory = list )
429+ attachments : list [ImportedAttachment ] = Field (default_factory = list )
430+ child_page_blocks : list [ImportedChildPage ] = Field (default_factory = list )
391431
392432
393433def find_block_child_page (block_id : str , all_pages : list [NotionPage ]):
@@ -400,57 +440,62 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]):
400440 return None
401441
402442
403- def convert_child_pages (
404- session : Session ,
405- parent : NotionPage ,
406- blocks : list [NotionBlock ],
407- all_pages : list [NotionPage ],
408- ) -> list [ImportedDocument ]:
409- children = []
410-
411- for page in all_pages :
412- if (
413- isinstance (page .parent , NotionParentPage )
414- and page .parent .page_id == parent .id
415- ):
416- children .append (import_page (session , page , all_pages ))
417-
418- for block in blocks :
419- if not isinstance (block .specific , NotionChildPage ):
420- continue
421-
422- # TODO: doesn't work, never finds the child
423- child_page = find_block_child_page (block .id , all_pages )
424- if child_page == None :
425- logger .warning (f"Cannot find child page of block { block .id } " )
426- continue
427- children .append (import_page (session , child_page , all_pages ))
428-
429- return children
430-
431-
432443def import_page (
433- session : Session , page : NotionPage , all_pages : list [NotionPage ]
444+ session : Session ,
445+ page : NotionPage ,
446+ child_page_blocs_ids_to_parent_page_ids : dict [str , str ],
434447) -> ImportedDocument :
435448 blocks = fetch_block_children (session , page .id )
436449 logger .info (f"Page { page .get_title ()} (id { page .id } )" )
437450 logger .info (blocks )
438- attachments = []
439- converted_blocks = convert_block_list (blocks , attachments )
451+ attachments : list [ImportedAttachment ] = []
452+
453+ child_page_blocks : list [ImportedChildPage ] = []
454+
455+ converted_blocks = convert_block_list (blocks , attachments , child_page_blocks )
456+
457+ for child_page_block in child_page_blocks :
458+ child_page_blocs_ids_to_parent_page_ids [
459+ child_page_block .child_page_block .id
460+ ] = page .id
461+
440462 return ImportedDocument (
441463 page = page ,
442464 blocks = converted_blocks ,
443- children = convert_child_pages (session , page , blocks , all_pages ),
444465 attachments = attachments ,
466+ child_page_blocks = child_page_blocks ,
445467 )
446468
447469
448470def import_notion (token : str ) -> list [ImportedDocument ]:
449471 """Recursively imports all Notion pages and blocks accessible using the given token."""
450472 session = build_notion_session (token )
451473 all_pages = fetch_all_pages (session )
452- docs = []
474+ docs_by_page_id : dict [str , ImportedDocument ] = {}
475+ child_page_blocs_ids_to_parent_page_ids : dict [str , str ] = {}
453476 for page in all_pages :
454- if isinstance (page .parent , NotionParentWorkspace ):
455- docs .append (import_page (session , page , all_pages ))
456- return docs
477+ docs_by_page_id [page .id ] = import_page (
478+ session , page , child_page_blocs_ids_to_parent_page_ids
479+ )
480+
481+ root_pages = []
482+ for page in all_pages :
483+ if isinstance (page .parent , NotionParentPage ):
484+ docs_by_page_id [page .parent .page_id ].children .append (
485+ docs_by_page_id [page .id ]
486+ )
487+ elif isinstance (page .parent , NotionParentBlock ):
488+ parent_page_id = child_page_blocs_ids_to_parent_page_ids .get (page .id )
489+ if parent_page_id :
490+ docs_by_page_id [parent_page_id ].children .append (
491+ docs_by_page_id [page .id ]
492+ )
493+ else :
494+ logger .warning (
495+ f"Page { page .id } has a parent block, but no parent page found."
496+ )
497+ elif isinstance (page .parent , NotionParentWorkspace ):
498+ # This is a root page, not a child of another page
499+ root_pages .append (docs_by_page_id [page .id ])
500+
501+ return root_pages
0 commit comments