1515from __future__ import annotations
1616
1717import logging
18+ from collections import OrderedDict
1819import mimetypes
1920import os
2021import tempfile
@@ -246,23 +247,29 @@ def ai_parse_document(stage_location: StageLocation, path: str) -> Dict[str, Any
246247 full_path = f"{ root_prefix } /{ resolved_path } " if root_prefix else resolved_path
247248
248249 # Keep metadata first for predictable JSON ordering.
249- payload : Dict [str , Any ] = {
250- "metadata" : {
251- "chunk_count" : chunk_count ,
252- "chunk_size" : DEFAULT_CHUNK_SIZE ,
253- "duration_ms" : duration_ms ,
254- "file_size" : file_size if file_size is not None else 0 ,
255- "filename" : Path (path ).name ,
256- "path" : full_path or path ,
257- "timings_ms" : {
258- "convert" : (t_convert_end_ns - t_convert_start_ns ) / 1_000_000.0 ,
259- "chunk" : (t_chunk_end_ns - t_convert_end_ns ) / 1_000_000.0 ,
260- "total" : duration_ms ,
261- },
262- "version" : 1 ,
263- },
264- "chunks" : pages ,
265- }
250+ payload : Dict [str , Any ] = OrderedDict (
251+ [
252+ (
253+ "metadata" ,
254+ {
255+ "chunk_count" : chunk_count ,
256+ "chunk_size" : DEFAULT_CHUNK_SIZE ,
257+ "duration_ms" : duration_ms ,
258+ "file_size" : file_size if file_size is not None else 0 ,
259+ "filename" : Path (path ).name ,
260+ "path" : full_path or path ,
261+ "timings_ms" : {
262+ "convert" : (t_convert_end_ns - t_convert_start_ns )
263+ / 1_000_000.0 ,
264+ "chunk" : (t_chunk_end_ns - t_convert_end_ns ) / 1_000_000.0 ,
265+ "total" : duration_ms ,
266+ },
267+ "version" : 1 ,
268+ },
269+ ),
270+ ("chunks" , pages ),
271+ ]
272+ )
266273 if fallback :
267274 payload ["error_information" ] = [
268275 {
@@ -280,11 +287,19 @@ def ai_parse_document(stage_location: StageLocation, path: str) -> Dict[str, Any
280287 )
281288 return payload
282289 except Exception as exc : # pragma: no cover - defensive for unexpected docling errors
283- return {
284- "chunks" : [],
285- "metadata" : {
286- "path" : path ,
287- "filename" : Path (path ).name ,
288- },
289- "error_information" : [{"message" : str (exc ), "type" : exc .__class__ .__name__ }],
290- }
290+ return OrderedDict (
291+ [
292+ (
293+ "metadata" ,
294+ {
295+ "path" : path ,
296+ "filename" : Path (path ).name ,
297+ },
298+ ),
299+ ("chunks" , []),
300+ (
301+ "error_information" ,
302+ [{"message" : str (exc ), "type" : exc .__class__ .__name__ }],
303+ ),
304+ ]
305+ )
0 commit comments