Skip to content

Commit 26e1cca

Browse files
committed
Keep metadata first in ai_parse_document using OrderedDict
1 parent b4aeb35 commit 26e1cca

File tree

1 file changed

+40
-25
lines changed

1 file changed

+40
-25
lines changed

databend_aiserver/udfs/docparse.py

Lines changed: 40 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
import logging
18+
from collections import OrderedDict
1819
import mimetypes
1920
import os
2021
import tempfile
@@ -246,23 +247,29 @@ def ai_parse_document(stage_location: StageLocation, path: str) -> Dict[str, Any
246247
full_path = f"{root_prefix}/{resolved_path}" if root_prefix else resolved_path
247248

248249
# Keep metadata first for predictable JSON ordering.
249-
payload: Dict[str, Any] = {
250-
"metadata": {
251-
"chunk_count": chunk_count,
252-
"chunk_size": DEFAULT_CHUNK_SIZE,
253-
"duration_ms": duration_ms,
254-
"file_size": file_size if file_size is not None else 0,
255-
"filename": Path(path).name,
256-
"path": full_path or path,
257-
"timings_ms": {
258-
"convert": (t_convert_end_ns - t_convert_start_ns) / 1_000_000.0,
259-
"chunk": (t_chunk_end_ns - t_convert_end_ns) / 1_000_000.0,
260-
"total": duration_ms,
261-
},
262-
"version": 1,
263-
},
264-
"chunks": pages,
265-
}
250+
payload: Dict[str, Any] = OrderedDict(
251+
[
252+
(
253+
"metadata",
254+
{
255+
"chunk_count": chunk_count,
256+
"chunk_size": DEFAULT_CHUNK_SIZE,
257+
"duration_ms": duration_ms,
258+
"file_size": file_size if file_size is not None else 0,
259+
"filename": Path(path).name,
260+
"path": full_path or path,
261+
"timings_ms": {
262+
"convert": (t_convert_end_ns - t_convert_start_ns)
263+
/ 1_000_000.0,
264+
"chunk": (t_chunk_end_ns - t_convert_end_ns) / 1_000_000.0,
265+
"total": duration_ms,
266+
},
267+
"version": 1,
268+
},
269+
),
270+
("chunks", pages),
271+
]
272+
)
266273
if fallback:
267274
payload["error_information"] = [
268275
{
@@ -280,11 +287,19 @@ def ai_parse_document(stage_location: StageLocation, path: str) -> Dict[str, Any
280287
)
281288
return payload
282289
except Exception as exc: # pragma: no cover - defensive for unexpected docling errors
283-
return {
284-
"chunks": [],
285-
"metadata": {
286-
"path": path,
287-
"filename": Path(path).name,
288-
},
289-
"error_information": [{"message": str(exc), "type": exc.__class__.__name__}],
290-
}
290+
return OrderedDict(
291+
[
292+
(
293+
"metadata",
294+
{
295+
"path": path,
296+
"filename": Path(path).name,
297+
},
298+
),
299+
("chunks", []),
300+
(
301+
"error_information",
302+
[{"message": str(exc), "type": exc.__class__.__name__}],
303+
),
304+
]
305+
)

0 commit comments

Comments
 (0)