| 
1 | 1 | from typing import Any, Dict, List, Union, cast, Optional  | 
2 |  | -from typing_extensions import NotRequired, TypedDict  | 
 | 2 | +from typing_extensions import NotRequired, TypedDict, Literal  | 
3 | 3 | from typing import Any, Dict, List, cast  | 
4 |  | -from typing_extensions import NotRequired, TypedDict  | 
 | 4 | +from typing_extensions import NotRequired, TypedDict, Literal  | 
5 | 5 | from .request import Request, RequestConfig  | 
6 | 6 | from .async_request import AsyncRequest, AsyncRequestConfig  | 
7 | 7 | from ._config import ClientConfig  | 
8 | 8 | 
 
  | 
9 | 9 | 
 
  | 
10 |  | -class OCRParams(TypedDict):  | 
 | 10 | +class Point(TypedDict):  | 
 | 11 | +    x: int  | 
 | 12 | +    """  | 
 | 13 | +    X coordinate of the point  | 
 | 14 | +    """  | 
 | 15 | +      | 
 | 16 | +    y: int  | 
 | 17 | +    """  | 
 | 18 | +    Y coordinate of the point  | 
 | 19 | +    """  | 
 | 20 | + | 
 | 21 | + | 
 | 22 | +class BoundingBox(TypedDict):  | 
 | 23 | +    top_left: Point  | 
 | 24 | +    """  | 
 | 25 | +    Top-left corner of the bounding box  | 
 | 26 | +    """  | 
 | 27 | +      | 
 | 28 | +    top_right: Point  | 
 | 29 | +    """  | 
 | 30 | +    Top-right corner of the bounding box  | 
 | 31 | +    """  | 
 | 32 | +      | 
 | 33 | +    bottom_left: Point  | 
 | 34 | +    """  | 
 | 35 | +    Bottom-left corner of the bounding box  | 
 | 36 | +    """  | 
 | 37 | +      | 
 | 38 | +    bottom_right: Point  | 
 | 39 | +    """  | 
 | 40 | +    Bottom-right corner of the bounding box  | 
 | 41 | +    """  | 
 | 42 | +      | 
 | 43 | +    width: int  | 
 | 44 | +    """  | 
 | 45 | +    Width of the bounding box  | 
 | 46 | +    """  | 
 | 47 | +      | 
 | 48 | +    height: int  | 
 | 49 | +    """  | 
 | 50 | +    Height of the bounding box  | 
 | 51 | +    """  | 
 | 52 | + | 
 | 53 | + | 
 | 54 | +class GuiElement(TypedDict):  | 
 | 55 | +    bounds: BoundingBox  | 
 | 56 | +    """  | 
 | 57 | +    Bounding box coordinates of the GUI element  | 
 | 58 | +    """  | 
 | 59 | +      | 
 | 60 | +    content: Union[str, None]  | 
 | 61 | +    """  | 
 | 62 | +    Content of the GUI element, can be null if no object detected  | 
 | 63 | +    """  | 
 | 64 | + | 
 | 65 | + | 
 | 66 | +class DetectedObject(TypedDict):  | 
 | 67 | +    bounds: BoundingBox  | 
 | 68 | +    """  | 
 | 69 | +    Bounding box coordinates of the detected object  | 
 | 70 | +    """  | 
 | 71 | +      | 
 | 72 | +    mask: NotRequired[str]  | 
 | 73 | +    """  | 
 | 74 | +    URL or base64 string depending on return_type - only present for some objects  | 
 | 75 | +    """  | 
 | 76 | + | 
 | 77 | + | 
 | 78 | + | 
 | 79 | +class ObjectDetectionParams(TypedDict):  | 
11 | 80 |     url: NotRequired[str]  | 
 | 81 | +    """  | 
 | 82 | +    URL of the image to process  | 
 | 83 | +    """  | 
 | 84 | +      | 
12 | 85 |     file_store_key: NotRequired[str]  | 
 | 86 | +    """  | 
 | 87 | +    File store key of the image to process  | 
 | 88 | +    """  | 
 | 89 | +      | 
 | 90 | +    prompts: NotRequired[List[str]]  | 
 | 91 | +    """  | 
 | 92 | +    List of prompts for object detection  | 
 | 93 | +    """  | 
 | 94 | +      | 
 | 95 | +    features: NotRequired[List[Literal["object_detection", "gui"]]]  | 
 | 96 | +    """  | 
 | 97 | +    List of features to enable: object_detection, gui  | 
 | 98 | +    """  | 
 | 99 | +      | 
 | 100 | +    annotated_image: NotRequired[bool]  | 
 | 101 | +    """  | 
 | 102 | +    Whether to return an annotated image  | 
 | 103 | +    """  | 
 | 104 | +      | 
 | 105 | +    return_type: NotRequired[Literal["url", "base64"]]  | 
 | 106 | +    """  | 
 | 107 | +    Format for returned images: url or base64  | 
 | 108 | +    """  | 
 | 109 | + | 
 | 110 | + | 
 | 111 | +class ObjectDetectionResponse(TypedDict):  | 
 | 112 | +    annotated_image: NotRequired[str]  | 
 | 113 | +    """  | 
 | 114 | +    URL or base64 string of annotated image (included only if annotated_image=true and objects/gui_elements exist)  | 
 | 115 | +    """  | 
 | 116 | +      | 
 | 117 | +    gui_elements: NotRequired[List[GuiElement]]  | 
 | 118 | +    """  | 
 | 119 | +    List of detected GUI elements (included only if features includes "gui")  | 
 | 120 | +    """  | 
 | 121 | +      | 
 | 122 | +    objects: NotRequired[List[DetectedObject]]  | 
 | 123 | +    """  | 
 | 124 | +    List of detected objects (included only if features includes "object_detection")  | 
 | 125 | +    """  | 
13 | 126 | 
 
  | 
14 | 127 | 
 
  | 
15 | 128 | class VOCRParams(TypedDict):  | 
@@ -60,7 +173,7 @@ def vocr(self, params: VOCRParams) -> OCRResponse:  | 
60 | 173 |         ).perform_with_content()  | 
61 | 174 |         return resp  | 
62 | 175 | 
 
  | 
63 |  | -    def object_detection(self, params: OCRParams) -> OCRResponse:  | 
 | 176 | +    def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:  | 
64 | 177 |         path = "/ai/object_detection"  | 
65 | 178 |         resp = Request(  | 
66 | 179 |             config=self.config,  | 
@@ -97,9 +210,9 @@ async def vocr(self, params: VOCRParams) -> OCRResponse:  | 
97 | 210 |         ).perform_with_content()  | 
98 | 211 |         return resp  | 
99 | 212 | 
 
  | 
100 |  | -    async def object_detection(self, params: OCRParams) -> OCRResponse:  | 
 | 213 | +    async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:  | 
101 | 214 |         path = "/ai/object_detection"  | 
102 |  | -        resp = AsyncRequest(  | 
 | 215 | +        resp = await AsyncRequest(  | 
103 | 216 |             config=self.config,  | 
104 | 217 |             path=path,  | 
105 | 218 |             params=cast(Dict[Any, Any], params),  | 
 | 
0 commit comments