JigsawStack
diff --git a/jigsawstack/object_detection.py b/jigsawstack/object_detection.py
Lines changed: 0 additions & 206 deletions
diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py
Lines changed: 118 additions & 5 deletions
@@ -1,15 +1,128 @@
 from typing import Any, Dict, List, Union, cast, Optional
-from typing_extensions import NotRequired, TypedDict
+from typing_extensions import NotRequired, TypedDict, Literal
 from typing import Any, Dict, List, cast
-from typing_extensions import NotRequired, TypedDict
+from typing_extensions import NotRequired, TypedDict, Literal
 from .request import Request, RequestConfig
 from .async_request import AsyncRequest, AsyncRequestConfig
 from ._config import ClientConfig
 
 
-class OCRParams(TypedDict):
+class Point(TypedDict):
+    x: int
+    """
+    X coordinate of the point
+    """
+    
+    y: int
+    """
+    Y coordinate of the point
+    """
+
+
+class BoundingBox(TypedDict):
+    top_left: Point
+    """
+    Top-left corner of the bounding box
+    """
+    
+    top_right: Point
+    """
+    Top-right corner of the bounding box
+    """
+    
+    bottom_left: Point
+    """
+    Bottom-left corner of the bounding box
+    """
+    
+    bottom_right: Point
+    """
+    Bottom-right corner of the bounding box
+    """
+    
+    width: int
+    """
+    Width of the bounding box
+    """
+    
+    height: int
+    """
+    Height of the bounding box
+    """
+
+
+class GuiElement(TypedDict):
+    bounds: BoundingBox
+    """
+    Bounding box coordinates of the GUI element
+    """
+    
+    content: Union[str, None]
+    """
+    Content of the GUI element; can be None if no object was detected
+    """
+
+
+class DetectedObject(TypedDict):
+    bounds: BoundingBox
+    """
+    Bounding box coordinates of the detected object
+    """
+    
+    mask: NotRequired[str]
+    """
+    Mask as a URL or base64 string, depending on return_type; only present for some objects
+    """
+
+
+
+class ObjectDetectionParams(TypedDict):
     url: NotRequired[str]
+    """
+    URL of the image to process
+    """
+    
     file_store_key: NotRequired[str]
+    """
+    File store key of the image to process
+    """
+    
+    prompts: NotRequired[List[str]]
+    """
+    List of prompts for object detection
+    """
+    
+    features: NotRequired[List[Literal["object_detection", "gui"]]]
+    """
+    List of features to enable: object_detection, gui
+    """
+    
+    annotated_image: NotRequired[bool]
+    """
+    Whether to return an annotated image
+    """
+    
+    return_type: NotRequired[Literal["url", "base64"]]
+    """
+    Format for returned images: url or base64
+    """
+
+
+class ObjectDetectionResponse(TypedDict):
+    annotated_image: NotRequired[str]
+    """
+    URL or base64 string of the annotated image (included only if annotated_image=True and objects/gui_elements exist)
+    """
+    
+    gui_elements: NotRequired[List[GuiElement]]
+    """
+    List of detected GUI elements (included only if features includes "gui")
+    """
+    
+    objects: NotRequired[List[DetectedObject]]
+    """
+    List of detected objects (included only if features includes "object_detection")
+    """
 
 
 class VOCRParams(TypedDict):
@@ -60,7 +173,7 @@ def vocr(self, params: VOCRParams) -> OCRResponse:
         ).perform_with_content()
         return resp
 
-    def object_detection(self, params: OCRParams) -> OCRResponse:
+    def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
         path = "/ai/object_detection"
         resp = Request(
             config=self.config,
@@ -97,7 +210,7 @@ async def vocr(self, params: VOCRParams) -> OCRResponse:
         ).perform_with_content()
         return resp
 
-    async def object_detection(self, params: OCRParams) -> OCRResponse:
+    async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
         path = "/ai/object_detection"
         resp = AsyncRequest(
             config=self.config,