Skip to content

Commit 95e95ea

Browse files
committed
updated obj_detect to be under vision
1 parent ac4bba5 commit 95e95ea

File tree

2 files changed

+118
-211
lines changed

2 files changed

+118
-211
lines changed

jigsawstack/object_detection.py

Lines changed: 0 additions & 206 deletions
This file was deleted.

jigsawstack/vision.py

Lines changed: 118 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,128 @@
11
from typing import Any, Dict, List, Union, cast, Optional
2-
from typing_extensions import NotRequired, TypedDict
2+
from typing_extensions import NotRequired, TypedDict, Literal
33
from typing import Any, Dict, List, cast
4-
from typing_extensions import NotRequired, TypedDict
4+
from typing_extensions import NotRequired, TypedDict, Literal
55
from .request import Request, RequestConfig
66
from .async_request import AsyncRequest, AsyncRequestConfig
77
from ._config import ClientConfig
88

99

10-
class OCRParams(TypedDict):
10+
class Point(TypedDict):
    """A two-dimensional point described by integer ``x`` and ``y`` coordinates."""

    x: int
    """
    X coordinate of the point
    """

    y: int
    """
    Y coordinate of the point
    """
20+
21+
22+
class BoundingBox(TypedDict):
    """A bounding box given by its four corner points plus its width and height."""

    top_left: Point
    """
    Top-left corner of the bounding box
    """

    top_right: Point
    """
    Top-right corner of the bounding box
    """

    bottom_left: Point
    """
    Bottom-left corner of the bounding box
    """

    bottom_right: Point
    """
    Bottom-right corner of the bounding box
    """

    width: int
    """
    Width of the bounding box
    """

    height: int
    """
    Height of the bounding box
    """
52+
53+
54+
class GuiElement(TypedDict):
    """A detected GUI element: its bounding box and its (possibly null) content."""

    bounds: BoundingBox
    """
    Bounding box coordinates of the GUI element
    """

    content: Union[str, None]
    """
    Content of the GUI element, can be null if no object detected
    """
64+
65+
66+
class DetectedObject(TypedDict):
    """A detected object: its bounding box and, for some objects, a mask."""

    bounds: BoundingBox
    """
    Bounding box coordinates of the detected object
    """

    mask: NotRequired[str]
    """
    URL or base64 string depending on return_type - only present for some objects
    """
76+
77+
78+
79+
class ObjectDetectionParams(TypedDict):
1180
url: NotRequired[str]
81+
"""
82+
URL of the image to process
83+
"""
84+
1285
file_store_key: NotRequired[str]
86+
"""
87+
File store key of the image to process
88+
"""
89+
90+
prompts: NotRequired[List[str]]
91+
"""
92+
List of prompts for object detection
93+
"""
94+
95+
features: NotRequired[List[Literal["object_detection", "gui"]]]
96+
"""
97+
List of features to enable: object_detection, gui
98+
"""
99+
100+
annotated_image: NotRequired[bool]
101+
"""
102+
Whether to return an annotated image
103+
"""
104+
105+
return_type: NotRequired[Literal["url", "base64"]]
106+
"""
107+
Format for returned images: url or base64
108+
"""
109+
110+
111+
class ObjectDetectionResponse(TypedDict):
    """Response payload of object detection; every key is optional."""

    annotated_image: NotRequired[str]
    """
    URL or base64 string of annotated image (included only if annotated_image=true and objects/gui_elements exist)
    """

    gui_elements: NotRequired[List[GuiElement]]
    """
    List of detected GUI elements (included only if features includes "gui")
    """

    objects: NotRequired[List[DetectedObject]]
    """
    List of detected objects (included only if features includes "object_detection")
    """
13126

14127

15128
class VOCRParams(TypedDict):
@@ -60,7 +173,7 @@ def vocr(self, params: VOCRParams) -> OCRResponse:
60173
).perform_with_content()
61174
return resp
62175

63-
def object_detection(self, params: OCRParams) -> OCRResponse:
176+
def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
64177
path = "/ai/object_detection"
65178
resp = Request(
66179
config=self.config,
@@ -97,7 +210,7 @@ async def vocr(self, params: VOCRParams) -> OCRResponse:
97210
).perform_with_content()
98211
return resp
99212

100-
async def object_detection(self, params: OCRParams) -> OCRResponse:
213+
async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
101214
path = "/ai/object_detection"
102215
resp = AsyncRequest(
103216
config=self.config,

0 commit comments

Comments
 (0)