Skip to content
This repository was archived by the owner on Feb 22, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.10.6

* Refine handling of requests that mix text files and non-text files

# 0.10.5

* Add optional CORS to API
Expand Down
106 changes: 97 additions & 9 deletions test_unstructured_api_tools/api/test_file_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
GZIP_FILE_IMAGE,
GZIP_FILE_DOCX,
FILE_MARKDOWN,
FILE_TXT_1,
GZIP_FILE_TXT_1,
)

# accepts: files, input2
Expand Down Expand Up @@ -224,6 +226,9 @@ def _json_for_one_file(test_file):
([], P_INPUT_1_EMPTY, JSON, 400, None),
([GZIP_FILE_DOCX], P_INPUT_1_EMPTY, JSON, 200, FILENAME_FORMATS[FILE_DOCX]),
([GZIP_FILE_DOCX], P_INPUT_1_EMPTY, JSON, 200, FILENAME_FORMATS[FILE_IMAGE]),
([FILE_TXT_1], P_INPUT_1_EMPTY, JSON, 400, None),
([FILE_DOCX, FILE_IMAGE, FILE_TXT_1], P_INPUT_1_EMPTY, JSON, 400, None),
([FILE_DOCX, GZIP_FILE_TXT_1], P_INPUT_1_EMPTY, JSON, 400, None),
],
)
def test_process_file_1(
Expand All @@ -235,6 +240,8 @@ def test_process_file_1(
data = test_params
if gz_content_type:
data["gz_uncompressed_content_type"] = gz_content_type
else:
data["gz_uncompressed_content_type"] = None

response = client.post(
endpoint,
Expand Down Expand Up @@ -270,13 +277,15 @@ def test_process_file_1(
([FILE_DOCX, GZIP_FILE_IMAGE], MIXED, 200, None, False, None),
([GZIP_FILE_DOCX, GZIP_FILE_IMAGE], MIXED, 200, None, False, None),
([GZIP_FILE_DOCX, GZIP_FILE_IMAGE], TEXT_CSV, 406, None, False, None),
([FILE_MARKDOWN, GZIP_FILE_IMAGE], JSON, 200, None, False, None),
([FILE_MARKDOWN], JSON, 200, None, False, None),
([FILE_MARKDOWN], JSON, 200, None, True, None),
([FILE_MARKDOWN, GZIP_FILE_IMAGE], JSON, 400, None, False, None),
([FILE_MARKDOWN], JSON, 400, None, False, None),
([FILE_MARKDOWN], JSON, 400, None, True, None),
([FILE_MSG], JSON, 200, None, True, None),
([FILE_JSON], JSON, 200, None, True, None),
([GZIP_FILE_DOCX], JSON, 200, None, False, FILENAME_FORMATS[FILE_DOCX]),
([GZIP_FILE_DOCX], JSON, 200, None, False, FILENAME_FORMATS[FILE_IMAGE]),
([GZIP_FILE_DOCX, GZIP_FILE_TXT_1], JSON, 400, None, False, None),
([FILE_TXT_1], JSON, 400, None, False, None),
],
)
def test_process_file_2(
Expand Down Expand Up @@ -421,12 +430,12 @@ def test_process_file_2(
None,
None,
),
([FILE_MARKDOWN], JSON, RESPONSE_SCHEMA_LABELSTUDIO, 200, True, None, None),
([FILE_MARKDOWN], JSON, RESPONSE_SCHEMA_LABELSTUDIO, 400, True, None, None),
(
[FILE_MARKDOWN],
JSON,
RESPONSE_SCHEMA_LABELSTUDIO,
200,
400,
False,
FILENAME_FORMATS[FILE_MARKDOWN],
None,
Expand Down Expand Up @@ -459,6 +468,33 @@ def test_process_file_2(
None,
FILENAME_FORMATS[FILE_IMAGE],
),
(
[GZIP_FILE_TXT_1],
JSON,
RESPONSE_SCHEMA_ISD,
400,
False,
None,
None,
),
(
[FILE_DOCX, FILE_TXT_1],
JSON,
RESPONSE_SCHEMA_LABELSTUDIO,
400,
False,
None,
None,
),
(
[FILE_TXT_1],
JSON,
RESPONSE_SCHEMA_LABELSTUDIO,
400,
False,
None,
None,
),
],
)
def test_process_file_3(
Expand Down Expand Up @@ -630,7 +666,7 @@ def test_process_file_3(
False,
None,
),
([FILE_MARKDOWN], JSON, RESPONSE_SCHEMA_ISD, P_INPUT_1_SINGLE, 200, None, True, None),
([FILE_MARKDOWN], JSON, RESPONSE_SCHEMA_ISD, P_INPUT_1_SINGLE, 400, None, True, None),
(
[GZIP_FILE_DOCX, GZIP_FILE_IMAGE],
MIXED,
Expand Down Expand Up @@ -663,6 +699,36 @@ def test_process_file_3(
False,
FILENAME_FORMATS[FILE_IMAGE],
),
(
[GZIP_FILE_TXT_1],
JSON,
RESPONSE_SCHEMA_ISD,
P_INPUT_1_EMPTY,
400,
None,
False,
None,
),
(
[FILE_TXT_1],
JSON,
RESPONSE_SCHEMA_LABELSTUDIO,
P_INPUT_1_EMPTY,
400,
None,
False,
None,
),
(
[FILE_DOCX, FILE_MARKDOWN],
JSON,
RESPONSE_SCHEMA_ISD,
P_INPUT_1_EMPTY,
400,
None,
False,
None,
),
],
)
def test_process_file_4(
Expand Down Expand Up @@ -871,7 +937,7 @@ def test_process_file_4(
RESPONSE_SCHEMA_LABELSTUDIO,
P_INPUT_1_MULTI,
P_INPUT_2_EMPTY,
200,
400,
False,
None,
None,
Expand All @@ -882,7 +948,7 @@ def test_process_file_4(
RESPONSE_SCHEMA_LABELSTUDIO,
P_INPUT_1_MULTI,
P_INPUT_2_EMPTY,
200,
400,
True,
None,
None,
Expand All @@ -893,7 +959,7 @@ def test_process_file_4(
RESPONSE_SCHEMA_LABELSTUDIO,
P_INPUT_1_MULTI,
P_INPUT_2_EMPTY,
200,
400,
False,
FILENAME_FORMATS[FILE_MARKDOWN],
None,
Expand Down Expand Up @@ -975,6 +1041,28 @@ def test_process_file_4(
None,
FILENAME_FORMATS[FILE_IMAGE],
),
(
[GZIP_FILE_DOCX, FILE_TXT_1],
JSON,
RESPONSE_SCHEMA_ISD,
P_INPUT_1_EMPTY,
P_INPUT_2_EMPTY,
400,
False,
None,
None,
),
(
[FILE_TXT_1],
JSON,
RESPONSE_SCHEMA_LABELSTUDIO,
P_INPUT_1_EMPTY,
P_INPUT_2_EMPTY,
400,
False,
None,
None,
),
],
)
def test_process_file_5(
Expand Down
81 changes: 62 additions & 19 deletions test_unstructured_api_tools/api/test_file_text_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,18 +311,18 @@ def _json_for_one_file(test_file=None, test_text_file=None):
([FILE_DOCX, FILE_IMAGE], [GZIP_FILE_TXT_1, GZIP_FILE_TXT_2], 200, JSON, False, None, None),
([FILE_DOCX], [GZIP_FILE_TXT_2], 200, JSON, False, None, None),
([GZIP_FILE_IMAGE], [GZIP_FILE_TXT_1], 200, JSON, False, None, None),
([FILE_MARKDOWN], [GZIP_FILE_TXT_1], 200, JSON, True, None, None),
([FILE_MARKDOWN], [GZIP_FILE_TXT_1], 400, JSON, True, None, None),
(
[FILE_MARKDOWN],
[GZIP_FILE_TXT_1],
200,
400,
JSON,
False,
f"{FILENAME_FORMATS[FILE_MARKDOWN]},{FILENAME_FORMATS[FILE_TXT_1]}",
None,
),
([FILE_MARKDOWN], [GZIP_FILE_TXT_1], 400, JSON, False, FILENAME_FORMATS[FILE_TXT_1], None),
([FILE_MARKDOWN, FILE_DOCX], [GZIP_FILE_TXT_1, FILE_TXT_2], 200, MIXED, False, None, None),
([FILE_DOCX], [GZIP_FILE_TXT_1, FILE_TXT_2, FILE_MARKDOWN], 200, MIXED, False, None, None),
([], [], 400, JSON, False, None, None),
(
[FILE_MARKDOWN, FILE_DOCX],
Expand All @@ -337,6 +337,9 @@ def _json_for_one_file(test_file=None, test_text_file=None):
([FILE_DOCX], [], 200, JSON, False, None, None),
([GZIP_FILE_DOCX], [FILE_TXT_1], 200, JSON, False, None, FILENAME_FORMATS[FILE_DOCX]),
([GZIP_FILE_IMAGE], [], 200, JSON, False, None, FILENAME_FORMATS[FILE_IMAGE]),
([FILE_TXT_1], [], 400, JSON, False, None, None),
([], [FILE_DOCX], 400, JSON, False, None, None),
([FILE_DOCX, FILE_IMAGE, FILE_MARKDOWN], [FILE_TXT_1], 400, JSON, False, None, None),
],
)
def test_process_file_text_1(
Expand Down Expand Up @@ -431,10 +434,10 @@ def test_process_file_text_1(
([GZIP_FILE_IMAGE], [GZIP_FILE_TXT_1], JSON, P_INPUT_2_MULTI, 200, False, None, None),
([], [FILE_TXT_1], TEXT_CSV, P_INPUT_2_EMPTY, 406, False, None, None),
([], [FILE_TXT_1], JSON, P_INPUT_2_EMPTY, 200, False, None, None),
([FILE_MARKDOWN], [FILE_TXT_1], JSON, P_INPUT_2_EMPTY, 200, True, None, None),
([], [FILE_TXT_1, FILE_MARKDOWN], JSON, P_INPUT_2_EMPTY, 200, True, None, None),
(
[FILE_MARKDOWN],
[FILE_TXT_1],
[],
[FILE_TXT_1, FILE_MARKDOWN],
JSON,
P_INPUT_2_MULTI,
200,
Expand All @@ -443,8 +446,8 @@ def test_process_file_text_1(
None,
),
(
[FILE_MARKDOWN],
[FILE_TXT_1],
[],
[FILE_TXT_1, FILE_MARKDOWN],
JSON,
P_INPUT_2_SINGLE,
400,
Expand All @@ -453,7 +456,7 @@ def test_process_file_text_1(
None,
),
([], [], JSON, P_INPUT_2_EMPTY, 400, False, None, None),
([FILE_MARKDOWN], [FILE_TXT_1], TEXT_CSV, P_INPUT_2_MULTI, 406, False, None, None),
([], [FILE_TXT_1, FILE_MARKDOWN], TEXT_CSV, P_INPUT_2_MULTI, 406, False, None, None),
([], [FILE_TXT_1], JSON, P_INPUT_2_SINGLE, 200, False, None, None),
([FILE_DOCX], [], JSON, P_INPUT_2_SINGLE, 200, False, None, None),
([], [FILE_TXT_1], MIXED, P_INPUT_2_EMPTY, 200, False, None, None),
Expand All @@ -477,6 +480,46 @@ def test_process_file_text_1(
None,
FILENAME_FORMATS[FILE_IMAGE],
),
(
[GZIP_FILE_DOCX, GZIP_FILE_TXT_1],
[FILE_TXT_2],
JSON,
P_INPUT_2_SINGLE,
400,
False,
None,
None,
),
(
[GZIP_FILE_DOCX],
[GZIP_FILE_IMAGE],
JSON,
P_INPUT_2_MULTI,
400,
False,
None,
None,
),
(
[],
[FILE_MARKDOWN],
JSON,
P_INPUT_2_EMPTY,
200,
True,
None,
None,
),
(
[],
[],
JSON,
P_INPUT_1_EMPTY,
400,
False,
None,
None,
),
],
)
def test_process_file_text_2(
Expand Down Expand Up @@ -645,8 +688,8 @@ def test_process_file_text_2(
([], [GZIP_FILE_TXT_1], TEXT_CSV, RESPONSE_SCHEMA_LABELSTUDIO, 406, False, None, None),
([], [GZIP_FILE_TXT_1], JSON, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
(
[FILE_DOCX, FILE_MARKDOWN],
[GZIP_FILE_TXT_1],
[FILE_DOCX],
[GZIP_FILE_TXT_1, FILE_MARKDOWN],
JSON,
RESPONSE_SCHEMA_ISD,
200,
Expand All @@ -665,8 +708,8 @@ def test_process_file_text_2(
None,
),
(
[FILE_DOCX, FILE_MARKDOWN],
[GZIP_FILE_TXT_1],
[FILE_DOCX],
[GZIP_FILE_TXT_1, FILE_MARKDOWN],
JSON,
RESPONSE_SCHEMA_ISD,
400,
Expand Down Expand Up @@ -985,8 +1028,8 @@ def test_process_file_text_3(
None,
),
(
[FILE_MARKDOWN],
[FILE_TXT_2],
[],
[FILE_TXT_2, FILE_MARKDOWN],
JSON,
RESPONSE_SCHEMA_ISD,
P_INPUT_1_MULTI,
Expand All @@ -997,8 +1040,8 @@ def test_process_file_text_3(
None,
),
(
[FILE_MARKDOWN],
[FILE_TXT_2],
[],
[FILE_TXT_2, FILE_MARKDOWN],
JSON,
RESPONSE_SCHEMA_ISD,
P_INPUT_1_MULTI,
Expand All @@ -1009,8 +1052,8 @@ def test_process_file_text_3(
None,
),
(
[FILE_MARKDOWN],
[FILE_TXT_2],
[],
[FILE_TXT_2, FILE_MARKDOWN],
JSON,
RESPONSE_SCHEMA_ISD,
P_INPUT_1_MULTI,
Expand Down
Loading