Skip to content

Commit a940036

Browse files
Merge pull request #26 from Clarifai/requirements-dev
Removed local dependency and added requirement-dev.txt
2 parents 1c9e0a5 + 12cd554 commit a940036

File tree

8 files changed

+61
-20
lines changed

8 files changed

+61
-20
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,14 @@ coco_dataset.export_to('voc_detection')
8787

8888

8989
### Data Ingestion Pipelines
90+
91+
#### Setup
92+
To use the Data Ingestion Pipeline, first install the development dependencies by running:
93+
```bash
94+
pip install -r requirements-dev.txt
95+
```
96+
97+
9098
```python
9199
from clarifai_datautils.text import Pipeline, PDFPartition
92100
from clarifai_datautils.text.pipeline.cleaners import Clean_extra_whitespace

clarifai_datautils/multimodal/pipeline/PDF.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
from typing import List
2-
3-
from unstructured.chunking.basic import chunk_elements
4-
from unstructured.chunking.title import chunk_by_title
5-
from unstructured.partition.pdf import partition_pdf
2+
try:
3+
from unstructured.chunking.basic import chunk_elements
4+
from unstructured.chunking.title import chunk_by_title
5+
from unstructured.partition.pdf import partition_pdf
6+
except ImportError:
7+
raise ImportError(
8+
"Could not import unstructured package. "
9+
"Please install it with `pip install 'unstructured[pdf] @ git+https://github.com/clarifai/unstructured.git@support_clarifai_model'`."
10+
)
611

712
from clarifai_datautils.constants.pipeline import MAX_CHARACTERS
813

clarifai_datautils/multimodal/pipeline/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@ Load text files(pdf, doc, etc..) , transform, chunk and upload to the Clarifai P
88
- Metadata Extraction
99

1010

11+
## Setup
12+
To use the Data Ingestion Pipeline, first install the development dependencies by running:
13+
```bash
14+
pip install -r requirements-dev.txt
15+
```
16+
17+
1118
## Usage
1219

1320
```python

clarifai_datautils/multimodal/pipeline/Text.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
from typing import List
2-
3-
from unstructured.partition.text import partition_text
2+
try:
3+
from unstructured.partition.text import partition_text
4+
except ImportError:
5+
raise ImportError(
6+
"Could not import unstructured package. "
7+
"Please install it with `pip install 'unstructured[pdf] @ git+https://github.com/clarifai/unstructured.git@support_clarifai_model'`."
8+
)
49

510
from clarifai_datautils.constants.pipeline import MAX_CHARACTERS
611

clarifai_datautils/multimodal/pipeline/cleaners.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
from typing import List
22

3-
from unstructured.cleaners.core import (
4-
bytes_string_to_string, clean_bullets, clean_dashes, clean_extra_whitespace,
5-
clean_non_ascii_chars, clean_ordered_bullets, clean_postfix, clean_prefix,
6-
group_broken_paragraphs, remove_punctuation, replace_unicode_quotes)
3+
try:
4+
from unstructured.cleaners.core import (
5+
bytes_string_to_string, clean_bullets, clean_dashes, clean_extra_whitespace,
6+
clean_non_ascii_chars, clean_ordered_bullets, clean_postfix, clean_prefix,
7+
group_broken_paragraphs, remove_punctuation, replace_unicode_quotes)
8+
except ImportError:
9+
raise ImportError(
10+
"Could not import unstructured package. "
11+
"Please install it with `pip install 'unstructured[pdf] @ git+https://github.com/clarifai/unstructured.git@support_clarifai_model'`."
12+
)
713

814
from .basetransform import BaseTransform
915

clarifai_datautils/multimodal/pipeline/extractors.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22

33
from llama_index.core import Document
44
from llama_index.core.node_parser import SentenceSplitter
5-
from unstructured.cleaners.extract import (extract_datetimetz, extract_email_address,
6-
extract_ip_address, extract_ip_address_name,
7-
extract_text_after, extract_text_before)
8-
from unstructured.documents.elements import Element, ElementMetadata
5+
try:
6+
from unstructured.cleaners.extract import (extract_datetimetz, extract_email_address,
7+
extract_ip_address, extract_ip_address_name,
8+
extract_text_after, extract_text_before)
9+
from unstructured.documents.elements import Element, ElementMetadata
10+
except ImportError:
11+
raise ImportError(
12+
"Could not import unstructured package. "
13+
"Please install it with `pip install 'unstructured[pdf] @ git+https://github.com/clarifai/unstructured.git@support_clarifai_model'`."
14+
)
915

1016
from clarifai_datautils.constants.pipeline import MAX_NODES, SKIP_NODES
1117

requirements-dev.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# requirements-dev.txt
2+
unstructured[pdf] @ git+https://github.com/clarifai/unstructured.git@support_clarifai_model
3+
llama-index-core==0.10.33
4+
llama-index-llms-clarifai==0.1.2
5+
pi_heif==0.18.0

setup.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@
1010
assert _search_version
1111
version = _search_version.group(1)
1212

13-
install_requires = [
14-
'unstructured[pdf] @ git+https://github.com/clarifai/unstructured.git@support_clarifai_model',
15-
'llama-index-core==0.10.33',
16-
'llama-index-llms-clarifai==0.1.2',
17-
'pi_heif==0.18.0'
18-
]
13+
with open("requirements.txt", "r") as fh:
14+
install_requires = fh.read().split('\n')
15+
if install_requires and install_requires[-1] == '':
16+
# Remove the last empty line
17+
install_requires = install_requires[:-1]
1918

2019
packages = setuptools.find_namespace_packages(include=["clarifai_datautils*"])
2120

0 commit comments

Comments
 (0)