diff --git a/CHANGELOG.md b/CHANGELOG.md index cdad3161..1cbb7b8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,22 @@ +## [1.31.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.31.0...v1.31.1) (2024-11-22) + + +### Bug Fixes + +* add new model instance ([2f3cafe](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2f3cafeab0bce38571fa10d71f454b2a31766ddc)) +* fetch node regex ([e2af232](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e2af2326f6c56e2abcc7dd5de9acdfb710507e0a)) +* generate answer node timeout ([32ef554](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/32ef5547f1d864c750cd47c115be6f38a1931d2c)) +* timeout ([c243106](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c243106552cec3b1df254c0d0a45401eb2f5c89d)) + + +### CI + +* **release:** 1.31.0-beta.1 [skip ci] ([1df7eb0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/1df7eb0bcd923bc62fd19dddc0ce9b757e9742cf)), closes [#805](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/805) [#805](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/805) +* **release:** 1.31.1-beta.1 [skip ci] ([86bf4f2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/86bf4f24021d6e73378495d5b2b3acbfa2ff8ed5)), closes [#805](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/805) [#805](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/805) +* **release:** 1.31.1-beta.2 [skip ci] ([f247844](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f247844d81e018c749c3a9a7170ed3ceded5d483)) +* **release:** 1.31.1-beta.3 [skip ci] ([30b0156](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/30b0156d17aa23e99d203eb6c7dd4f42e1e83566)) +* **release:** 1.31.1-beta.4 [skip ci] ([b2720a4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b2720a452f023999e3b394636773b794941cc6a1)) + ## [1.31.1-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.31.1-beta.3...v1.31.1-beta.4) (2024-11-21) diff --git a/Dockerfile b/Dockerfile index 
a04c8551..a5f71732 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,9 @@ FROM python:3.11-slim -RUN apt-get update && apt-get upgrade -y +RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/* -RUN pip install scrapegraphai -RUN pip install scrapegraphai[burr] +RUN pip install --no-cache-dir scrapegraphai +RUN pip install --no-cache-dir scrapegraphai[burr] RUN python3 -m playwright install-deps RUN python3 -m playwright install \ No newline at end of file diff --git a/examples/anthropic/depth_search_graph_anthropic.py b/examples/anthropic/depth_search_graph_anthropic.py index 8cac7bea..565934ed 100644 --- a/examples/anthropic/depth_search_graph_anthropic.py +++ b/examples/anthropic/depth_search_graph_anthropic.py @@ -10,7 +10,7 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3-haiku-20240307", }, "verbose": True, "headless": False, diff --git a/examples/anthropic/document_scraper_anthropic.py b/examples/anthropic/document_scraper_anthropic.py new file mode 100644 index 00000000..a8f253be --- /dev/null +++ b/examples/anthropic/document_scraper_anthropic.py @@ -0,0 +1,42 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"), + "model": "anthropic/claude-3-haiku-20240307", + } +} + + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. 
+ Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/anthropic/json_scraper_anthropic.py b/examples/anthropic/json_scraper_anthropic.py index 456643d2..fd5aa4e8 100644 --- a/examples/anthropic/json_scraper_anthropic.py +++ b/examples/anthropic/json_scraper_anthropic.py @@ -4,7 +4,6 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -27,7 +26,7 @@ "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), "model": "anthropic/claude-3-haiku-20240307", - }, + } } # ************************************************ @@ -42,15 +41,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") - diff --git a/examples/azure/depth_search_graph_azure.py 
b/examples/azure/depth_search_graph_azure.py index 88b2cd1b..96ccc23e 100644 --- a/examples/azure/depth_search_graph_azure.py +++ b/examples/azure/depth_search_graph_azure.py @@ -1,5 +1,5 @@ """ -depth_search_graph_opeani example +depth_search_graph_azure example """ import os from dotenv import load_dotenv @@ -7,8 +7,6 @@ load_dotenv() -openai_key = os.getenv("OPENAI_APIKEY") - graph_config = { "llm": { "api_key": os.environ["AZURE_OPENAI_KEY"], diff --git a/examples/azure/document_scraper_azure.py b/examples/azure/document_scraper_azure.py new file mode 100644 index 00000000..43f00678 --- /dev/null +++ b/examples/azure/document_scraper_azure.py @@ -0,0 +1,44 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "api_key": os.environ["AZURE_OPENAI_KEY"], + "model": "azure_openai/gpt-4o" + }, + "verbose": True, + "headless": False +} + + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). 
He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/bedrock/csv_scraper_bedrock.py b/examples/bedrock/csv_scraper_bedrock.py index a69417c0..cf453ab3 100644 --- a/examples/bedrock/csv_scraper_bedrock.py +++ b/examples/bedrock/csv_scraper_bedrock.py @@ -10,7 +10,7 @@ import pandas as pd from scrapegraphai.graphs import CSVScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info load_dotenv() @@ -48,13 +48,3 @@ result = csv_scraper_graph.run() print(json.dumps(result, indent=4)) -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = csv_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") diff --git a/examples/bedrock/document_scraper_bedrock.py b/examples/bedrock/document_scraper_bedrock.py new file mode 100644 index 00000000..f9b99e1f --- /dev/null +++ b/examples/bedrock/document_scraper_bedrock.py @@ -0,0 +1,42 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "client": "client_name", + "model": 
"bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + "temperature": 0.0 + } +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/deepseek/document_scraper_deepseek.py b/examples/deepseek/document_scraper_deepseek.py new file mode 100644 index 00000000..e94826d3 --- /dev/null +++ b/examples/deepseek/document_scraper_deepseek.py @@ -0,0 +1,44 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +deepseek_key = os.getenv("DEEPSEEK_APIKEY") + +graph_config = { + "llm": { + "model": "deepseek/deepseek-chat", + "api_key": deepseek_key, + }, + "verbose": True, +} + + +source = """ + The Divine Comedy, Italian La Divina Commedia, original 
name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/deepseek/json_scraper_deepseek.py b/examples/deepseek/json_scraper_deepseek.py index 9fc2f5c9..d714c1db 100644 --- a/examples/deepseek/json_scraper_deepseek.py +++ b/examples/deepseek/json_scraper_deepseek.py @@ -4,7 +4,7 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info load_dotenv() @@ -44,14 +44,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") diff --git a/examples/ernie/document_scraper_anthropic_ernie.py 
b/examples/ernie/document_scraper_anthropic_ernie.py new file mode 100644 index 00000000..74d91be1 --- /dev/null +++ b/examples/ernie/document_scraper_anthropic_ernie.py @@ -0,0 +1,39 @@ +""" +document_scraper example +""" +import os +import json +from scrapegraphai.graphs import DocumentScraperGraph + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +graph_config = { + "llm": { + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + } +} + + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. 
+""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/ernie/json_scraper_ernie.py b/examples/ernie/json_scraper_ernie.py index 4010bfde..35324da2 100644 --- a/examples/ernie/json_scraper_ernie.py +++ b/examples/ernie/json_scraper_ernie.py @@ -3,7 +3,7 @@ """ import os from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info # ************************************************ # Read the JSON file @@ -41,14 +41,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") diff --git a/examples/fireworks/document_scraper_anthropic_fireworks.py b/examples/fireworks/document_scraper_anthropic_fireworks.py new file mode 100644 index 00000000..33f6c0d5 --- /dev/null +++ b/examples/fireworks/document_scraper_anthropic_fireworks.py @@ -0,0 +1,44 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +fireworks_api_key = os.getenv("FIREWORKS_APIKEY") + +graph_config = { + "llm": { + "api_key": fireworks_api_key, + "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" + }, + "verbose": True, + "headless": False, +} + + +source = """ + The Divine 
Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/fireworks/json_scraper_fireworkspy.py b/examples/fireworks/json_scraper_fireworkspy.py index a8fd1d7a..ef1b8264 100644 --- a/examples/fireworks/json_scraper_fireworkspy.py +++ b/examples/fireworks/json_scraper_fireworkspy.py @@ -4,7 +4,7 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info load_dotenv() # ************************************************ @@ -45,15 +45,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv 
-convert_to_csv(result, "result") -convert_to_json(result, "result") - diff --git a/examples/google_genai/document_scraper_gemini.py b/examples/google_genai/document_scraper_gemini.py new file mode 100644 index 00000000..efb22d68 --- /dev/null +++ b/examples/google_genai/document_scraper_gemini.py @@ -0,0 +1,41 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +gemini_key = os.getenv("GOOGLE_APIKEY") + +graph_config = { + "llm": { + "api_key": gemini_key, + "model": "google_genai/gemini-pro", + }, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. 
+""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/google_genai/json_scraper_gemini.py b/examples/google_genai/json_scraper_gemini.py index 1b20a92a..343f1d42 100644 --- a/examples/google_genai/json_scraper_gemini.py +++ b/examples/google_genai/json_scraper_gemini.py @@ -4,7 +4,8 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info + load_dotenv() # ************************************************ @@ -43,14 +44,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") diff --git a/examples/google_vertexai/document_scraper_vertex.py b/examples/google_vertexai/document_scraper_vertex.py new file mode 100644 index 00000000..58f79a91 --- /dev/null +++ b/examples/google_vertexai/document_scraper_vertex.py @@ -0,0 +1,41 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +gemini_key = os.getenv("GOOGLE_APIKEY") + +graph_config = { + "llm": { + "api_key": gemini_key, + "model": "google_vertexai/gemini-1.5-pro", + }, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La 
commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/google_vertexai/json_scraper_gemini.py b/examples/google_vertexai/json_scraper_gemini.py index bf28da03..8e9f5a9f 100644 --- a/examples/google_vertexai/json_scraper_gemini.py +++ b/examples/google_vertexai/json_scraper_gemini.py @@ -5,7 +5,7 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info load_dotenv() # ************************************************ @@ -44,14 +44,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") 
diff --git a/examples/groq/document_scraper_groq.py b/examples/groq/document_scraper_groq.py new file mode 100644 index 00000000..53c64f73 --- /dev/null +++ b/examples/groq/document_scraper_groq.py @@ -0,0 +1,45 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +groq_key = os.getenv("GROQ_APIKEY") + +graph_config = { + "llm": { + "model": "groq/gemma-7b-it", + "api_key": groq_key, + "temperature": 0 + }, + "verbose": True, + "headless": False +} + + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. 
+""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/groq/json_scraper_groq.py b/examples/groq/json_scraper_groq.py index d38e1505..cac0f10d 100644 --- a/examples/groq/json_scraper_groq.py +++ b/examples/groq/json_scraper_groq.py @@ -4,7 +4,7 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info load_dotenv() @@ -47,15 +47,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") - diff --git a/examples/huggingfacehub/document_scraper_huggingfacehub.py b/examples/huggingfacehub/document_scraper_huggingfacehub.py new file mode 100644 index 00000000..5992f077 --- /dev/null +++ b/examples/huggingfacehub/document_scraper_huggingfacehub.py @@ -0,0 +1,57 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph +from langchain_community.llms import HuggingFaceEndpoint +from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +# ************************************************ +# Define the configuration for the graph +# ************************************************ 
+HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN') + +repo_id = "mistralai/Mistral-7B-Instruct-v0.2" + +llm_model_instance = HuggingFaceEndpoint( + repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN +) + +embedder_model_instance = HuggingFaceInferenceAPIEmbeddings( + api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2" +) + +# ************************************************ +# Create the DocumentScraperGraph instance and run it +# ************************************************ + +graph_config = { + "llm": {"model_instance": llm_model_instance}, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. 
+""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) \ No newline at end of file diff --git a/examples/huggingfacehub/json_scraper_huggingfacehub.py b/examples/huggingfacehub/json_scraper_huggingfacehub.py index d709cc0d..f8223711 100644 --- a/examples/huggingfacehub/json_scraper_huggingfacehub.py +++ b/examples/huggingfacehub/json_scraper_huggingfacehub.py @@ -5,7 +5,7 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info from langchain_community.llms import HuggingFaceEndpoint from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings @@ -57,15 +57,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") - diff --git a/examples/local_models/document_scraper_ollama.py b/examples/local_models/document_scraper_ollama.py new file mode 100644 index 00000000..6853a549 --- /dev/null +++ b/examples/local_models/document_scraper_ollama.py @@ -0,0 +1,42 @@ +""" +document_scraper example +""" +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ +graph_config = { + "llm": { + "model": "ollama/llama3", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified 
explicitly + "model_tokens": 4000, + }, + "verbose": True, + "headless": False, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +pdf_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = pdf_scraper_graph.run() + +print(json.dumps(result, indent=4)) diff --git a/examples/mistral/document_scraper_mistral.py b/examples/mistral/document_scraper_mistral.py new file mode 100644 index 00000000..aa75e9c4 --- /dev/null +++ b/examples/mistral/document_scraper_mistral.py @@ -0,0 +1,43 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +mistral_key = os.getenv("MISTRAL_API_KEY") + +graph_config = { + "llm": { + "api_key": mistral_key, + "model": "mistralai/open-mistral-nemo", + }, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in 
Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +document_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = document_scraper_graph.run() + +print(json.dumps(result, indent=4)) diff --git a/examples/mistral/json_scraper_mistral.py b/examples/mistral/json_scraper_mistral.py index 140ea58f..0b9be3ec 100644 --- a/examples/mistral/json_scraper_mistral.py +++ b/examples/mistral/json_scraper_mistral.py @@ -4,7 +4,7 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import JSONScraperGraph -from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +from scrapegraphai.utils import prettify_exec_info load_dotenv() @@ -44,14 +44,3 @@ result = json_scraper_graph.run() print(result) - -# ************************************************ -# Get graph execution info -# ************************************************ - -graph_exec_info = json_scraper_graph.get_execution_info() -print(prettify_exec_info(graph_exec_info)) - -# Save to json or csv -convert_to_csv(result, "result") -convert_to_json(result, "result") diff --git a/examples/moonshot/document_scraper_moonshot.py b/examples/moonshot/document_scraper_moonshot.py new file mode 100644 index
00000000..aa75e9c4 --- /dev/null +++ b/examples/moonshot/document_scraper_moonshot.py @@ -0,0 +1,51 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from langchain_community.chat_models.moonshot import MoonshotChat +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +# Moonshot is used through LangChain's MoonshotChat, handed to the graph as a ready-made model instance +llm_instance_config = { + "model": "moonshot-v1-8k", + "base_url": "https://api.moonshot.cn/v1", + "moonshot_api_key": os.getenv("MOONSHOT_API_KEY"), +} + +llm_model_instance = MoonshotChat(**llm_instance_config) + +graph_config = { + "llm": { + "model_instance": llm_model_instance, + "model_tokens": 8192, + }, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
+""" + +document_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = document_scraper_graph.run() + +print(json.dumps(result, indent=4)) diff --git a/examples/nemotron/document_scraper_nemotron.py b/examples/nemotron/document_scraper_nemotron.py new file mode 100644 index 00000000..618047ee --- /dev/null +++ b/examples/nemotron/document_scraper_nemotron.py @@ -0,0 +1,44 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + + +nemotron_key = os.getenv("NEMOTRON_APIKEY") + +graph_config = { + "llm": { + "api_key": nemotron_key, + "model": "nvidia/meta/llama3-70b-instruct", + }, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
+""" + +document_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = document_scraper_graph.run() + +print(json.dumps(result, indent=4)) diff --git a/examples/oneapi/document_scraper_oneapi.py b/examples/oneapi/document_scraper_oneapi.py new file mode 100644 index 00000000..99ffe295 --- /dev/null +++ b/examples/oneapi/document_scraper_oneapi.py @@ -0,0 +1,42 @@ +""" +document_scraper example +""" +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "api_key": "***************************", + "model": "oneapi/qwen-turbo", + "base_url": "http://127.0.0.1:3000/v1", # Set the OneAPI URL + } +} + + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
+""" + +document_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = document_scraper_graph.run() + +print(json.dumps(result, indent=4)) diff --git a/examples/oneapi/json_scraper_multi_oneapi.py b/examples/oneapi/json_scraper_multi_oneapi.py index 5dc365aa..fc1c4555 100644 --- a/examples/oneapi/json_scraper_multi_oneapi.py +++ b/examples/oneapi/json_scraper_multi_oneapi.py @@ -12,6 +12,7 @@ "base_url": "http://127.0.0.1:3000/v1", # 设置 OneAPI URL } } + FILE_NAME = "inputs/example.json" curr_dir = os.path.dirname(os.path.realpath(__file__)) file_path = os.path.join(curr_dir, FILE_NAME) diff --git a/examples/openai/document_scraper_openai.py b/examples/openai/document_scraper_openai.py new file mode 100644 index 00000000..f9475446 --- /dev/null +++ b/examples/openai/document_scraper_openai.py @@ -0,0 +1,39 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + + +openai_key = os.getenv("OPENAI_APIKEY") + +graph_config = { + "llm": { + "api_key": openai_key, + "model": "openai/gpt-4o", + } +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno).
He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise. +""" + +document_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = document_scraper_graph.run() + +print(json.dumps(result, indent=4)) diff --git a/examples/together/document_scraper_together.py b/examples/together/document_scraper_together.py new file mode 100644 index 00000000..c3324330 --- /dev/null +++ b/examples/together/document_scraper_together.py @@ -0,0 +1,39 @@ +""" +document_scraper example +""" +import os +import json +from dotenv import load_dotenv +from scrapegraphai.graphs import DocumentScraperGraph + +load_dotenv() + +together_key = os.getenv("TOGETHER_APIKEY") + +graph_config = { + "llm": { + "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "api_key": together_key, + }, + "verbose": True, +} + +source = """ + The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian + circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. + Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante + from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. + Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood + through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided + by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, + the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
+""" + +document_scraper_graph = DocumentScraperGraph( + prompt="Summarize the text and find the main topics", + source=source, + config=graph_config, +) +result = document_scraper_graph.run() + +print(json.dumps(result, indent=4)) diff --git a/pyproject.toml b/pyproject.toml index 6baff74f..86b4be43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "scrapegraphai" -version = "1.31.1b4" +version = "1.31.1"