6 changes: 2 additions & 4 deletions scrapegraphai/graphs/markdownify_graph.py
@@ -64,9 +64,7 @@ def __init__(
graph_name="Markdownify",
)

-    def execute(
-        self, initial_state: Dict
-    ) -> Tuple[Dict, List[Dict]]:
+    def execute(self, initial_state: Dict) -> Tuple[Dict, List[Dict]]:
"""
Execute the markdownify graph.

@@ -80,4 +78,4 @@ def execute(
- Dictionary with the markdown result in the "markdown" key
- List of execution logs
"""
-        return super().execute(initial_state)
+        return super().execute(initial_state)
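For context, the reflowed execute() is behavior-preserving: it still returns the final state plus a list of execution logs, with the converted page under the "markdown" key. A minimal usage sketch; the constructor arguments and the initial-state key are assumptions, only execute() and the "markdown" key come from this diff:

from scrapegraphai.graphs.markdownify_graph import MarkdownifyGraph

# Constructor arguments are assumed for illustration; they are not
# shown in this diff.
graph = MarkdownifyGraph(
    source="https://example.com",
    config={"llm": {"model": "openai/gpt-4o-mini"}},
)

# Per the docstring: returns the result dict and the execution logs.
final_state, logs = graph.execute({"url": "https://example.com"})  # assumed key
print(final_state["markdown"])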
1 change: 0 additions & 1 deletion scrapegraphai/helpers/models_tokens.py
@@ -32,7 +32,6 @@
"o1-preview": 128000,
"o1-mini": 128000,
"o1": 128000,
"gpt-4.5-preview": 128000,
"o3-mini": 200000,
},
"azure_openai": {
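models_tokens is a plain nested dict mapping provider to model name to context-window size, so deleting the "gpt-4.5-preview" entry above simply makes that lookup miss. A quick sketch of the lookup pattern (the fallback value is an assumption, not library behavior):

from scrapegraphai.helpers.models_tokens import models_tokens

limit = models_tokens["openai"].get("o3-mini")         # 200000 per this file
gone = models_tokens["openai"].get("gpt-4.5-preview")  # None after this change
context_window = limit or 8192                         # assumed fallback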
3 changes: 2 additions & 1 deletion scrapegraphai/models/xai.py
@@ -1,6 +1,7 @@
"""
xAI Grok Module
"""

from langchain_openai import ChatOpenAI


@@ -19,4 +20,4 @@ def __init__(self, **llm_config):
llm_config["openai_api_key"] = llm_config.pop("api_key")
llm_config["openai_api_base"] = "https://api.x.ai/v1"

-        super().__init__(**llm_config)
+        super().__init__(**llm_config)
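Because the class wraps ChatOpenAI, remaps api_key to openai_api_key, and pins openai_api_base to https://api.x.ai/v1, it can be used like any LangChain chat model. A hedged sketch, assuming the module exports the class as XAI and that the placeholder model name is valid for the xAI API:

from scrapegraphai.models.xai import XAI

# api_key is remapped to openai_api_key by the __init__ shown above;
# "grok-2" is a placeholder model name.
llm = XAI(api_key="xai-...", model="grok-2")
print(llm.invoke("Say hello in one word.").content)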
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/markdownify_node.py
@@ -64,4 +64,4 @@ def execute(self, state: dict) -> dict:
# Update state with markdown content
state.update({self.output[0]: markdown_content})

-        return state
+        return state
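This hunk pins down the node's contract: execute() writes the converted markdown into the state under the node's first configured output key and returns the mutated state. A sketch of that contract; the constructor arguments are assumptions, since the class definition sits outside this hunk:

from scrapegraphai.nodes.markdownify_node import MarkdownifyNode

# input/output names are illustrative only.
node = MarkdownifyNode(input="html_content", output=["markdown"])
state = node.execute({"html_content": "<h1>Title</h1>"})
print(state["markdown"])  # state.update({self.output[0]: ...}) put it here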
140 changes: 80 additions & 60 deletions scrapegraphai/utils/code_error_analysis.py
@@ -14,9 +14,9 @@
import json
from typing import Any, Dict, Optional

-from pydantic import BaseModel, Field, validator
-from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import PromptTemplate
+from pydantic import BaseModel, Field, validator

from ..prompts import (
TEMPLATE_EXECUTION_ANALYSIS,
@@ -28,20 +28,25 @@

class AnalysisError(Exception):
"""Base exception for code analysis errors."""

pass


class InvalidStateError(AnalysisError):
"""Exception raised when state dictionary is missing required keys."""

pass


class CodeAnalysisState(BaseModel):
"""Base model for code analysis state validation."""

generated_code: str = Field(..., description="The generated code to analyze")
-    errors: Dict[str, Any] = Field(..., description="Dictionary containing error information")
+    errors: Dict[str, Any] = Field(
+        ..., description="Dictionary containing error information"
+    )

-    @validator('errors')
+    @validator("errors")
def validate_errors(cls, v):
"""Ensure errors dictionary has expected structure."""
if not isinstance(v, dict):
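A small sketch of how CodeAnalysisState behaves; note that the Dict[str, Any] annotation already rejects non-dict payloads before the custom validator runs, so the isinstance check is a second line of defense:

from pydantic import ValidationError

# A well-formed state validates cleanly.
ok = CodeAnalysisState(
    generated_code="print('hi')",
    errors={"syntax": "unexpected EOF while parsing"},
)

# A non-dict errors payload raises ValidationError.
try:
    CodeAnalysisState(generated_code="print('hi')", errors="boom")
except ValidationError as exc:
    print(exc)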
@@ -51,39 +56,41 @@ def validate_errors(cls, v):

class ExecutionAnalysisState(CodeAnalysisState):
"""Model for execution analysis state validation."""

html_code: Optional[str] = Field(None, description="HTML code if available")
html_analysis: Optional[str] = Field(None, description="Analysis of HTML code")

-    @validator('errors')
+    @validator("errors")
def validate_execution_errors(cls, v):
"""Ensure errors dictionary contains execution key."""
super().validate_errors(v)
-        if 'execution' not in v:
+        if "execution" not in v:
raise ValueError("errors dictionary must contain 'execution' key")
return v


class ValidationAnalysisState(CodeAnalysisState):
"""Model for validation analysis state validation."""

json_schema: Dict[str, Any] = Field(..., description="JSON schema for validation")
execution_result: Any = Field(..., description="Result of code execution")

-    @validator('errors')
+    @validator("errors")
def validate_validation_errors(cls, v):
"""Ensure errors dictionary contains validation key."""
super().validate_errors(v)
-        if 'validation' not in v:
+        if "validation" not in v:
raise ValueError("errors dictionary must contain 'validation' key")
return v


def get_optimal_analysis_template(error_type: str) -> str:
"""
Returns the optimal prompt template based on the error type.

Args:
error_type (str): Type of error to analyze.

Returns:
str: The prompt template text.
"""
@@ -106,10 +113,10 @@ def syntax_focused_analysis(state: Dict[str, Any], llm_model) -> str:

Returns:
str: The result of the syntax error analysis.

Raises:
InvalidStateError: If state is missing required keys.

Example:
>>> state = {
'generated_code': 'print("Hello World")',
@@ -121,26 +128,28 @@ def syntax_focused_analysis(state: Dict[str, Any], llm_model) -> str:
# Validate state using Pydantic model
validated_state = CodeAnalysisState(
generated_code=state.get("generated_code", ""),
-            errors=state.get("errors", {})
+            errors=state.get("errors", {}),
)

# Check if syntax errors exist
if "syntax" not in validated_state.errors:
raise InvalidStateError("No syntax errors found in state dictionary")

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("syntax"),
-            input_variables=["generated_code", "errors"]
+            input_variables=["generated_code", "errors"],
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated state
-        return chain.invoke({
-            "generated_code": validated_state.generated_code,
-            "errors": validated_state.errors["syntax"]
-        })

+        return chain.invoke(
+            {
+                "generated_code": validated_state.generated_code,
+                "errors": validated_state.errors["syntax"],
+            }
+        )

except KeyError as e:
raise InvalidStateError(f"Missing required key in state dictionary: {e}")
except Exception as e:
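Putting the syntax path together: llm_model can be any LangChain-compatible chat model, since it is simply piped into the prompt chain. A usage sketch; the model name and error payload are illustrative:

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")  # assumed model choice
state = {
    "generated_code": "def f(:\n    pass",
    "errors": {"syntax": "SyntaxError: invalid syntax (line 1)"},
}
print(syntax_focused_analysis(state, llm))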
@@ -157,10 +166,10 @@ def execution_focused_analysis(state: Dict[str, Any], llm_model) -> str:

Returns:
str: The result of the execution error analysis.

Raises:
InvalidStateError: If state is missing required keys.

Example:
>>> state = {
'generated_code': 'print(x)',
@@ -176,24 +185,26 @@ def execution_focused_analysis(state: Dict[str, Any], llm_model) -> str:
generated_code=state.get("generated_code", ""),
errors=state.get("errors", {}),
html_code=state.get("html_code", ""),
-            html_analysis=state.get("html_analysis", "")
+            html_analysis=state.get("html_analysis", ""),
)

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("execution"),
input_variables=["generated_code", "errors", "html_code", "html_analysis"],
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated state
-        return chain.invoke({
-            "generated_code": validated_state.generated_code,
-            "errors": validated_state.errors["execution"],
-            "html_code": validated_state.html_code,
-            "html_analysis": validated_state.html_analysis,
-        })

+        return chain.invoke(
+            {
+                "generated_code": validated_state.generated_code,
+                "errors": validated_state.errors["execution"],
+                "html_code": validated_state.html_code,
+                "html_analysis": validated_state.html_analysis,
+            }
+        )

except KeyError as e:
raise InvalidStateError(f"Missing required key in state dictionary: {e}")
except Exception as e:
@@ -211,10 +222,10 @@ def validation_focused_analysis(state: Dict[str, Any], llm_model) -> str:

Returns:
str: The result of the validation error analysis.

Raises:
InvalidStateError: If state is missing required keys.

Example:
>>> state = {
'generated_code': 'return {"name": "John"}',
@@ -230,24 +241,31 @@ def validation_focused_analysis(state: Dict[str, Any], llm_model) -> str:
generated_code=state.get("generated_code", ""),
errors=state.get("errors", {}),
json_schema=state.get("json_schema", {}),
-            execution_result=state.get("execution_result", {})
+            execution_result=state.get("execution_result", {}),
)

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("validation"),
-            input_variables=["generated_code", "errors", "json_schema", "execution_result"],
+            input_variables=[
+                "generated_code",
+                "errors",
+                "json_schema",
+                "execution_result",
+            ],
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated state
-        return chain.invoke({
-            "generated_code": validated_state.generated_code,
-            "errors": validated_state.errors["validation"],
-            "json_schema": validated_state.json_schema,
-            "execution_result": validated_state.execution_result,
-        })

+        return chain.invoke(
+            {
+                "generated_code": validated_state.generated_code,
+                "errors": validated_state.errors["validation"],
+                "json_schema": validated_state.json_schema,
+                "execution_result": validated_state.execution_result,
+            }
+        )

except KeyError as e:
raise InvalidStateError(f"Missing required key in state dictionary: {e}")
except Exception as e:
@@ -268,10 +286,10 @@ def semantic_focused_analysis(

Returns:
str: The result of the semantic error analysis.

Raises:
InvalidStateError: If state or comparison_result is missing required keys.

Example:
>>> state = {
'generated_code': 'def add(a, b): return a + b'
@@ -286,30 +304,32 @@
# Validate state using Pydantic model
validated_state = CodeAnalysisState(
generated_code=state.get("generated_code", ""),
-            errors=state.get("errors", {})
+            errors=state.get("errors", {}),
)

# Validate comparison_result
if "differences" not in comparison_result:
raise InvalidStateError("comparison_result missing 'differences' key")
if "explanation" not in comparison_result:
raise InvalidStateError("comparison_result missing 'explanation' key")

# Create prompt template and chain
prompt = PromptTemplate(
template=get_optimal_analysis_template("semantic"),
input_variables=["generated_code", "differences", "explanation"],
)
chain = prompt | llm_model | StrOutputParser()

# Execute chain with validated inputs
-        return chain.invoke({
-            "generated_code": validated_state.generated_code,
-            "differences": json.dumps(comparison_result["differences"], indent=2),
-            "explanation": comparison_result["explanation"],
-        })

+        return chain.invoke(
+            {
+                "generated_code": validated_state.generated_code,
+                "differences": json.dumps(comparison_result["differences"], indent=2),
+                "explanation": comparison_result["explanation"],
+            }
+        )

except KeyError as e:
raise InvalidStateError(f"Missing required key: {e}")
except Exception as e:
-        raise AnalysisError(f"Semantic analysis failed: {str(e)}")
+        raise AnalysisError(f"Semantic analysis failed: {str(e)}")
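For the semantic path, comparison_result must carry both "differences" and "explanation" or the guards above raise InvalidStateError. A closing sketch with made-up values; the parameter order follows the docstring example, as the full signature is folded out of this diff:

from langchain_openai import ChatOpenAI

llm_model = ChatOpenAI(model="gpt-4o-mini")  # assumed model choice
comparison_result = {
    "differences": ["output key 'name' instead of expected 'full_name'"],
    "explanation": "The generated code emits a different field name.",
}
state = {"generated_code": "def add(a, b): return a + b", "errors": {}}
print(semantic_focused_analysis(state, comparison_result, llm_model))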