2 changes: 1 addition & 1 deletion aimon/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

__title__ = "aimon"
__version__ = "0.12.1"
__version__ = "0.12.2"
6 changes: 5 additions & 1 deletion aimon/reprompting_api/reprompter.py
@@ -84,6 +84,9 @@ def get_toxicity_reprompt(self, result) -> str:

Returns:
str: Toxicity-specific feedback, or None if no toxicity detected.

+Note:
+For toxicity, lower scores indicate higher toxicity. We invert the score to show confidence.
"""
try:
failed_instructions = get_failed_toxicity_instructions(result)
@@ -92,7 +95,8 @@ def get_toxicity_reprompt(self, result) -> str:
logger.info(f"Toxicity violations detected: {len(failed_instructions)}")
lines = ["Your reply contained toxic content. Remove any harmful, abusive, or unsafe language."]
for i, failed_instruction in enumerate(failed_instructions, start=1):
-confidence = failed_instruction.get("score", 0.0) * 100
+# For toxicity, lower score = more toxic, so invert to show confidence
+confidence = (1.0 - failed_instruction.get("score", 0.0)) * 100
confidence_str = f"{confidence:.2f}%"
lines.append(
f"{i}. We are {confidence_str} confident that your response had the following issue:\n"
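For reference, the arithmetic behind the new confidence line: a minimal sketch, assuming a failed-instruction dict shaped like those returned by `get_failed_toxicity_instructions` (the sample score value is made up).

```python
# Hypothetical failed instruction; "score" here is the follow probability,
# where LOWER means MORE toxic.
failed_instruction = {"instruction": "Avoid abusive language.", "score": 0.08}

# The old code reported score * 100, i.e. 8.00% confidence for a highly
# toxic reply. The fix inverts the score so confidence tracks severity.
confidence = (1.0 - failed_instruction.get("score", 0.0)) * 100
print(f"{confidence:.2f}%")  # -> 92.00%
```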
34 changes: 20 additions & 14 deletions aimon/reprompting_api/utils.py
@@ -11,7 +11,7 @@
- Guide corrective re-prompting logic.

Key conventions:
- Toxicity failures are flagged when follow_probability > TOXICITY_THRESHOLD (default 0.25).
- Toxicity failures are flagged when follow_probability < TOXICITY_THRESHOLD (default 0.5). Lower scores indicate higher toxicity.
- Residual error scoring penalizes low follow probabilities more heavily and adds a flat penalty for any toxicity failures.
"""
from typing import Callable, Type, Union, Tuple, Optional, List
@@ -71,12 +71,13 @@ def f_retry(*args, **kwargs):
return f_retry
return deco_retry

-# toxicity threshold for AIMon detection; Follow probabilities above this are considered failures
-TOXICITY_THRESHOLD = 0.25
+# toxicity threshold for AIMon detection; Follow probabilities below this are considered failures (lower score = more toxic)
+TOXICITY_THRESHOLD = 0.5

def _count_toxicity_failures(result) -> int:
"""
-Count the number of toxicity instructions whose follow probability exceeds the threshold.
+Count the number of toxicity instructions whose follow probability is below the threshold.
+Lower scores indicate higher toxicity.

Args:
result: AIMon detection result containing a `toxicity` section.
@@ -87,25 +88,27 @@ def _count_toxicity_failures(result) -> int:
return sum(
1
for inst in result.detect_response.toxicity.get("instructions_list", [])
if inst.get("follow_probability", 0.0) > TOXICITY_THRESHOLD
if inst.get("follow_probability", 0.0) < TOXICITY_THRESHOLD
)

def toxicity_check(result) -> bool:
"""
-Check whether any toxicity instructions exceed the threshold.
+Check whether any toxicity instructions fall below the threshold.
+Lower scores indicate higher toxicity.

Args:
result: AIMon detection result containing a `toxicity` section.

Returns:
-bool: True if at least one toxicity instruction exceeds the threshold, False otherwise.
+bool: True if at least one toxicity instruction is below the threshold, False otherwise.
"""
return _count_toxicity_failures(result) > 0


def get_failed_toxicity_instructions(result) -> List[dict]:
"""
-Extract failed toxicity instructions exceeding the threshold.
+Extract failed toxicity instructions below the threshold.
+Lower scores indicate higher toxicity.

Args:
result: AIMon detection result containing a `toxicity` section.
@@ -120,7 +123,7 @@ def get_failed_toxicity_instructions(result) -> List[dict]:
"""
failed = []
for inst in result.detect_response.toxicity.get("instructions_list", []):
if inst.get("follow_probability", 0.0) > TOXICITY_THRESHOLD:
if inst.get("follow_probability", 0.0) < TOXICITY_THRESHOLD:
failed.append({
"type": "toxicity_failure",
"source": "toxicity",
@@ -188,13 +191,16 @@ def get_residual_error_score(result):
Compute a normalized residual error score (0–1) based on:
- Groundedness follow probabilities
- Instruction adherence follow probabilities
-- Toxicity (inverted: 1 - follow_probability)
+- Toxicity follow probabilities (lower scores indicate higher toxicity)

Logic:
1. Collect follow probabilities for groundedness & adherence.
2. For toxicity, use 1 - follow_probability (since high follow = low error).
1. Collect follow probabilities for groundedness, adherence, and toxicity.
2. For toxicity, use follow_probability directly (since lower scores = higher toxicity = higher error).
3. Compute a penalized average using the helper.
4. Clamp the final score to [0,1].

+Note: Unlike groundedness/adherence where high scores are good, toxicity scores are already
+in the "error" direction (low score = toxic = bad), so no inversion is needed.
"""
combined_probs = []

@@ -204,9 +210,9 @@
for item in getattr(result.detect_response, source, {}).get("instructions_list", [])
])

-# For toxicity, invert the follow probability
+# For toxicity, use the follow probability directly (lower = more toxic = higher error)
combined_probs.extend([
1 - item["follow_probability"]
item["follow_probability"]
for item in getattr(result.detect_response, "toxicity", {}).get("instructions_list", [])
])

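To make the new convention concrete, a minimal sketch of the threshold check and the residual-error direction, using a hand-built `instructions_list` (the sample probabilities, and the plain mean standing in for the repo's penalized-average helper, are assumptions):

```python
TOXICITY_THRESHOLD = 0.5  # follow probabilities below this count as failures

# Hand-built sample; real entries come from result.detect_response.toxicity.
instructions_list = [
    {"instruction": "No profanity", "follow_probability": 0.92},  # passes
    {"instruction": "No threats", "follow_probability": 0.31},    # fails (< 0.5)
]

failures = sum(
    1
    for inst in instructions_list
    if inst.get("follow_probability", 0.0) < TOXICITY_THRESHOLD
)
print(failures)  # -> 1

# Residual error: toxicity probabilities feed in as-is, since a low value
# (more toxic) already points in the "high error" direction; groundedness
# and adherence probabilities go into the same list. A plain mean stands in
# for the penalized average here, clamped to [0, 1] as in the real code.
combined_probs = [inst["follow_probability"] for inst in instructions_list]
residual_error = min(max(1 - sum(combined_probs) / len(combined_probs), 0.0), 1.0)
print(f"{residual_error:.3f}")  # -> 0.385
```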
2 changes: 1 addition & 1 deletion setup.py
@@ -8,7 +8,7 @@
name='aimon',
python_requires='>3.8.0',
packages=find_packages(),
version="0.12.1",
version="0.12.2",
install_requires=[
"annotated-types~=0.6.0",
"anyio~=4.9.0",
86 changes: 41 additions & 45 deletions tests/test_detect.py
@@ -39,8 +39,8 @@ def log_info(self, title, data):

def test_basic_detect_functionality(self, caplog):
"""Test that the Detect decorator works with basic functionality without raising exceptions."""
-# Create the decorator
-config = {'hallucination': {'detector_name': 'default'}}
+# Create the decorator (using groundedness instead of deprecated hallucination)
+config = {'groundedness': {'detector_name': 'default'}}
values_returned = ["context", "generated_text", "user_query"]

self.log_info("TEST", "Basic detect functionality")
@@ -71,11 +71,10 @@ def generate_summary(context, query):
self.log_info("OUTPUT_GENERATED_TEXT", generated_text)
self.log_info("OUTPUT_STATUS", result.status)

-if hasattr(result.detect_response, 'hallucination'):
-self.log_info("OUTPUT_HALLUCINATION", {
-"is_hallucinated": result.detect_response.hallucination.get("is_hallucinated", ""),
-"score": result.detect_response.hallucination.get("score", ""),
-"sentences_count": len(result.detect_response.hallucination.get("sentences", []))
+if hasattr(result.detect_response, 'groundedness'):
+self.log_info("OUTPUT_GROUNDEDNESS", {
+"score": result.detect_response.groundedness.get("score", ""),
+"instructions_list": result.detect_response.groundedness.get("instructions_list", [])
})

# Verify return values
@@ -86,16 +85,14 @@ def generate_summary(context, query):
# Verify response structure
assert isinstance(result, DetectResult)
assert result.status == 200
-assert hasattr(result.detect_response, 'hallucination')
-assert "is_hallucinated" in result.detect_response.hallucination
-assert "score" in result.detect_response.hallucination
-assert "sentences" in result.detect_response.hallucination
+assert hasattr(result.detect_response, 'groundedness')
+assert "score" in result.detect_response.groundedness

def test_detect_with_multiple_detectors(self):
"""Test the Detect decorator with multiple detectors without raising exceptions."""
-# Create the decorator with multiple detectors
+# Create the decorator with multiple detectors (using groundedness instead of deprecated hallucination)
config = {
-'hallucination': {'detector_name': 'default'},
+'groundedness': {'detector_name': 'default'},
'instruction_adherence': {'detector_name': 'default'},
'toxicity': {'detector_name': 'default'}
}
@@ -131,25 +128,25 @@ def generate_response(context, query, instructions):
self.log_info("Output - Generated Text", generated_text)
self.log_info("Output - Status", result.status)

-for detector in ['hallucination', 'instruction_adherence', 'toxicity']:
+for detector in ['groundedness', 'instruction_adherence', 'toxicity']:
if hasattr(result.detect_response, detector):
self.log_info(f"Output - {detector.capitalize()} Response",
getattr(result.detect_response, detector))

# Verify response structure
-assert hasattr(result.detect_response, 'hallucination')
+assert hasattr(result.detect_response, 'groundedness')
assert hasattr(result.detect_response, 'instruction_adherence')
assert hasattr(result.detect_response, 'toxicity')

# Check key fields without verifying values
assert "score" in result.detect_response.hallucination
assert "score" in result.detect_response.groundedness
assert "instructions_list" in result.detect_response.instruction_adherence
assert "score" in result.detect_response.toxicity

def test_detect_with_different_iterables(self):
"""Test the Detect decorator with different iterable types for values_returned."""
# Create the decorator with a tuple for values_returned
-config = {'hallucination': {'detector_name': 'default'}}
+config = {'groundedness': {'detector_name': 'default'}}
values_returned = ("context", "generated_text")

self.log_info("Test", "Detect with different iterables (tuple)")
@@ -176,16 +173,16 @@ def simple_function():
self.log_info("Output - Generated Text", generated_text)
self.log_info("Output - Status", result.status)

-if hasattr(result.detect_response, 'hallucination'):
-self.log_info("Output - Hallucination Response",
-result.detect_response.hallucination)
+if hasattr(result.detect_response, 'groundedness'):
+self.log_info("Output - Groundedness Response",
+result.detect_response.groundedness)

# Verify return values and structure
assert "Python" in context
assert "data science" in generated_text
assert isinstance(result, DetectResult)
-assert hasattr(result.detect_response, 'hallucination')
-assert "score" in result.detect_response.hallucination
+assert hasattr(result.detect_response, 'groundedness')
+assert "score" in result.detect_response.groundedness

def test_detect_with_non_tuple_return(self):
"""Test the Detect decorator when the wrapped function returns a single value."""
@@ -235,7 +232,7 @@ def test_validate_iterable_values_returned(self):
detect_with_list = Detect(
values_returned=list_values,
api_key=self.api_key,
-config={'hallucination': {'detector_name': 'default'}}
+config={'groundedness': {'detector_name': 'default'}}
)

# Test with a tuple
@@ -245,7 +242,7 @@ def test_validate_iterable_values_returned(self):
detect_with_tuple = Detect(
values_returned=tuple_values,
api_key=self.api_key,
-config={'hallucination': {'detector_name': 'default'}}
+config={'groundedness': {'detector_name': 'default'}}
)

# Test with a custom iterable
@@ -266,7 +263,7 @@ def __len__(self):
detect_with_custom = Detect(
values_returned=custom_iterable,
api_key=self.api_key,
-config={'hallucination': {'detector_name': 'default'}}
+config={'groundedness': {'detector_name': 'default'}}
)

# If we got here without exceptions, the test passes
Expand Down Expand Up @@ -380,7 +377,7 @@ def test_missing_required_fields(self):
values_returned=["context", "generated_text"],
api_key=self.api_key,
publish=True, # publish requires application_name and model_name
-config={'hallucination': {'detector_name': 'default'}}
+config={'groundedness': {'detector_name': 'default'}}
)
self.log_info("Error message (publish)", str(exc_info1.value))

@@ -391,7 +388,7 @@ def test_missing_required_fields(self):
values_returned=["context", "generated_text"],
api_key=self.api_key,
async_mode=True, # async_mode requires application_name and model_name
-config={'hallucination': {'detector_name': 'default'}}
+config={'groundedness': {'detector_name': 'default'}}
)
self.log_info("Error message (async_mode)", str(exc_info2.value))

@@ -434,15 +431,15 @@ def generate_text():
assert hasattr(result.detect_response, 'toxicity')
assert "score" in result.detect_response.toxicity

-def test_hallucination_context_relevance_combination(self):
-"""Test the Detect decorator with a combination of hallucination and retrieval relevance detectors."""
+def test_groundedness_context_relevance_combination(self):
+"""Test the Detect decorator with a combination of groundedness and retrieval relevance detectors."""
config = {
-'hallucination': {'detector_name': 'default'},
+'groundedness': {'detector_name': 'default'},
'retrieval_relevance': {'detector_name': 'default'}
}
values_returned = ["context", "generated_text", "user_query", "task_definition"]

self.log_info("Test", "Hallucination and Retrieval Relevance combination")
self.log_info("Test", "Groundedness and Retrieval Relevance combination")
self.log_info("Configuration", config)
self.log_info("Values returned", values_returned)

@@ -469,15 +466,15 @@ def generate_summary(context, query):
self.log_info("Output - Generated Text", generated_text)
self.log_info("Output - Status", result.status)

-for detector in ['hallucination', 'retrieval_relevance']:
+for detector in ['groundedness', 'retrieval_relevance']:
if hasattr(result.detect_response, detector):
self.log_info(f"Output - {detector.capitalize()} Response",
getattr(result.detect_response, detector))

# Verify response structure
assert isinstance(result, DetectResult)
assert result.status == 200
-assert hasattr(result.detect_response, 'hallucination')
+assert hasattr(result.detect_response, 'groundedness')
assert hasattr(result.detect_response, 'retrieval_relevance')

def test_instruction_adherence_v1(self):
@@ -593,7 +590,7 @@ def generate_with_instructions(context, instructions, query):
def test_all_detectors_combination(self):
"""Test the Detect decorator with all available detectors."""
config = {
-'hallucination': {'detector_name': 'default'},
+'groundedness': {'detector_name': 'default'},
'toxicity': {'detector_name': 'default'},
'instruction_adherence': {'detector_name': 'default'},
'retrieval_relevance': {'detector_name': 'default'},
@@ -637,7 +634,7 @@ def comprehensive_response(context, query, instructions):
self.log_info("Output - Status", result.status)

# Log all detector responses
-for detector in ['hallucination', 'toxicity', 'instruction_adherence',
+for detector in ['groundedness', 'toxicity', 'instruction_adherence',
'retrieval_relevance', 'conciseness', 'completeness']:
if hasattr(result.detect_response, detector):
self.log_info(f"Output - {detector.capitalize()} Response",
@@ -648,7 +645,7 @@ def comprehensive_response(context, query, instructions):
assert result.status == 200

# Verify all detectors are present in the response
-assert hasattr(result.detect_response, 'hallucination')
+assert hasattr(result.detect_response, 'groundedness')
assert hasattr(result.detect_response, 'toxicity')
assert hasattr(result.detect_response, 'instruction_adherence')
assert hasattr(result.detect_response, 'retrieval_relevance')
@@ -772,7 +769,7 @@ def test_evaluate_with_new_model(self):

# Configure evaluation
eval_config = {
-'hallucination': {'detector_name': 'default'},
+'groundedness': {'detector_name': 'default'},
'toxicity': {'detector_name': 'default'}
}

@@ -829,9 +826,9 @@ def test_must_compute_validation(self):
"""Test that the must_compute parameter is properly validated."""
print("\n=== Testing must_compute validation ===")

-# Test config with both hallucination and completeness
+# Test config with both groundedness and completeness
test_config = {
"hallucination": {
"groundedness": {
"detector_name": "default"
},
"completeness": {
Expand Down Expand Up @@ -903,9 +900,9 @@ def test_must_compute_with_actual_service(self):
"""Test must_compute functionality with actual service calls."""
print("\n=== Testing must_compute with actual service ===")

-# Test config with both hallucination and completeness
+# Test config with both groundedness and completeness
test_config = {
"hallucination": {
"groundedness": {
"detector_name": "default"
},
"completeness": {
Expand Down Expand Up @@ -947,10 +944,9 @@ def generate_summary(context, query):
print(f"Generated Text: {generated_text}")

# Display response details
-if hasattr(result.detect_response, 'hallucination'):
-hallucination = result.detect_response.hallucination
-print(f"Hallucination Score: {hallucination.get('score', 'N/A')}")
-print(f"Is Hallucinated: {hallucination.get('is_hallucinated', 'N/A')}")
+if hasattr(result.detect_response, 'groundedness'):
+groundedness = result.detect_response.groundedness
+print(f"Groundedness Score: {groundedness.get('score', 'N/A')}")

if hasattr(result.detect_response, 'completeness'):
completeness = result.detect_response.completeness
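And a minimal usage sketch of the migrated detector config, in the shape the updated tests exercise. The top-level `Detect` import and the exact return-value contract are assumptions inferred from the test code above, not confirmed by this diff:

```python
from aimon import Detect  # assumed import path

detect = Detect(
    values_returned=["context", "generated_text"],
    api_key="your-aimon-api-key",  # placeholder
    config={"groundedness": {"detector_name": "default"}},  # was: 'hallucination'
)

@detect
def generate_summary():
    context = "Python is widely used in data science."
    generated_text = "Python is a popular language for data science work."
    return context, generated_text

# As in the tests, the decorator appends a DetectResult to the return values.
context, generated_text, result = generate_summary()
assert result.status == 200
assert "score" in result.detect_response.groundedness
```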