Guardrails
Guardrails validate agent input or output. On failure, you choose how to respond: retry with feedback, raise an error, auto-fix, or escalate to a human.
Import
from agentspan.agents import (
Agent, AgentRuntime, Guardrail, GuardrailResult, guardrail,
OnFail, Position, RegexGuardrail, LLMGuardrail,
)
Basic Usage
from agentspan.agents import Agent, AgentRuntime, Guardrail, GuardrailResult, guardrail, OnFail
@guardrail
def word_limit(content: str) -> GuardrailResult:
    """Keep responses concise.

    Args:
        content: The text to validate.

    Returns:
        A failing ``GuardrailResult`` carrying a feedback message when
        ``content`` has more than 500 whitespace-separated words; a passing
        result otherwise.
    """
    # split() with no arguments splits on runs of whitespace, so the length
    # of the resulting list is the word count.
    if len(content.split()) > 500:
        return GuardrailResult(passed=False, message="Too long. Be more concise.")
    return GuardrailResult(passed=True)
# Attach the guardrail; with OnFail.RETRY a failed check appends the feedback
# message and re-runs the LLM.
agent = Agent(
    name="concise_bot",
    model="openai/gpt-4o",
    guardrails=[Guardrail(word_limit, on_fail=OnFail.RETRY)],
)

# The runtime is used as a context manager for the duration of the run.
with AgentRuntime() as runtime:
    result = runtime.run(agent, "Explain quantum computing.")
    result.print_result()
GuardrailResult
GuardrailResult(
    passed: bool,                        # True if content passes
    message: str = "",                   # Feedback for the LLM on retry
    fixed_output: Optional[str] = None,  # Corrected output for on_fail="fix"
)
OnFail Modes
| Mode | Behavior |
|---|---|
| OnFail.RETRY | Append feedback message and re-run the LLM (up to max_retries times) |
| OnFail.RAISE | Fail the execution immediately |
| OnFail.FIX | Replace output with GuardrailResult.fixed_output |
| OnFail.HUMAN | Pause for human review (creates a WaitTask) |
String values ("retry", "raise", "fix", "human") also work.
Guardrail Constructor
Guardrail(
    func: Optional[Callable[[str], GuardrailResult]] = None,
    position: Union[str, Position] = Position.OUTPUT,  # "input" or "output"
    on_fail: Union[str, OnFail] = OnFail.RETRY,
    name: Optional[str] = None,
    max_retries: int = 3,
)
Position
class Position(str, Enum):
    """Where a guardrail runs relative to the LLM call.

    Members are also ``str`` instances, so each compares equal to its
    plain string value ("input" / "output").
    """

    INPUT = "input"    # Run before the LLM call (validate the user's prompt)
    OUTPUT = "output"  # Run after the LLM call (validate the response)
Custom Guardrail
@guardrail
def no_pii(content: str) -> GuardrailResult:
    """Reject responses containing email addresses.

    Args:
        content: The text to scan.

    Returns:
        A failing ``GuardrailResult`` with a feedback message when ``content``
        contains a substring shaped like an email address; a passing result
        otherwise.
    """
    import re

    # search() scans the whole text, so an address anywhere in it is caught.
    if re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", content):
        return GuardrailResult(passed=False, message="Response contains PII (email). Remove it.")
    return GuardrailResult(passed=True)
# On an email match the feedback is sent back and the LLM is re-run,
# up to max_retries (3) times.
agent = Agent(
    name="safe_bot",
    model="openai/gpt-4o",
    guardrails=[
        Guardrail(no_pii, on_fail=OnFail.RETRY, max_retries=3),
    ],
)
RegexGuardrail
Block or allow responses based on regex patterns:
from agentspan.agents import RegexGuardrail
# Block mode: reject responses containing profanity (any listed word,
# matched on word boundaries) and retry.
agent = Agent(
    name="safe_bot",
    model="openai/gpt-4o",
    guardrails=[
        RegexGuardrail(
            patterns=r"\b(badword1|badword2)\b",
            mode="block",
            on_fail=OnFail.RETRY,
        ),
    ],
)
# Allow mode: only accept responses matching the pattern, here a
# date-shaped string such as 2024-01-31; anything else triggers a retry.
agent = Agent(
    name="structured_bot",
    model="openai/gpt-4o",
    guardrails=[
        RegexGuardrail(
            patterns=r"^\d{4}-\d{2}-\d{2}$",
            mode="allow",
            on_fail=OnFail.RETRY,
        ),
    ],
)
LLMGuardrail
Use a second LLM as a judge:
from agentspan.agents import LLMGuardrail
# A second, smaller model acts as the judge; its policy prompt asks for a
# YES/NO verdict with a brief explanation.
factual_check = LLMGuardrail(
    model="openai/gpt-4o-mini",
    policy="Is this response factually accurate and helpful? Reply YES or NO with a brief explanation.",
    on_fail=OnFail.RETRY,
    max_retries=2,
)
# The LLM judge is passed in the guardrails list like any other guardrail.
agent = Agent(
    name="research_bot",
    model="openai/gpt-4o",
    guardrails=[factual_check],
)
Input Guardrails
Validate the user’s prompt before it reaches the LLM:
@guardrail
def no_jailbreak(content: str) -> GuardrailResult:
    """Block jailbreak attempts.

    Args:
        content: The text to screen.

    Returns:
        A failing ``GuardrailResult`` when ``content`` contains any red-flag
        phrase (case-insensitive substring match); a passing result otherwise.
    """
    red_flags = ["ignore previous instructions", "act as", "jailbreak"]
    # Lower-case once up front instead of once per phrase.
    lowered = content.lower()
    if any(flag in lowered for flag in red_flags):
        return GuardrailResult(passed=False, message="Request blocked.")
    return GuardrailResult(passed=True)
# Position.INPUT runs the check before the LLM call; OnFail.RAISE fails the
# execution immediately.
agent = Agent(
    name="safe_bot",
    model="openai/gpt-4o",
    guardrails=[
        Guardrail(no_jailbreak, position=Position.INPUT, on_fail=OnFail.RAISE),
    ],
)
Auto-Fix
Use OnFail.FIX to replace the output automatically:
@guardrail
def ensure_json(content: str) -> GuardrailResult:
    """Ensure the output is valid JSON.

    Args:
        content: The text to validate.

    Returns:
        A passing ``GuardrailResult`` when ``content`` parses as JSON.
        Otherwise a failing result whose ``fixed_output`` is a fallback JSON
        error object, for use as the replacement under ``OnFail.FIX``.
    """
    import json

    # Keep the try body to the single call that can raise.
    try:
        json.loads(content)
    except json.JSONDecodeError:
        return GuardrailResult(
            passed=False,
            message="Output must be valid JSON.",
            fixed_output='{"error": "Could not generate valid JSON"}',
        )
    return GuardrailResult(passed=True)
# With OnFail.FIX the output is replaced by the guardrail's fixed_output.
agent = Agent(
    name="json_bot",
    model="openai/gpt-4o",
    guardrails=[
        Guardrail(ensure_json, on_fail=OnFail.FIX),
    ],
)
Multiple Guardrails
Chain multiple guardrails — they run in order:
# Guardrails run in list order: the input check first, then the two
# output checks.
agent = Agent(
    name="safe_bot",
    model="openai/gpt-4o",
    guardrails=[
        # Input: fail immediately on a jailbreak attempt.
        Guardrail(no_jailbreak, position=Position.INPUT, on_fail=OnFail.RAISE),
        # Output: retry (up to 3 times) if an email address is present.
        Guardrail(no_pii, on_fail=OnFail.RETRY, max_retries=3),
        # Output: retry if the response is too long.
        Guardrail(word_limit, on_fail=OnFail.RETRY),
    ],
)