# persona_policy_validator.py
"""Pathfinder persona-chat policy validator.

Blocks LLM responses that violate the career-assessment interview rules:
no code, no URLs, no web-search claims, no task completion, no role-breaking,
no leakage of system prompt / app configuration / platform details.

Patterns are kept in sync with _POLICY_PATTERNS in persona_service.py.
"""
import re
from guardrails.validator_base import (
    FailResult,
    PassResult,
    ValidationResult,
    Validator,
    register_validator,
)

# Ordered rule table: each entry is (compiled regex, violation label).
# Order matters to callers that stop at the first hit, because the label of the
# earliest matching rule is the one that gets reported.
#
# The regex sources below are intentionally written out verbatim; per the
# module docstring they mirror _POLICY_PATTERNS in persona_service.py.
_POLICY_PATTERNS: list[tuple[re.Pattern, str]] = [
    (re.compile(expr, flags), label)
    for label, flags, expr in (
        # --- Code output -----------------------------------------------------
        # A Markdown fence at the start of any line (hence MULTILINE; this is
        # the only rule that is case-sensitive).
        ("code_block", re.MULTILINE, r"^```"),
        # Opening HTML <code> / <pre> tags, any letter case.
        ("html_code", re.I, r"<code>|<pre>"),
        # --- Web-search claims and URLs --------------------------------------
        # "I searched", "I've searched", "I have searched".
        ("web_search_claim", re.I, r"\bI(?:'ve| have)?\s+searched\b"),
        # "I found (that) on/at/in http(s)://...".
        ("web_found", re.I, r"\bI(?:'ve| have)?\s+found\s+(?:that\s+)?(?:on|at|in)\s+https?://"),
        # "according to (the) web / internet / google / wikipedia / search results".
        ("web_source", re.I, r"\baccording\s+to\s+(?:the\s+)?(?:web|internet|google|wikipedia|search results?)\b"),
        # Any http(s) URL with at least 10 non-space characters after "://".
        ("url_present", re.I, r"https?://\S{10,}"),
        # --- Task completion / offers ----------------------------------------
        # "here's/here is how|the|a|your ... solution/answer/code/..." with the
        # deliverable noun starting within 60 characters on the same line.
        ("task_complete", re.I, r"\bhere(?:'s| is) (?:how|the|a|your)\b.{0,60}(?:solution|answer|code|script|guide|tutorial|result|calculation|formula)\b"),
        # "let me calculate / write / build / search / look up ...".
        ("task_offer", re.I, r"\blet me (?:calculate|compute|write|create|build|generate|show you how|explain how to|search|look up)\b"),
        # "I can/will/would write / create / search / look up / check online ...".
        ("task_will_do", re.I, r"\bI (?:can|will|would) (?:write|create|build|generate|calculate|compute|search|look up|find online|check online)\b"),
        # "step 1:" / "step 1 -" or the phrase "step-by-step".
        ("howto_pattern", re.I, r"\bstep\s+1\s*[:\-]|step-by-step\b"),
        # --- Role breaking ---------------------------------------------------
        # "as a general", "as an AI", "as a large language model", "as an LLM",
        # "as ChatGPT", "as GPT".
        ("role_break", re.I, r"\bas (?:a general|an? AI|a large language model|an LLM|ChatGPT|GPT)\b"),
        # "I'm / I am not just | more than | also capable of".
        ("role_expand", re.I, r"\bI(?:'m| am) (?:not just|more than|also capable of)\b"),
        # "search(ing) online", "search(ing) the web", "search(ing) the
        # internet", "search(ing) for information".
        ("web_search_online", re.I, r"\bsearch(?:ing)?\s+(?:online|the web|the internet|for\s+information)\b"),
        # "look(ing) it up", "look(ing) that up", "look(ing) up online",
        # "look(ing) online for".
        # NOTE: a bare "look up <name>" is NOT matched by this rule; it is only
        # caught when prefixed in a way that the task_offer / task_will_do rules
        # recognise ("let me look up", "I can look up", ...).
        ("web_lookup", re.I, r"\blook(?:ing)?\s+(?:it up|that up|up online|online for)\b"),
        # --- Leakage ---------------------------------------------------------
        # Mentions of "my (system) prompt / instructions / guidelines / directives".
        ("leaks_instructions", re.I, r"\bmy\s+(?:system\s+)?(?:prompt|instructions?|guidelines?|directives?)\b"),
        # "I was/am told | instructed | configured | programmed to".
        ("leaks_config", re.I, r"\bI\s+(?:was|am)\s+(?:told|instructed|configured|programmed)\s+to\b"),
        # "the application/website/platform/database/api/... uses | runs | is built"
        # with at most 60 characters between the two parts.
        ("leaks_app_info", re.I, r"\bthe\s+(?:application|website|platform|database|api|codebase|backend|frontend)\b.{0,60}\b(?:uses?|runs?|is\s+built)\b"),
        # "pathfinder / ravricsolutions / ravric ... uses | built with |
        # powered by | runs on" with at most 40 characters in between.
        ("leaks_platform_info", re.I, r"\b(?:pathfinder|ravricsolutions|ravric)\b.{0,40}\b(?:uses?|built\s+with|powered\s+by|runs\s+on)\b"),
    )
]

# Canned, policy-safe reply offered as the replacement text whenever a
# response is rejected; it steers the conversation back to the assessment.
_FIX_VALUE = " ".join(
    (
        "I'm here to guide you through your career profile assessment.",
        "Could you tell me more about what you're looking for in your career?",
    )
)


@register_validator(name="pathfinder/persona_policy", data_type="string")
class PersonaPolicyValidator(Validator):
    """Guardrails validator applying the Pathfinder persona-chat policy.

    Registered under the name ``pathfinder/persona_policy`` for string values.
    A response fails as soon as any regex in ``_POLICY_PATTERNS`` matches it.
    """

    def validate(self, value: str, metadata: dict) -> ValidationResult:
        """Check *value* against the policy rules, in table order.

        Args:
            value: The LLM response text to inspect.
            metadata: Accepted as part of the validator signature; not read
                here.

        Returns:
            ``PassResult`` when no rule matches. Otherwise a ``FailResult``
            whose message names the first matching rule's label and whose
            ``fix_value`` is the canned ``_FIX_VALUE`` reply.
        """
        # Only the earliest matching rule is reported; later rules are not
        # evaluated once a hit is found (the generator is consumed lazily).
        first_violation = next(
            (label for regex, label in _POLICY_PATTERNS if regex.search(value)),
            None,
        )
        if first_violation is None:
            return PassResult()
        return FailResult(
            error_message=f"Response violates persona policy: {first_violation}",
            fix_value=_FIX_VALUE,
        )