🛡️ Guardrail
Manager
MAIN
๐
Dashboard
🛡️
Guards
🔧
Custom Validators
📦
Hub Validators
🧪
Test Guard
CONFIG
⚙️
Settings
validatorplus.py
Edit Python validator file
← Back to Validators
Python source — Ctrl+S to save
💾 Save
🗑️ Delete
"""ValidatorPlus — comprehensive multi-check content safety validator for RavRic Solutions.

Combines prompt injection detection, policy compliance, and content safety
into a single configurable validator. Designed for production AI pipelines
needing multiple risk checks.
"""
import re
from typing import Any, Dict, Optional

from guardrails.validator_base import (
    FailResult,
    PassResult,
    ValidationResult,
    Validator,
    register_validator,
)

# Each table entry is (compiled pattern, violation label). Labels are what the
# failure message reports and what _HIGH_RISK is keyed on.
_INJECTION_PATTERNS = [
    (re.compile(r"ignore\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|context)", re.I), "override_ignore"),
    (re.compile(r"disregard\s+(all\s+)?(previous|prior|above|earlier)", re.I), "override_disregard"),
    # `the\s+above` (not a literal single space) so extra spaces or a line
    # break between the words cannot bypass the check, matching the `\s+`
    # convention used by every other pattern in this table.
    (re.compile(r"forget\s+(everything|all|the\s+above|prior)", re.I), "override_forget"),
    (re.compile(r"\byou\s+are\s+now\b.{0,40}(assistant|ai|model|bot|agent|system)\b", re.I), "role_hijack"),
    (re.compile(r"\bact\s+as\b.{0,20}(assistant|ai|model|bot|agent|hacker)", re.I), "role_act_as"),
    (re.compile(r"\bpretend\s+(you\s+are|to\s+be)\b", re.I), "role_pretend"),
    (re.compile(r"\[SYSTEM\]|\[INST\]|<\|system\|>|<<SYS>>", re.I), "system_tag_injection"),
    (re.compile(r"(reveal|show|print|expose|leak)\s+(your\s+)?(system\s+)?(prompt|instructions?)", re.I), "exfil_system_prompt"),
    (re.compile(r"\b(jailbreak|jail\s+break)\b", re.I), "jailbreak_keyword"),
]

_TOXIC_PATTERNS = [
    (re.compile(r"\b(kill|murder|assassinate)\s+(yourself|himself|herself|myself)\b", re.I), "self_harm"),
    (re.compile(r"\b(how\s+to\s+make|synthesize|manufacture)\s+(bomb|explosive|poison|drug)\b", re.I), "dangerous_instructions"),
]

_PII_PATTERNS = [
    (re.compile(r"\b\d{3}[-.\s]\d{2}[-.\s]\d{4}\b"), "ssn"),
    (re.compile(r"\b(?:\d[ -]?){13,16}\b"), "credit_card"),
]

# Labels that escalate a failure from "medium-risk" to "high-risk".
_HIGH_RISK = {"self_harm", "dangerous_instructions", "jailbreak_keyword", "system_tag_injection"}


@register_validator(name="rrs/validator_plus", data_type="string")
class ValidatorPlus(Validator):
    """Multi-check: prompt injection + toxicity + optional PII.

    Args:
        check_injection: Enable prompt injection detection (default: True)
        check_toxicity: Enable harmful content detection (default: True)
        check_pii: Enable PII detection (default: False)
        on_fail: Action on failure
    """

    def __init__(self, check_injection=True, check_toxicity=True, check_pii=False, on_fail=None, **kwargs):
        super().__init__(on_fail=on_fail, **kwargs)
        self._check_injection = check_injection
        self._check_toxicity = check_toxicity
        self._check_pii = check_pii

    def validate(self, value: Any, metadata: Optional[Dict[str, Any]] = None) -> ValidationResult:
        """Scan ``value`` against every enabled pattern table.

        Args:
            value: Candidate text. Anything that is not a string, or is
                empty/whitespace-only, passes without being scanned.
            metadata: Unused by this validator. Defaults to ``None`` rather
                than a shared mutable ``{}``.

        Returns:
            ``PassResult`` when no enabled pattern matches. Otherwise a
            ``FailResult`` whose message lists the matched labels: only the
            high-risk ones when at least one is present, else all of them.
            ``fix_value`` is a fixed placeholder string.
        """
        if not isinstance(value, str) or not value.strip():
            return PassResult()

        # Order matters only for the label order in the error message:
        # injection, then toxicity, then PII.
        checks = (
            (self._check_injection, _INJECTION_PATTERNS),
            (self._check_toxicity, _TOXIC_PATTERNS),
            (self._check_pii, _PII_PATTERNS),
        )
        violations = []
        for enabled, patterns in checks:
            if not enabled:
                continue
            violations.extend(name for pattern, name in patterns if pattern.search(value))

        if not violations:
            return PassResult()

        high = [v for v in violations if v in _HIGH_RISK]
        severity = "high-risk" if high else "medium-risk"
        # When anything high-risk matched, report only those labels.
        flagged = high if high else violations
        return FailResult(
            error_message=f"ValidatorPlus blocked {severity} content: {', '.join(flagged)}",
            fix_value="[CONTENT BLOCKED by ValidatorPlus]",
        )
💾 Save File