From 1729a1898bbb8eed630b4658d3427d2e79a7d61f Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Wed, 21 Jan 2026 16:26:12 +0530 Subject: [PATCH 1/3] Add "rephrase" on_fail action --- backend/app/core/enum.py | 7 ++++- backend/app/core/guardrail_controller.py | 11 +++++-- backend/app/core/on_fail_actions.py | 4 +++ .../ban_list_safety_validator_config.py | 4 +-- backend/app/models/base_validator_config.py | 29 +++++++++---------- ...assumption_bias_safety_validator_config.py | 4 +-- .../lexical_slur_safety_validator_config.py | 4 +-- .../pii_remover_safety_validator_config.py | 4 +-- 8 files changed, 41 insertions(+), 26 deletions(-) create mode 100644 backend/app/core/on_fail_actions.py diff --git a/backend/app/core/enum.py b/backend/app/core/enum.py index 475381d..38418e9 100644 --- a/backend/app/core/enum.py +++ b/backend/app/core/enum.py @@ -10,4 +10,9 @@ class BiasCategories(Enum): Generic = "generic" Healthcare = "healthcare" Education = "education" - All = "all" \ No newline at end of file + All = "all" + +class GuardrailOnFail(Enum): + Exception = "exception" + Fix = "fix" + Rephrase = "rephrase" \ No newline at end of file diff --git a/backend/app/core/guardrail_controller.py b/backend/app/core/guardrail_controller.py index a935636..10cd87f 100644 --- a/backend/app/core/guardrail_controller.py +++ b/backend/app/core/guardrail_controller.py @@ -5,10 +5,17 @@ from app.models.guardrail_config import ValidatorConfigItem def build_guard(validator_items): - validators = [v_item.build() for v_item in validator_items] + validators = [] + + for v_item in validator_items: + validator = v_item.build( + on_fail=v_item.resolve_on_fail() + ) + validators.append(validator) + return Guard().use_many(*validators) def get_validator_config_models(): annotated_args = get_args(ValidatorConfigItem) union_type = annotated_args[0] - return get_args(union_type) \ No newline at end of file + return get_args(union_type) diff --git a/backend/app/core/on_fail_actions.py b/backend/app/core/on_fail_actions.py new file mode 100644 index 0000000..e568eb3 --- /dev/null +++ b/backend/app/core/on_fail_actions.py @@ -0,0 +1,4 @@ +from guardrails.validators import FailResult + +def rephrase_query_on_fail(value: str, fail_result: FailResult): + return f"Please rephrase the query without unsafe content. {fail_result.error_message}" \ No newline at end of file diff --git a/backend/app/models/ban_list_safety_validator_config.py b/backend/app/models/ban_list_safety_validator_config.py index 4a853f0..e237869 100644 --- a/backend/app/models/ban_list_safety_validator_config.py +++ b/backend/app/models/ban_list_safety_validator_config.py @@ -8,8 +8,8 @@ class BanListSafetyValidatorConfig(BaseValidatorConfig): type: Literal["ban_list"] banned_words: List[str] #list of banned words to be redacted - def build(self): + def build(self, *, on_fail): return BanList( banned_words=self.banned_words, - on_fail=self.resolve_on_fail(), + on_fail=on_fail, ) \ No newline at end of file diff --git a/backend/app/models/base_validator_config.py b/backend/app/models/base_validator_config.py index 50f28e9..b287a1a 100644 --- a/backend/app/models/base_validator_config.py +++ b/backend/app/models/base_validator_config.py @@ -1,28 +1,27 @@ -from typing import Any, Literal, Optional +from typing import Any, Optional from guardrails import OnFailAction from sqlmodel import SQLModel -ON_FAIL_STR = Literal["exception", "fix", "noop", "reask"] +from app.core.enum import GuardrailOnFail +from app.core.on_fail_actions import rephrase_query_on_fail + + +_ON_FAIL_MAP = { + GuardrailOnFail.Fix: OnFailAction.FIX, + GuardrailOnFail.Exception: OnFailAction.EXCEPTION, + GuardrailOnFail.Rephrase: rephrase_query_on_fail, +} class BaseValidatorConfig(SQLModel): - on_fail: Optional[ON_FAIL_STR] = OnFailAction.FIX + on_fail: GuardrailOnFail = GuardrailOnFail.Fix model_config = {"arbitrary_types_allowed": True} def resolve_on_fail(self): - if self.on_fail is None: - return None - - try: - return OnFailAction[self.on_fail.upper()] - except KeyError: - raise ValueError( - f"Invalid on_fail value: {self.on_fail}. " - "Expected one of: exception, fix, noop, reask" - ) - - def build(self) -> Any: + return _ON_FAIL_MAP[self.on_fail] + + def build(self, *, on_fail) -> Any: raise NotImplementedError( f"{self.__class__.__name__} must implement build()" ) \ No newline at end of file diff --git a/backend/app/models/gender_assumption_bias_safety_validator_config.py b/backend/app/models/gender_assumption_bias_safety_validator_config.py index 116c281..d22150d 100644 --- a/backend/app/models/gender_assumption_bias_safety_validator_config.py +++ b/backend/app/models/gender_assumption_bias_safety_validator_config.py @@ -8,8 +8,8 @@ class GenderAssumptionBiasSafetyValidatorConfig(BaseValidatorConfig): type: Literal["gender_assumption_bias"] categories: Optional[List[BiasCategories]] = [BiasCategories.All] # preferred category (based on sector) - def build(self): + def build(self, *, on_fail): return GenderAssumptionBias( categories=self.categories, - on_fail=self.resolve_on_fail(), + on_fail=on_fail, ) diff --git a/backend/app/models/lexical_slur_safety_validator_config.py b/backend/app/models/lexical_slur_safety_validator_config.py index 6378182..5acc00d 100644 --- a/backend/app/models/lexical_slur_safety_validator_config.py +++ b/backend/app/models/lexical_slur_safety_validator_config.py @@ -9,9 +9,9 @@ class LexicalSlurSafetyValidatorConfig(BaseValidatorConfig): languages: List[str] = ["en", "hi"] # list of languages to check slurs in severity: Literal["low", "medium", "high", "all"] = "all" # severity level of slurs to check - def build(self): + def build(self, *, on_fail): return LexicalSlur( languages=self.languages, severity=SlurSeverity(self.severity), - on_fail=self.resolve_on_fail(), + on_fail=on_fail, ) diff --git a/backend/app/models/pii_remover_safety_validator_config.py b/backend/app/models/pii_remover_safety_validator_config.py index d8d3a18..1edaa64 100644 --- a/backend/app/models/pii_remover_safety_validator_config.py +++ b/backend/app/models/pii_remover_safety_validator_config.py @@ -9,9 +9,9 @@ class PIIRemoverSafetyValidatorConfig(BaseValidatorConfig): entity_types: Optional[List[str]] = None # list of PII entity types to remove threshold: float = 0.5 # confidence threshold for PII detection - def build(self): + def build(self, *, on_fail): return PIIRemover( entity_types=self.entity_types, threshold=self.threshold, - on_fail=self.resolve_on_fail(), + on_fail=on_fail, ) From e774be6cf8f53c265518a8f1ad6aba5a86731d06 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 23 Jan 2026 02:11:39 +0530 Subject: [PATCH 2/3] Resolved comments --- backend/app/models/base_validator_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/app/models/base_validator_config.py b/backend/app/models/base_validator_config.py index b287a1a..b5cc7e1 100644 --- a/backend/app/models/base_validator_config.py +++ b/backend/app/models/base_validator_config.py @@ -1,6 +1,7 @@ from typing import Any, Optional from guardrails import OnFailAction +from guardrails.validators import Validator from sqlmodel import SQLModel from app.core.enum import GuardrailOnFail @@ -21,7 +22,7 @@ class BaseValidatorConfig(SQLModel): def resolve_on_fail(self): return _ON_FAIL_MAP[self.on_fail] - def build(self, *, on_fail) -> Any: + def build(self, *, on_fail) -> Validator: raise NotImplementedError( f"{self.__class__.__name__} must implement build()" ) \ No newline at end of file From ad21a26d8c1aede6b8939b70ede8140facc35d99 Mon Sep 17 00:00:00 2001 From: rkritika1508 Date: Fri, 23 Jan 2026 02:47:55 +0530 Subject: [PATCH 3/3] Added unit test --- .../tests/test_guardrails_api_integration.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/backend/app/tests/test_guardrails_api_integration.py b/backend/app/tests/test_guardrails_api_integration.py index 8ba0ad2..b414181 100644 --- a/backend/app/tests/test_guardrails_api_integration.py +++ b/backend/app/tests/test_guardrails_api_integration.py @@ -169,4 +169,26 @@ def test_input_guardrails_with_validator_actions_exception(integration_client): body = response.json() assert body["success"] is False - assert "chakki" in body["error"] \ No newline at end of file + assert "chakki" in body["error"] + + +def test_input_guardrails_with_validator_actions_rephrase(integration_client): + response = integration_client.post( + "/api/v1/guardrails/input/", + json={ + "request_id": request_id, + "input": "This sentence contains chakki.", + "validators": [ + { + "type": "uli_slur_match", + "severity": "all", + "on_fail": "rephrase", + } + ], + }, + ) + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert "Please rephrase the query without unsafe content. Mentioned toxic words" in body["data"]["safe_input"] \ No newline at end of file