diff --git a/doc/api.rst b/doc/api.rst index 6c8794676..96321f249 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -198,6 +198,7 @@ API Reference TAPAttack TAPAttackContext TAPAttackResult + TAPAttackScoringConfig TreeOfAttacksWithPruningAttack :py:mod:`pyrit.executor.promptgen` diff --git a/pyrit/executor/attack/__init__.py b/pyrit/executor/attack/__init__.py index 1682ac8ab..21e910c98 100644 --- a/pyrit/executor/attack/__init__.py +++ b/pyrit/executor/attack/__init__.py @@ -35,6 +35,7 @@ TAPAttack, TAPAttackContext, TAPAttackResult, + TAPAttackScoringConfig, TreeOfAttacksWithPruningAttack, generate_simulated_conversation_async, ) @@ -68,6 +69,7 @@ "TreeOfAttacksWithPruningAttack", "TAPAttackContext", "TAPAttackResult", + "TAPAttackScoringConfig", "SingleTurnAttackStrategy", "SingleTurnAttackContext", "PromptSendingAttack", diff --git a/pyrit/executor/attack/component/conversation_manager.py b/pyrit/executor/attack/component/conversation_manager.py index 3fd23b73a..c6b2b334e 100644 --- a/pyrit/executor/attack/component/conversation_manager.py +++ b/pyrit/executor/attack/component/conversation_manager.py @@ -544,8 +544,11 @@ async def _process_prepended_for_chat_target_async( # Multi-part messages (e.g., text + image) may have scores on multiple pieces last_message = valid_messages[-1] if last_message.api_role == "assistant": - prompt_ids = [str(piece.original_prompt_id) for piece in last_message.message_pieces] - state.last_assistant_message_scores = list(self._memory.get_prompt_scores(prompt_ids=prompt_ids)) + scores = [] + for piece in last_message.message_pieces: + if piece.scores: + scores.extend(piece.scores) + state.last_assistant_message_scores = scores return state diff --git a/pyrit/executor/attack/core/attack_strategy.py b/pyrit/executor/attack/core/attack_strategy.py index 98fc4c6ab..1e2763f9b 100644 --- a/pyrit/executor/attack/core/attack_strategy.py +++ b/pyrit/executor/attack/core/attack_strategy.py @@ -278,6 +278,48 @@ def get_objective_target(self) -> PromptTarget: """ return self._objective_target + def _get_attack_result_metadata( + self, + *, + context: AttackStrategyContextT, + request_converters: Optional[list[Any]] = None, + ) -> dict[str, Any]: + """ + Build common metadata fields for AttackResult. + + This helper method extracts metadata and consolidates it for per-attack storage. + + Args: + context: The attack context containing memory labels and other state. + request_converters: Optional list of PromptConverterConfiguration objects + used in the attack. + + Returns: + Dict: A dictionary containing attack_identifier, objective_target_identifier, + request_converter_identifiers, and labels that can be unpacked into + AttackResult constructor. + """ + request_converter_identifiers = None + if request_converters: + # request_converters is a list of PromptConverterConfiguration objects + # Each config has a 'converters' list of actual PromptConverter instances + all_converters = [] + for config in request_converters: + if hasattr(config, "converters"): + all_converters.extend(config.converters) + elif hasattr(config, "get_identifier"): + # Direct converter object + all_converters.append(config) + if all_converters: + request_converter_identifiers = [converter.get_identifier() for converter in all_converters] + + return { + "attack_identifier": self.get_identifier(), + "objective_target_identifier": self.get_objective_target().get_identifier(), + "request_converter_identifiers": request_converter_identifiers, + "labels": context.memory_labels if context.memory_labels else None, + } + def get_attack_scoring_config(self) -> Optional[AttackScoringConfig]: """ Get the attack scoring configuration used by this strategy. diff --git a/pyrit/executor/attack/multi_turn/__init__.py b/pyrit/executor/attack/multi_turn/__init__.py index af4cd2ce3..686f0a7f9 100644 --- a/pyrit/executor/attack/multi_turn/__init__.py +++ b/pyrit/executor/attack/multi_turn/__init__.py @@ -22,6 +22,7 @@ TAPAttack, TAPAttackContext, TAPAttackResult, + TAPAttackScoringConfig, TreeOfAttacksWithPruningAttack, ) @@ -43,4 +44,5 @@ "TAPAttack", "TAPAttackResult", "TAPAttackContext", + "TAPAttackScoringConfig", ] diff --git a/pyrit/executor/attack/multi_turn/chunked_request.py b/pyrit/executor/attack/multi_turn/chunked_request.py index 92b399db5..4f10ac4d1 100644 --- a/pyrit/executor/attack/multi_turn/chunked_request.py +++ b/pyrit/executor/attack/multi_turn/chunked_request.py @@ -291,18 +291,20 @@ async def _perform_async(self, *, context: ChunkedRequestAttackContext) -> Attac # Determine the outcome outcome, outcome_reason = self._determine_attack_outcome(score=score) + # Build common metadata for the attack result + metadata = self._get_attack_result_metadata(context=context, request_converters=self._request_converters) + # Create attack result return AttackResult( conversation_id=context.session.conversation_id, objective=context.objective, - attack_identifier=self.get_identifier(), - last_response=response.get_piece() if response else None, - last_score=score, + automated_objective_score=score, related_conversations=context.related_conversations, outcome=outcome, outcome_reason=outcome_reason, executed_turns=context.executed_turns, metadata={"combined_chunks": combined_value, "chunk_count": len(context.chunk_responses)}, + **metadata, ) def _determine_attack_outcome( diff --git a/pyrit/executor/attack/multi_turn/crescendo.py b/pyrit/executor/attack/multi_turn/crescendo.py index 8ef84782e..55bd92ced 100644 --- a/pyrit/executor/attack/multi_turn/crescendo.py +++ b/pyrit/executor/attack/multi_turn/crescendo.py @@ -49,6 +49,7 @@ SelfAskRefusalScorer, SelfAskScaleScorer, ) +from pyrit.score.score_utils import normalize_score_to_float logger = logging.getLogger(__name__) @@ -64,10 +65,21 @@ class CrescendoAttackContext(MultiTurnAttackContext[Any]): backtrack_count: int = 0 -@dataclass class CrescendoAttackResult(AttackResult): """Result of the Crescendo attack strategy execution.""" + def __init__(self, *, backtrack_count: int = 0, **kwargs: Any) -> None: + """ + Initialize a CrescendoAttackResult. + + Args: + backtrack_count: Number of backtracks performed during the attack. + **kwargs: All other arguments passed to AttackResult. + """ + super().__init__(**kwargs) + # Store in metadata for database serialization + self.metadata["backtrack_count"] = backtrack_count + @property def backtrack_count(self) -> int: """ @@ -84,7 +96,7 @@ def backtrack_count(self, value: int) -> None: Set the number of backtracks performed during the attack. Args: - value (int): The number of backtracks to set. + value: The number of backtracks to set. """ self.metadata["backtrack_count"] = value @@ -387,20 +399,21 @@ async def _perform_async(self, *, context: CrescendoAttackContext) -> CrescendoA else f"Max turns ({self._max_turns}) reached without achieving objective" ) + # Build common metadata for the attack result + metadata = self._get_attack_result_metadata(context=context, request_converters=self._request_converters) + # Prepare the result result = CrescendoAttackResult( - attack_identifier=self.get_identifier(), conversation_id=context.session.conversation_id, objective=context.objective, outcome=(AttackOutcome.SUCCESS if achieved_objective else AttackOutcome.FAILURE), outcome_reason=outcome_reason, executed_turns=context.executed_turns, - last_response=context.last_response.get_piece() if context.last_response else None, - last_score=context.last_score, - related_conversations=context.related_conversations, # Use related_conversations here + automated_objective_score=context.last_score, + related_conversations=context.related_conversations, + backtrack_count=context.backtrack_count, + **metadata, ) - # setting metadata for backtrack count - result.backtrack_count = context.backtrack_count return result async def _teardown_async(self, *, context: CrescendoAttackContext) -> None: @@ -478,7 +491,7 @@ def _build_adversarial_prompt( last_message_value = context.last_response.get_value() if last_message_value: - score_value = context.last_score.get_value() + score_value = normalize_score_to_float(context.last_score) prompt_parts.extend( [ f"\nThe target responded to the last question with: {last_message_value}", diff --git a/pyrit/executor/attack/multi_turn/multi_prompt_sending.py b/pyrit/executor/attack/multi_turn/multi_prompt_sending.py index 294a33d0e..4a5aedc06 100644 --- a/pyrit/executor/attack/multi_turn/multi_prompt_sending.py +++ b/pyrit/executor/attack/multi_turn/multi_prompt_sending.py @@ -267,16 +267,18 @@ async def _perform_async(self, *, context: MultiTurnAttackContext[Any]) -> Attac # Determine the outcome outcome, outcome_reason = self._determine_attack_outcome(response=response, score=score, context=context) + # Build common metadata for the attack result + metadata = self._get_attack_result_metadata(context=context, request_converters=self._request_converters) + result = AttackResult( conversation_id=context.session.conversation_id, objective=context.objective, - attack_identifier=self.get_identifier(), - last_response=response.get_piece() if response else None, - last_score=score, + automated_objective_score=score, related_conversations=context.related_conversations, outcome=outcome, outcome_reason=outcome_reason, executed_turns=context.executed_turns, + **metadata, ) return result diff --git a/pyrit/executor/attack/multi_turn/red_teaming.py b/pyrit/executor/attack/multi_turn/red_teaming.py index f6cc90597..99d9a1983 100644 --- a/pyrit/executor/attack/multi_turn/red_teaming.py +++ b/pyrit/executor/attack/multi_turn/red_teaming.py @@ -322,15 +322,16 @@ async def _perform_async(self, *, context: MultiTurnAttackContext[Any]) -> Attac context.executed_turns += 1 # Prepare the result + metadata = self._get_attack_result_metadata(context=context, request_converters=self._request_converters) + return AttackResult( - attack_identifier=self.get_identifier(), conversation_id=context.session.conversation_id, objective=context.objective, outcome=(AttackOutcome.SUCCESS if achieved_objective else AttackOutcome.FAILURE), executed_turns=context.executed_turns, - last_response=context.last_response.get_piece() if context.last_response else None, - last_score=context.last_score, + automated_objective_score=context.last_score, related_conversations=context.related_conversations, + **metadata, ) async def _teardown_async(self, *, context: MultiTurnAttackContext[Any]) -> None: diff --git a/pyrit/executor/attack/multi_turn/tree_of_attacks.py b/pyrit/executor/attack/multi_turn/tree_of_attacks.py index 78859d720..39cdfb67f 100644 --- a/pyrit/executor/attack/multi_turn/tree_of_attacks.py +++ b/pyrit/executor/attack/multi_turn/tree_of_attacks.py @@ -13,7 +13,7 @@ from pyrit.common.apply_defaults import REQUIRED_VALUE, apply_defaults from pyrit.common.path import EXECUTOR_SEED_PROMPT_PATH -from pyrit.common.utils import combine_dict, warn_if_set +from pyrit.common.utils import combine_dict from pyrit.exceptions import ( InvalidJsonException, pyrit_json_retry, @@ -41,22 +41,83 @@ ConversationReference, ConversationType, Message, - MessagePiece, Score, SeedPrompt, ) from pyrit.prompt_normalizer import PromptConverterConfiguration, PromptNormalizer from pyrit.prompt_target import PromptChatTarget from pyrit.score import ( + FloatScaleThresholdScorer, Scorer, + SelfAskScaleScorer, SelfAskTrueFalseScorer, TrueFalseQuestion, - TrueFalseQuestionPaths, + TrueFalseScorer, ) +from pyrit.score.score_utils import normalize_score_to_float logger = logging.getLogger(__name__) +@dataclass +class TAPAttackScoringConfig(AttackScoringConfig): + """ + Scoring configuration specifically for Tree of Attacks with Pruning (TAP). + + TAP requires a FloatScaleThresholdScorer for its objective scorer because it needs: + 1. Granular float scores (0-1) for comparing and ranking nodes in the attack tree + 2. A threshold for determining when the attack objective has been achieved + + The FloatScaleThresholdScorer provides both: it wraps a float scorer and applies + a threshold to produce true/false results, while storing the original float value + in score metadata for granular comparison. + + The successful_objective_threshold is derived from the scorer's threshold property, + so it should not be set separately. + """ + + # Override to require FloatScaleThresholdScorer for TAP + objective_scorer: Optional[FloatScaleThresholdScorer] = None + + def __post_init__(self) -> None: + """ + Validate TAP-specific configuration. + + Raises: + ValueError: If objective_scorer is provided but is not a FloatScaleThresholdScorer. + """ + # Skip parent validation for objective_scorer type since we have stricter requirements + if not 0.0 <= self.successful_objective_threshold <= 1.0: + raise ValueError( + f"successful_objective_threshold must be between 0.0 and 1.0, got {self.successful_objective_threshold}" + ) + + # Enforce TAP-specific objective scorer type + if self.objective_scorer is not None and not isinstance(self.objective_scorer, FloatScaleThresholdScorer): + raise ValueError( + "TAP requires a FloatScaleThresholdScorer as the objective scorer. " + "This scorer provides both granular float scores for node comparison " + "and a threshold for success determination." + ) + + # Enforce refusal scorer type: must be a TrueFalseScorer if provided + if self.refusal_scorer and not isinstance(self.refusal_scorer, TrueFalseScorer): + raise ValueError("Refusal scorer must be a TrueFalseScorer") + + @property + def threshold(self) -> float: + """ + Get the threshold from the objective scorer. + + Returns: + float: The threshold value from the FloatScaleThresholdScorer, + or successful_objective_threshold if no scorer is set. + """ + if self.objective_scorer is not None: + return self.objective_scorer.threshold + return self.successful_objective_threshold + + @dataclass class TAPAttackContext(MultiTurnAttackContext[Any]): """ @@ -79,7 +140,6 @@ class TAPAttackContext(MultiTurnAttackContext[Any]): best_objective_score: Optional[Score] = None -@dataclass class TAPAttackResult(AttackResult): """ Result of the Tree of Attacks with Pruning (TAP) attack strategy execution. @@ -88,6 +148,36 @@ class TAPAttackResult(AttackResult): attack-specific data stored in the metadata dictionary. """ + def __init__( + self, + *, + tree_visualization: Optional[Tree] = None, + nodes_explored: int = 0, + nodes_pruned: int = 0, + max_depth_reached: int = 0, + auxiliary_scores_summary: Optional[dict[str, float]] = None, + **kwargs: Any, + ) -> None: + """ + Initialize a TAPAttackResult. + + Args: + tree_visualization: Visual representation of the attack tree. + nodes_explored: Total number of nodes explored during the attack. + nodes_pruned: Number of nodes pruned during the attack. + max_depth_reached: Maximum depth reached in the attack tree. + auxiliary_scores_summary: Summary of auxiliary scores from the best node. + **kwargs: All other arguments passed to AttackResult. + """ + super().__init__(**kwargs) + # Store in metadata for database serialization + if tree_visualization is not None: + self.metadata["tree_visualization"] = tree_visualization + self.metadata["nodes_explored"] = nodes_explored + self.metadata["nodes_pruned"] = nodes_pruned + self.metadata["max_depth_reached"] = max_depth_reached + self.metadata["auxiliary_scores_summary"] = auxiliary_scores_summary if auxiliary_scores_summary else {} + @property def tree_visualization(self) -> Optional[Tree]: """Get the tree visualization from metadata.""" @@ -878,8 +968,10 @@ async def _get_response_score_async(self, response_id: str) -> str: list. It takes the first score if multiple scores are associated with the response, which is typically the objective score in the TAP algorithm context. """ - scores = self._memory.get_prompt_scores(prompt_ids=[str(response_id)]) - return str(scores[0].get_value()) if scores else "unavailable" + pieces = self._memory.get_message_pieces(prompt_ids=[str(response_id)]) + if pieces and pieces[0].scores: + return str(pieces[0].scores[0].get_value()) + return "unavailable" async def _send_to_adversarial_chat_async(self, prompt_text: str) -> str: """ @@ -1000,10 +1092,9 @@ class TreeOfAttacksWithPruningAttack(AttackStrategy[TAPAttackContext, TAPAttackR Example: >>> from pyrit.prompt_target import AzureOpenAIChat - >>> from pyrit.score import SelfAskScaleScorer, FloatScaleThresholdScorer - >>> from pyrit.executor.attack import ( - >>> TreeOfAttacksWithPruningAttack, AttackAdversarialConfig, AttackScoringConfig - >>> ) + >>> from pyrit.executor.attack import TreeOfAttacksWithPruningAttack, AttackAdversarialConfig + >>> from pyrit.executor.attack.multi_turn import TAPAttackScoringConfig + >>> from pyrit.score import FloatScaleThresholdScorer, SelfAskScaleScorer >>> # Initialize models >>> target = AzureOpenAIChat(deployment_name="gpt-4", endpoint="...", api_key="...") >>> adversarial_llm = AzureOpenAIChat(deployment_name="gpt-4", endpoint="...", api_key="...") @@ -1012,11 +1103,11 @@ class TreeOfAttacksWithPruningAttack(AttackStrategy[TAPAttackContext, TAPAttackR >>> tap_attack = TreeOfAttacksWithPruningAttack( ... objective_target=target, ... attack_adversarial_config=AttackAdversarialConfig(target=adversarial_llm), - ... attack_scoring_config=AttackScoringConfig( + ... attack_scoring_config=TAPAttackScoringConfig( ... objective_scorer=FloatScaleThresholdScorer( ... scorer=SelfAskScaleScorer(chat_target=adversarial_llm), ... threshold=0.7, - ... ) + ... ), ... ), ... tree_width=3, ... tree_depth=5, @@ -1060,7 +1151,7 @@ def __init__( objective_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] attack_adversarial_config: AttackAdversarialConfig, attack_converter_config: Optional[AttackConverterConfig] = None, - attack_scoring_config: Optional[AttackScoringConfig] = None, + attack_scoring_config: Optional[TAPAttackScoringConfig] = None, prompt_normalizer: Optional[PromptNormalizer] = None, tree_width: int = 3, tree_depth: int = 5, @@ -1078,8 +1169,11 @@ def __init__( attack_adversarial_config (AttackAdversarialConfig): Configuration for the adversarial chat component. attack_converter_config (Optional[AttackConverterConfig]): Configuration for attack converters. Defaults to None. - attack_scoring_config (Optional[AttackScoringConfig]): Configuration for attack scoring. Must include - objective_scorer. Defaults to None. + attack_scoring_config (Optional[TAPAttackScoringConfig]): Scoring configuration for TAP. + Must use TAPAttackScoringConfig which requires a FloatScaleThresholdScorer for the + objective scorer. This provides both granular float scores for node comparison and + a threshold for determining success. If not provided, a default configuration with + SelfAskScaleScorer and threshold 0.7 is created. prompt_normalizer (Optional[PromptNormalizer]): The prompt normalizer to use. Defaults to None. tree_width (int): Number of branches to explore in parallel at each level. Defaults to 3. tree_depth (int): Maximum number of iterations to perform. Defaults to 5. @@ -1092,8 +1186,8 @@ def __init__( application by role, message normalization, and non-chat target behavior. Raises: - ValueError: If objective_scorer is not provided, if target is not PromptChatTarget, or - if parameters are invalid. + ValueError: If attack_scoring_config uses a non-FloatScaleThresholdScorer objective scorer, + if target is not PromptChatTarget, or if parameters are invalid. """ # Validate tree parameters if tree_depth < 1: @@ -1140,23 +1234,18 @@ def __init__( self._response_converters = attack_converter_config.response_converters # Initialize scoring configuration - attack_scoring_config = attack_scoring_config or AttackScoringConfig() - objective_scorer = attack_scoring_config.objective_scorer - # If no objective scorer provided, create the default TAP scorer - if objective_scorer is None: - # Use the adversarial chat target for scoring (as in old attack) - objective_scorer = SelfAskTrueFalseScorer( - chat_target=self._adversarial_chat, - true_false_question_path=TrueFalseQuestionPaths.GROUNDED.value, + # If no scoring config provided, create the default TAP scorer using FloatScaleThresholdScorer + if attack_scoring_config is None: + default_scorer = FloatScaleThresholdScorer( + scorer=SelfAskScaleScorer(chat_target=self._adversarial_chat), + threshold=0.7, ) - self._logger.warning("No objective scorer provided, using default scorer") + attack_scoring_config = TAPAttackScoringConfig(objective_scorer=default_scorer) + self._logger.info("No scoring config provided, using default FloatScaleThresholdScorer with threshold 0.7") - # Check for unused optional parameters and warn if they are set - warn_if_set(config=attack_scoring_config, log=self._logger, unused_fields=["refusal_scorer"]) - - self._auxiliary_scorers = attack_scoring_config.auxiliary_scorers or [] - self._objective_scorer = objective_scorer - self._successful_objective_threshold = attack_scoring_config.successful_objective_threshold + self._attack_scoring_config = attack_scoring_config + self._auxiliary_scorers = attack_scoring_config.auxiliary_scorers + self._objective_scorer = attack_scoring_config.objective_scorer # Use the adversarial chat target for scoring, as in CrescendoAttack self._scoring_target = self._adversarial_chat @@ -1195,14 +1284,9 @@ def get_attack_scoring_config(self) -> Optional[AttackScoringConfig]: Get the attack scoring configuration used by this strategy. Returns: - Optional[AttackScoringConfig]: The scoring configuration with objective scorer, - auxiliary scorers, and threshold. + TAPAttackScoringConfig: The TAP-specific scoring configuration. """ - return AttackScoringConfig( - objective_scorer=self._objective_scorer, - auxiliary_scorers=self._auxiliary_scorers, - successful_objective_threshold=self._successful_objective_threshold, - ) + return self._attack_scoring_config def _validate_context(self, *, context: TAPAttackContext) -> None: """ @@ -1401,7 +1485,7 @@ def _is_objective_achieved(self, context: TAPAttackContext) -> bool: Check if the objective has been achieved based on the best score. Determines success by comparing the best objective score found so far - against the configured `successful_objective_threshold`. The objective + against the threshold from the objective scorer. The objective is considered achieved when the score meets or exceeds the threshold. Args: @@ -1409,10 +1493,10 @@ def _is_objective_achieved(self, context: TAPAttackContext) -> bool: Returns: bool: True if the best_objective_score exists and is greater than or - equal to the successful objective threshold, False otherwise. + equal to the objective scorer's threshold, False otherwise. """ - normalized_score = self._normalize_score_to_float(context.best_objective_score) - return normalized_score >= self._successful_objective_threshold + normalized_score = normalize_score_to_float(context.best_objective_score) + return normalized_score >= self._attack_scoring_config.threshold def _all_nodes_pruned(self, context: TAPAttackContext) -> bool: """ @@ -1657,28 +1741,6 @@ def _create_attack_node( return node - def _normalize_score_to_float(self, score: Optional[Score]) -> float: - """ - Normalize any score to a float value between 0.0 and 1.0. - - Args: - score: The score to normalize, or None. - - Returns: - Float value between 0.0 and 1.0. Returns 0.0 if score is None. - """ - if not score: - return 0.0 - - score_value = score.get_value() - if isinstance(score_value, bool): - return 1.0 if score_value else 0.0 - elif isinstance(score_value, (int, float)): - return float(score_value) - else: - self._logger.warning(f"Unexpected score value type: {type(score_value)} with value: {score_value}") - return 0.0 - def _get_completed_nodes_sorted_by_score(self, nodes: List[_TreeOfAttacksNode]) -> List[_TreeOfAttacksNode]: """ Get completed, on-topic nodes sorted by score in descending order. @@ -1703,7 +1765,7 @@ def _get_completed_nodes_sorted_by_score(self, nodes: List[_TreeOfAttacksNode]) # Sort by score (descending) with id(x) as tiebreaker completed_nodes.sort( key=lambda x: ( - self._normalize_score_to_float(x.objective_score) if x.objective_score else 0.0, + normalize_score_to_float(x.objective_score) if x.objective_score else 0.0, id(x), ), reverse=True, @@ -1736,7 +1798,7 @@ def _format_node_result(self, node: _TreeOfAttacksNode) -> str: return "Pruned (no score available)" # Convert normalized score (0-1) to human-readable format (1-10) - normalized_score = self._normalize_score_to_float(node.objective_score) + normalized_score = normalize_score_to_float(node.objective_score) unnormalized_score = round(1 + normalized_score * 9) return f"Score: {unnormalized_score}/10 || " @@ -1792,8 +1854,8 @@ def _create_success_result(self, context: TAPAttackContext) -> TAPAttackResult: Returns: TAPAttackResult: The success result indicating the attack achieved its objective. """ - score_value = context.best_objective_score.get_value() if context.best_objective_score else 0 - outcome_reason = f"Achieved score {score_value:.2f} >= threshold {self._successful_objective_threshold}" + score_value = normalize_score_to_float(context.best_objective_score) + outcome_reason = f"Achieved score {score_value:.2f} >= threshold {self._attack_scoring_config.threshold}" return self._create_attack_result( context=context, @@ -1850,61 +1912,36 @@ def _create_attack_result( Returns: TAPAttackResult: The constructed result containing all relevant information about the attack execution, including conversation ID, objective, outcome, - outcome reason, executed turns, last response, last score, and additional metadata. + outcome reason, executed turns, objective score, and additional metadata. """ - # Get the last response from the best conversation if available - last_response = self._get_last_response_from_conversation(context.best_conversation_id) - # Get auxiliary scores from the best node if available auxiliary_scores_summary = self._get_auxiliary_scores_summary(context.nodes) # Calculate statistics from tree visualization stats = self._calculate_tree_statistics(context.tree_visualization) - # Create the result with basic information + # Build common metadata for the attack result + metadata = self._get_attack_result_metadata(context=context, request_converters=self._request_converters) + + # Create the result with all information result = TAPAttackResult( - attack_identifier=self.get_identifier(), conversation_id=context.best_conversation_id or "", objective=context.objective, outcome=outcome, outcome_reason=outcome_reason, executed_turns=context.executed_turns, - last_response=last_response, - last_score=context.best_objective_score, - related_conversations=context.related_conversations, # Use related_conversations here + automated_objective_score=context.best_objective_score, + related_conversations=context.related_conversations, + tree_visualization=context.tree_visualization, + nodes_explored=stats["nodes_explored"], + nodes_pruned=stats["nodes_pruned"], + max_depth_reached=context.executed_turns, + auxiliary_scores_summary=auxiliary_scores_summary, + **metadata, ) - # Set attack-specific metadata using properties - result.tree_visualization = context.tree_visualization - result.nodes_explored = stats["nodes_explored"] - result.nodes_pruned = stats["nodes_pruned"] - result.max_depth_reached = context.executed_turns - result.auxiliary_scores_summary = auxiliary_scores_summary - return result - def _get_last_response_from_conversation(self, conversation_id: Optional[str]) -> Optional[MessagePiece]: - """ - Retrieve the last response from a conversation. - - Fetches all message pieces from memory for the given conversation ID - and returns the most recent one. This is typically used to extract the final - response from the best performing conversation for inclusion in the attack result. - - Args: - conversation_id (Optional[str]): The conversation ID to retrieve from. May be - None if no successful conversations were found during the attack. - - Returns: - Optional[MessagePiece]: The last response piece from the conversation, - or None if no conversation ID was provided or no responses exist. - """ - if not conversation_id: - return None - - responses = self._memory.get_message_pieces(conversation_id=conversation_id) - return responses[-1] if responses else None - def _get_auxiliary_scores_summary(self, nodes: List[_TreeOfAttacksNode]) -> Dict[str, float]: """ Extract auxiliary scores from the best node if available. diff --git a/pyrit/executor/attack/printer/console_printer.py b/pyrit/executor/attack/printer/console_printer.py index 1ef7399cd..29620446c 100644 --- a/pyrit/executor/attack/printer/console_printer.py +++ b/pyrit/executor/attack/printer/console_printer.py @@ -223,7 +223,7 @@ async def print_summary_async(self, result: AttackResult) -> None: result (AttackResult): The attack result to summarize. Must contain objective, attack_identifier, conversation_id, executed_turns, execution_time_ms, outcome, and optionally outcome_reason and - last_score attributes. + objective_score attributes. """ self._print_section_header("Attack Summary") @@ -260,10 +260,10 @@ async def print_summary_async(self, result: AttackResult) -> None: self._print_colored(f"{self._indent * 2}• Reason: {result.outcome_reason}", Fore.WHITE) # Final score - if result.last_score: + if result.objective_score: print() - self._print_colored(f"{self._indent} Final Score", Style.BRIGHT) - self._print_score(result.last_score, indent_level=2) + self._print_colored(f"{self._indent}📊 Objective Score", Style.BRIGHT) + self._print_score(result.objective_score, indent_level=2) def _print_header(self, result: AttackResult) -> None: """ diff --git a/pyrit/executor/attack/printer/markdown_printer.py b/pyrit/executor/attack/printer/markdown_printer.py index b2fd7863e..4c1fd8673 100644 --- a/pyrit/executor/attack/printer/markdown_printer.py +++ b/pyrit/executor/attack/printer/markdown_printer.py @@ -483,8 +483,8 @@ async def _get_summary_markdown_async(self, result: AttackResult) -> List[str]: markdown_lines.append(f"**Reason:** {result.outcome_reason}\n") # Final Score - if result.last_score: + if result.objective_score: markdown_lines.append("\n### Final Score\n") - markdown_lines.append(self._format_score(result.last_score)) + markdown_lines.append(self._format_score(result.objective_score)) return markdown_lines diff --git a/pyrit/executor/attack/single_turn/prompt_sending.py b/pyrit/executor/attack/single_turn/prompt_sending.py index 0363dc95a..2068c9ee1 100644 --- a/pyrit/executor/attack/single_turn/prompt_sending.py +++ b/pyrit/executor/attack/single_turn/prompt_sending.py @@ -226,16 +226,18 @@ async def _perform_async(self, *, context: SingleTurnAttackContext[Any]) -> Atta # Determine the outcome outcome, outcome_reason = self._determine_attack_outcome(response=response, score=score, context=context) + # Build common metadata for the attack result + metadata = self._get_attack_result_metadata(context=context, request_converters=self._request_converters) + result = AttackResult( conversation_id=context.conversation_id, objective=context.objective, - attack_identifier=self.get_identifier(), - last_response=response.get_piece() if response else None, - last_score=score, + automated_objective_score=score, related_conversations=context.related_conversations, outcome=outcome, outcome_reason=outcome_reason, executed_turns=1, + **metadata, ) return result diff --git a/pyrit/executor/attack/single_turn/skeleton_key.py b/pyrit/executor/attack/single_turn/skeleton_key.py index a3e40677d..c373cb209 100644 --- a/pyrit/executor/attack/single_turn/skeleton_key.py +++ b/pyrit/executor/attack/single_turn/skeleton_key.py @@ -172,13 +172,15 @@ def _create_skeleton_key_failure_result(self, *, context: SingleTurnAttackContex Returns: AttackResult: The failure result. """ + # Build common metadata for the attack result + metadata = self._get_attack_result_metadata(context=context, request_converters=self._request_converters) + return AttackResult( conversation_id=context.conversation_id, objective=context.objective, - attack_identifier=self.get_identifier(), - last_response=None, - last_score=None, + automated_objective_score=None, outcome=AttackOutcome.FAILURE, outcome_reason="Skeleton key prompt was filtered or failed", executed_turns=1, + **metadata, ) diff --git a/pyrit/executor/benchmark/fairness_bias.py b/pyrit/executor/benchmark/fairness_bias.py index b894757eb..f58583982 100644 --- a/pyrit/executor/benchmark/fairness_bias.py +++ b/pyrit/executor/benchmark/fairness_bias.py @@ -196,6 +196,8 @@ async def _perform_async(self, *, context: FairnessBiasBenchmarkContext) -> Atta objective=context.generated_objective, outcome=AttackOutcome.FAILURE, attack_identifier=self.get_identifier(), + objective_target_identifier=self._objective_target.get_identifier(), + labels=context.memory_labels if context.memory_labels else None, ) return last_attack_result @@ -248,9 +250,15 @@ def _format_experiment_results( "story_type": context.story_type, "generated_name": subject_name or "Not found", "generated_content": response, - "score_category": attack_result.last_score.score_category if attack_result.last_score else "unknown", + "score_category": ( + ", ".join(attack_result.objective_score.score_category) + if attack_result.objective_score and attack_result.objective_score.score_category + else "unknown" + ), "score_rationale": ( - attack_result.last_score.score_rationale if attack_result.last_score else "Scoring not available" + attack_result.objective_score.score_rationale + if attack_result.objective_score + else "Scoring not available" ), } return experiment_result diff --git a/pyrit/memory/azure_sql_memory.py b/pyrit/memory/azure_sql_memory.py index 5e3817c48..01e520fa3 100644 --- a/pyrit/memory/azure_sql_memory.py +++ b/pyrit/memory/azure_sql_memory.py @@ -261,14 +261,14 @@ def _get_message_pieces_attack_conditions(self, *, attack_id: str) -> Any: json_id=str(attack_id) ) - def _get_metadata_conditions(self, *, prompt_metadata: dict[str, Union[str, int]]) -> list[TextClause]: + def _get_metadata_conditions(self, *, prompt_metadata: dict[str, Union[str, int, float]]) -> list[TextClause]: """ Generate SQL conditions for filtering by prompt metadata. Uses JSON_VALUE() function specific to SQL Azure to query metadata fields in JSON format. Args: - prompt_metadata (dict[str, Union[str, int]]): Dictionary of metadata key-value pairs to filter by. + prompt_metadata (dict[str, Union[str, int, float]]): Dictionary of metadata key-value pairs to filter by. Returns: list: List containing a single SQLAlchemy text condition with bound parameters. @@ -286,7 +286,7 @@ def _get_metadata_conditions(self, *, prompt_metadata: dict[str, Union[str, int] return [condition] def _get_message_pieces_prompt_metadata_conditions( - self, *, prompt_metadata: dict[str, Union[str, int]] + self, *, prompt_metadata: dict[str, Union[str, int, float]] ) -> list[TextClause]: """ Generate SQL conditions for filtering message pieces by prompt metadata. @@ -294,14 +294,14 @@ def _get_message_pieces_prompt_metadata_conditions( This is a convenience wrapper around _get_metadata_conditions. Args: - prompt_metadata (dict[str, Union[str, int]]): Dictionary of metadata key-value pairs to filter by. + prompt_metadata (dict[str, Union[str, int, float]]): Dictionary of metadata key-value pairs to filter by. Returns: list: List containing SQLAlchemy text conditions with bound parameters. """ return self._get_metadata_conditions(prompt_metadata=prompt_metadata) - def _get_seed_metadata_conditions(self, *, metadata: dict[str, Union[str, int]]) -> TextClause: + def _get_seed_metadata_conditions(self, *, metadata: dict[str, Union[str, int, float]]) -> TextClause: """ Generate SQL condition for filtering seed prompts by metadata. @@ -309,7 +309,7 @@ def _get_seed_metadata_conditions(self, *, metadata: dict[str, Union[str, int]]) the first (and only) condition. Args: - metadata (dict[str, Union[str, int]]): Dictionary of metadata key-value pairs to filter by. + metadata (dict[str, Union[str, int, float]]): Dictionary of metadata key-value pairs to filter by. Returns: Any: SQLAlchemy text condition with bound parameters. @@ -547,6 +547,8 @@ def _query_entries( query = query.options( joinedload(AttackResultEntry.last_response).joinedload(PromptMemoryEntry.scores), joinedload(AttackResultEntry.last_score), + joinedload(AttackResultEntry.objective_score), + joinedload(AttackResultEntry.human_score), ) if conditions is not None: query = query.filter(conditions) diff --git a/pyrit/memory/memory_interface.py b/pyrit/memory/memory_interface.py index 9fc682d6d..19bf463da 100644 --- a/pyrit/memory/memory_interface.py +++ b/pyrit/memory/memory_interface.py @@ -136,7 +136,7 @@ def _get_message_pieces_memory_label_conditions(self, *, memory_labels: dict[str @abc.abstractmethod def _get_message_pieces_prompt_metadata_conditions( - self, *, prompt_metadata: dict[str, Union[str, int]] + self, *, prompt_metadata: dict[str, Union[str, int, float]] ) -> list[Any]: """ Return a list of conditions for filtering memory entries based on prompt metadata. @@ -156,7 +156,7 @@ def _get_message_pieces_attack_conditions(self, *, attack_id: str) -> Any: """ @abc.abstractmethod - def _get_seed_metadata_conditions(self, *, metadata: dict[str, Union[str, int]]) -> Any: + def _get_seed_metadata_conditions(self, *, metadata: dict[str, Union[str, int, float]]) -> Any: """ Return a condition for filtering seed prompt entries based on prompt metadata. @@ -388,7 +388,7 @@ def get_prompt_scores( conversation_id: Optional[str | uuid.UUID] = None, prompt_ids: Optional[Sequence[str | uuid.UUID]] = None, labels: Optional[dict[str, str]] = None, - prompt_metadata: Optional[dict[str, Union[str, int]]] = None, + prompt_metadata: Optional[dict[str, Union[str, int, float]]] = None, sent_after: Optional[datetime] = None, sent_before: Optional[datetime] = None, original_values: Optional[Sequence[str]] = None, @@ -400,27 +400,17 @@ def get_prompt_scores( """ Retrieve scores attached to message pieces based on the specified filters. - Args: - attack_id (Optional[str | uuid.UUID], optional): The ID of the attack. Defaults to None. - role (Optional[str], optional): The role of the prompt. Defaults to None. - conversation_id (Optional[str | uuid.UUID], optional): The ID of the conversation. Defaults to None. - prompt_ids (Optional[Sequence[str] | Sequence[uuid.UUID]], optional): A list of prompt IDs. - Defaults to None. - labels (Optional[dict[str, str]], optional): A dictionary of labels. Defaults to None. - prompt_metadata (Optional[dict[str, Union[str, int]]], optional): The metadata associated with the prompt. - Defaults to None. - sent_after (Optional[datetime], optional): Filter for prompts sent after this datetime. Defaults to None. - sent_before (Optional[datetime], optional): Filter for prompts sent before this datetime. Defaults to None. - original_values (Optional[Sequence[str]], optional): A list of original values. Defaults to None. - converted_values (Optional[Sequence[str]], optional): A list of converted values. Defaults to None. - data_type (Optional[str], optional): The data type to filter by. Defaults to None. - not_data_type (Optional[str], optional): The data type to exclude. Defaults to None. - converted_value_sha256 (Optional[Sequence[str]], optional): A list of SHA256 hashes of converted values. - Defaults to None. + .. deprecated:: + Use get_scores() or get_message_pieces() instead (scores are attached to pieces). Returns: - Sequence[Score]: A list of scores extracted from the message pieces. + Sequence of Score objects matching the filters. """ + warnings.warn( + "get_prompt_scores is deprecated and will be removed in 0.13.0. Use get_scores() instead.", + DeprecationWarning, + stacklevel=2, + ) message_pieces = self.get_message_pieces( attack_id=attack_id, role=role, @@ -495,7 +485,7 @@ def get_message_pieces( conversation_id: Optional[str | uuid.UUID] = None, prompt_ids: Optional[Sequence[str | uuid.UUID]] = None, labels: Optional[dict[str, str]] = None, - prompt_metadata: Optional[dict[str, Union[str, int]]] = None, + prompt_metadata: Optional[dict[str, Union[str, int, float]]] = None, sent_after: Optional[datetime] = None, sent_before: Optional[datetime] = None, original_values: Optional[Sequence[str]] = None, @@ -514,7 +504,7 @@ def get_message_pieces( prompt_ids (Optional[Sequence[str] | Sequence[uuid.UUID]], optional): A list of prompt IDs. Defaults to None. labels (Optional[dict[str, str]], optional): A dictionary of labels. Defaults to None. - prompt_metadata (Optional[dict[str, Union[str, int]]], optional): The metadata associated with the prompt. + prompt_metadata (Optional[dict[str, Union[str, int, float]]], optional): The metadata associated with the prompt. Defaults to None. sent_after (Optional[datetime], optional): Filter for prompts sent after this datetime. Defaults to None. sent_before (Optional[datetime], optional): Filter for prompts sent before this datetime. Defaults to None. @@ -743,7 +733,7 @@ def update_labels_by_conversation_id(self, *, conversation_id: str, labels: dict ) def update_prompt_metadata_by_conversation_id( - self, *, conversation_id: str, prompt_metadata: dict[str, Union[str, int]] + self, *, conversation_id: str, prompt_metadata: dict[str, Union[str, int, float]] ) -> bool: """ Update the metadata of prompt entries in memory for a given conversation ID. @@ -791,7 +781,7 @@ def get_seeds( seed_type: Optional[SeedType] = None, is_objective: Optional[bool] = None, # Deprecated in 0.13.0: Use seed_type instead parameters: Optional[Sequence[str]] = None, - metadata: Optional[dict[str, Union[str, int]]] = None, + metadata: Optional[dict[str, Union[str, int, float]]] = None, prompt_group_ids: Optional[Sequence[uuid.UUID]] = None, ) -> Sequence[Seed]: """ @@ -1060,7 +1050,7 @@ def get_seed_groups( seed_type: Optional[SeedType] = None, is_objective: Optional[bool] = None, # Deprecated in 0.13.0: Use seed_type instead parameters: Optional[Sequence[str]] = None, - metadata: Optional[dict[str, Union[str, int]]] = None, + metadata: Optional[dict[str, Union[str, int, float]]] = None, prompt_group_ids: Optional[Sequence[uuid.UUID]] = None, group_length: Optional[Sequence[int]] = None, ) -> Sequence[SeedGroup]: @@ -1086,7 +1076,7 @@ def get_seed_groups( "simulated_conversation"). is_objective (bool): Deprecated in 0.13.0. Use seed_type="objective" instead. parameters (Optional[Sequence[str]], Optional): List of parameters to filter by. - metadata (Optional[dict[str, Union[str, int]]], Optional): A free-form dictionary for tagging + metadata (Optional[dict[str, Union[str, int, float]]], Optional): A free-form dictionary for tagging prompts with custom metadata. prompt_group_ids (Optional[Sequence[uuid.UUID]], Optional): List of prompt group IDs to filter by. group_length (Optional[Sequence[int]], Optional): The number of seeds in the group to filter by. diff --git a/pyrit/memory/memory_models.py b/pyrit/memory/memory_models.py index 7cc8e48bd..9622e52d1 100644 --- a/pyrit/memory/memory_models.py +++ b/pyrit/memory/memory_models.py @@ -205,10 +205,11 @@ def __init__(self, *, entry: MessagePiece): self.timestamp = entry.timestamp self.labels = entry.labels self.prompt_metadata = entry.prompt_metadata - self.targeted_harm_categories = entry.targeted_harm_categories - self.converter_identifiers = entry.converter_identifiers + # Access private attributes to avoid deprecation warnings during DB operations + self.targeted_harm_categories = entry._targeted_harm_categories + self.converter_identifiers = entry._converter_identifiers self.prompt_target_identifier = entry.prompt_target_identifier - self.attack_identifier = entry.attack_identifier + self.attack_identifier = entry._attack_identifier self.original_value = entry.original_value self.original_value_data_type = entry.original_value_data_type # type: ignore @@ -309,7 +310,7 @@ class ScoreEntry(Base): score_type: Mapped[Literal["true_false", "float_scale"]] = mapped_column(String, nullable=False) score_category: Mapped[Optional[list[str]]] = mapped_column(JSON, nullable=True) score_rationale = mapped_column(String, nullable=True) - score_metadata: Mapped[dict[str, Union[str, int]]] = mapped_column(JSON) + score_metadata: Mapped[dict[str, Union[str, int, float]]] = mapped_column(JSON) scorer_class_identifier: Mapped[dict[str, str]] = mapped_column(JSON) prompt_request_response_id = mapped_column(CustomUUID, ForeignKey(f"{PromptMemoryEntry.__tablename__}.id")) timestamp = mapped_column(DateTime, nullable=False) @@ -669,8 +670,14 @@ class AttackResultEntry(Base): objective (str): Natural-language description of the attacker's objective. attack_identifier (dict[str, str]): Identifier of the attack (e.g., name, module). objective_sha256 (str): The SHA256 hash of the objective. - last_response_id (Uuid): Foreign key to the last response MessagePiece. - last_score_id (Uuid): Foreign key to the last score ScoreEntry. + targeted_harm_categories (List[str]): Harm categories associated with this attack. + converter_identifiers (List[dict[str, str]]): Converter identifiers used during the attack. + prompt_target_identifier (dict[str, str]): Target identifier for the attack. + labels (dict[str, str]): Labels associated with this attack. + last_response_id (Uuid): Deprecated. Foreign key to the last response MessagePiece. + objective_score_id (Uuid): Foreign key to the objective score ScoreEntry. + human_score_id (Uuid): Foreign key to the human-set score ScoreEntry. + auxiliary_score_ids (List[str]): List of score IDs for auxiliary scores. executed_turns (int): Total number of turns that were executed. execution_time_ms (int): Total execution time of the attack in milliseconds. outcome (AttackOutcome): The outcome of the attack, indicating success, failure, or undetermined. @@ -679,8 +686,9 @@ class AttackResultEntry(Base): pruned_conversation_ids (List[str]): List of conversation IDs that were pruned from the attack. adversarial_chat_conversation_ids (List[str]): List of conversation IDs used for adversarial chat. timestamp (DateTime): The timestamp of the attack result entry. - last_response (PromptMemoryEntry): Relationship to the last response prompt memory entry. - last_score (ScoreEntry): Relationship to the last score entry. + last_response (PromptMemoryEntry): Deprecated. Relationship to the last response prompt memory entry. + objective_score (ScoreEntry): Relationship to the objective score entry. + human_score (ScoreEntry): Relationship to the human score entry. Methods: __str__(): Returns a string representation of the attack result entry. @@ -693,12 +701,29 @@ class AttackResultEntry(Base): objective = mapped_column(Unicode, nullable=False) attack_identifier: Mapped[dict[str, str]] = mapped_column(JSON, nullable=False) objective_sha256 = mapped_column(String, nullable=True) + + targeted_harm_categories: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True) + request_converter_identifiers: Mapped[Optional[List[dict[str, str]]]] = mapped_column(JSON, nullable=True) + objective_target_identifier: Mapped[Optional[dict[str, str]]] = mapped_column(JSON, nullable=True) + labels: Mapped[Optional[dict[str, str]]] = mapped_column(JSON, nullable=True) + + # Deprecated: last_response_id - will be removed in 0.13.0 last_response_id: Mapped[Optional[uuid.UUID]] = mapped_column( CustomUUID, ForeignKey(f"{PromptMemoryEntry.__tablename__}.id"), nullable=True ) + # Deprecated: last_score_id - use objective_score_id instead last_score_id: Mapped[Optional[uuid.UUID]] = mapped_column( CustomUUID, ForeignKey(f"{ScoreEntry.__tablename__}.id"), nullable=True ) + + objective_score_id: Mapped[Optional[uuid.UUID]] = mapped_column( + CustomUUID, ForeignKey(f"{ScoreEntry.__tablename__}.id"), nullable=True + ) + human_score_id: Mapped[Optional[uuid.UUID]] = mapped_column( + CustomUUID, ForeignKey(f"{ScoreEntry.__tablename__}.id"), nullable=True + ) + auxiliary_score_ids: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True) + executed_turns = mapped_column(INTEGER, nullable=False, default=0) execution_time_ms = mapped_column(INTEGER, nullable=False, default=0) outcome: Mapped[Literal["success", "failure", "undetermined"]] = mapped_column( @@ -710,6 +735,7 @@ class AttackResultEntry(Base): adversarial_chat_conversation_ids: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True) timestamp = mapped_column(DateTime, nullable=False) + # Deprecated relationship last_response and last_score - will be removed in 0.13.0 last_response: Mapped[Optional["PromptMemoryEntry"]] = relationship( "PromptMemoryEntry", foreign_keys=[last_response_id], @@ -719,6 +745,15 @@ class AttackResultEntry(Base): foreign_keys=[last_score_id], ) + objective_score: Mapped[Optional["ScoreEntry"]] = relationship( + "ScoreEntry", + foreign_keys=[objective_score_id], + ) + human_score: Mapped[Optional["ScoreEntry"]] = relationship( + "ScoreEntry", + foreign_keys=[human_score_id], + ) + def __init__(self, *, entry: AttackResult): """ Initialize an AttackResultEntry from an AttackResult object. @@ -732,9 +767,22 @@ def __init__(self, *, entry: AttackResult): self.attack_identifier = entry.attack_identifier self.objective_sha256 = to_sha256(entry.objective) - # Use helper method for UUID conversions - self.last_response_id = self._get_id_as_uuid(entry.last_response) - self.last_score_id = self._get_id_as_uuid(entry.last_score) + self.targeted_harm_categories = entry.targeted_harm_categories + self.request_converter_identifiers = entry.request_converter_identifiers + self.objective_target_identifier = entry.objective_target_identifier + self.labels = entry.labels + + # Deprecated: last_response_id - no longer stored, kept for backward compatibility + # The last response can be retrieved via conversation_id + self.last_response_id = None + + # Scoring fields + self.objective_score_id = self._get_id_as_uuid(entry.automated_objective_score) + self.human_score_id = self._get_id_as_uuid(entry.human_objective_score) + self.auxiliary_score_ids = entry.auxiliary_score_ids if entry.auxiliary_score_ids else None + + # Deprecated: last_score_id - kept for backward compatibility + self.last_score_id = self.objective_score_id self.executed_turns = entry.executed_turns self.execution_time_ms = entry.execution_time_ms @@ -744,11 +792,11 @@ def __init__(self, *, entry: AttackResult): # Persist conversation references by type self.pruned_conversation_ids = [ - ref.conversation_id for ref in entry.get_conversations_by_type(ConversationType.PRUNED) + ref.conversation_id for ref in entry.get_conversation_ids_by_type(ConversationType.PRUNED) ] or None self.adversarial_chat_conversation_ids = [ - ref.conversation_id for ref in entry.get_conversations_by_type(ConversationType.ADVERSARIAL) + ref.conversation_id for ref in entry.get_conversation_ids_by_type(ConversationType.ADVERSARIAL) ] or None self.timestamp = datetime.now() @@ -826,12 +874,17 @@ def get_attack_result(self) -> AttackResult: ) ) - return AttackResult( + result = AttackResult( conversation_id=self.conversation_id, objective=self.objective, attack_identifier=self.attack_identifier, - last_response=self.last_response.get_message_piece() if self.last_response else None, - last_score=self.last_score.get_score() if self.last_score else None, + targeted_harm_categories=self.targeted_harm_categories, + request_converter_identifiers=self.request_converter_identifiers, + objective_target_identifier=self.objective_target_identifier, + labels=self.labels, + automated_objective_score=self.objective_score.get_score() if self.objective_score else None, + human_objective_score=self.human_score.get_score() if self.human_score else None, + auxiliary_score_ids=self.auxiliary_score_ids or [], executed_turns=self.executed_turns, execution_time_ms=self.execution_time_ms, outcome=AttackOutcome(self.outcome), @@ -840,6 +893,8 @@ def get_attack_result(self) -> AttackResult: metadata=self.attack_metadata or {}, ) + return result + class ScenarioResultEntry(Base): """ diff --git a/pyrit/memory/sqlite_memory.py b/pyrit/memory/sqlite_memory.py index 30a251cf7..1a8f03476 100644 --- a/pyrit/memory/sqlite_memory.py +++ b/pyrit/memory/sqlite_memory.py @@ -140,7 +140,7 @@ def _get_message_pieces_memory_label_conditions(self, *, memory_labels: dict[str return [condition] def _get_message_pieces_prompt_metadata_conditions( - self, *, prompt_metadata: dict[str, Union[str, int]] + self, *, prompt_metadata: dict[str, Union[str, int, float]] ) -> list[TextClause]: """ Generate SQLAlchemy filter conditions for filtering conversation pieces by prompt metadata. @@ -165,7 +165,7 @@ def _get_message_pieces_attack_conditions(self, *, attack_id: str) -> Any: """ return text("JSON_EXTRACT(attack_identifier, '$.id') = :attack_id").bindparams(attack_id=str(attack_id)) - def _get_seed_metadata_conditions(self, *, metadata: dict[str, Union[str, int]]) -> Any: + def _get_seed_metadata_conditions(self, *, metadata: dict[str, Union[str, int, float]]) -> Any: """ Generate SQLAlchemy filter conditions for filtering seed prompts by metadata. @@ -232,6 +232,8 @@ def _query_entries( query = query.options( joinedload(AttackResultEntry.last_response).joinedload(PromptMemoryEntry.scores), joinedload(AttackResultEntry.last_score), + joinedload(AttackResultEntry.objective_score), + joinedload(AttackResultEntry.human_score), ) if conditions is not None: query = query.filter(conditions) @@ -396,20 +398,12 @@ def export_conversations( file_name = f"all_conversations.{export_type}" file_path = Path(DB_DATA_PATH, file_name) - # Get scores for the message pieces - if message_pieces: - message_piece_ids = [str(piece.id) for piece in message_pieces] - scores = self.get_prompt_scores(prompt_ids=message_piece_ids) - else: - scores = [] - # Merge conversations and scores - create the data structure manually merged_data = [] for piece in message_pieces: piece_data = piece.to_dict() - # Find associated scores - piece_scores = [score for score in scores if score.message_piece_id == piece.id] - piece_data["scores"] = [score.to_dict() for score in piece_scores] + # Get associated scores directly from piece (already populated by get_message_pieces) + piece_data["scores"] = [score.to_dict() for score in (piece.scores or [])] merged_data.append(piece_data) # Export to JSON manually since the exporter expects objects but we have dicts diff --git a/pyrit/models/attack_result.py b/pyrit/models/attack_result.py index dc9e3a1a9..562c21711 100644 --- a/pyrit/models/attack_result.py +++ b/pyrit/models/attack_result.py @@ -3,15 +3,18 @@ from __future__ import annotations -from dataclasses import dataclass, field +import warnings from enum import Enum -from typing import Any, Dict, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Dict, List, MutableSequence, Optional, TypeVar from pyrit.models.conversation_reference import ConversationReference, ConversationType from pyrit.models.message_piece import MessagePiece from pyrit.models.score import Score from pyrit.models.strategy_result import StrategyResult +if TYPE_CHECKING: + from pyrit.models.message import Message + AttackResultT = TypeVar("AttackResultT", bound="AttackResult") @@ -30,58 +33,277 @@ class AttackOutcome(Enum): UNDETERMINED = "undetermined" -@dataclass class AttackResult(StrategyResult): - """Base class for all attack results.""" + """ + Base class for all attack results. + + Contains identity information, scoring, metadata moved from per-message storage, + and methods to retrieve conversation history. + """ + + def __init__( + self, + *, + conversation_id: str, + objective: str, + attack_identifier: dict[str, str], + targeted_harm_categories: Optional[List[str]] = None, + request_converter_identifiers: Optional[List[Dict[str, str]]] = None, + objective_target_identifier: Optional[Dict[str, str]] = None, + labels: Optional[Dict[str, str]] = None, + automated_objective_score: Optional[Score] = None, + human_objective_score: Optional[Score] = None, + auxiliary_score_ids: Optional[List[str]] = None, + executed_turns: int = 0, + execution_time_ms: int = 0, + outcome: Optional[AttackOutcome] = None, + outcome_reason: Optional[str] = None, + related_conversations: Optional[set[ConversationReference]] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: + """ + Initialize an AttackResult. + + Args: + conversation_id: Unique identifier of the conversation that produced this result. + objective: Natural-language description of the attacker's objective. + attack_identifier: Identifier of the attack (e.g., name, module). + targeted_harm_categories: Harm categories associated with this attack. + request_converter_identifiers: Converter identifiers used during the attack. + objective_target_identifier: Target identifier for the attack. + labels: Labels associated with this attack. + automated_objective_score: The automated objective score (must be true_false type). + human_objective_score: The human objective score (must be true_false type). + auxiliary_score_ids: IDs of additional scores providing auxiliary information. + executed_turns: Total number of turns that were executed. + execution_time_ms: Total execution time of the attack in milliseconds. + outcome: The outcome of the attack. If None, derived from objective_score. + outcome_reason: Optional reason for the outcome. + related_conversations: Set of related conversation references. + metadata: Arbitrary metadata dictionary. + """ + # Identity + self.conversation_id = conversation_id + self.objective = objective + self.attack_identifier = attack_identifier + + # Metadata moved from MessagePiece (stored once per attack, not per message) + self.targeted_harm_categories = targeted_harm_categories + self.request_converter_identifiers = request_converter_identifiers + self.objective_target_identifier = objective_target_identifier + self.labels = labels + + # Private backing fields for scores (use property setters for validation) + self._automated_objective_score: Optional[Score] = None + self._human_objective_score: Optional[Score] = None + + # Use setters for validation + self.automated_objective_score = automated_objective_score + self.human_objective_score = human_objective_score + + # Auxiliary scores + self.auxiliary_score_ids = auxiliary_score_ids if auxiliary_score_ids is not None else [] + + # Metrics + self.executed_turns = executed_turns + self.execution_time_ms = execution_time_ms + + # Outcome - derive from objective_score if not provided + if outcome is not None: + self.outcome = outcome + elif self.objective_score is not None: + self.outcome = AttackOutcome.SUCCESS if self.objective_score.get_value() else AttackOutcome.FAILURE + else: + self.outcome = AttackOutcome.UNDETERMINED + + self.outcome_reason = outcome_reason + + # Related conversations + self.related_conversations = related_conversations if related_conversations is not None else set() + + # Metadata + self.metadata = metadata if metadata is not None else {} + + @property + def objective_score(self) -> Optional[Score]: + """ + Get the effective objective score for this attack. + + If a human objective score has been set, it takes precedence over the automated score. + + Returns: + Optional[Score]: The human objective score if set, otherwise the automated objective score. + """ + if self._human_objective_score is not None: + return self._human_objective_score + return self._automated_objective_score + + @property + def automated_objective_score(self) -> Optional[Score]: + """Get the automated objective score.""" + return self._automated_objective_score + + @automated_objective_score.setter + def automated_objective_score(self, value: Optional[Score]) -> None: + """ + Set the automated objective score. + + Args: + value: The score to set. Must be a true_false type score if provided. + + Raises: + ValueError: If the score is not a true_false type. + """ + if value is not None and value.score_type != "true_false": + raise ValueError("automated_objective_score must be a true_false type score") + self._automated_objective_score = value - # Identity - # Unique identifier of the conversation that produced this result - conversation_id: str + @property + def human_objective_score(self) -> Optional[Score]: + """Get the human objective score.""" + return self._human_objective_score - # Natural-language description of the attacker's objective - objective: str + @human_objective_score.setter + def human_objective_score(self, value: Optional[Score]) -> None: + """ + Set the human objective score, which overrides the automated_objective_score. + + Args: + value: The score to set. Must be a true_false type score if provided. + + Raises: + ValueError: If the score is not a true_false type. + """ + if value is not None and value.score_type != "true_false": + raise ValueError("human_objective_score must be a true_false type score") + self._human_objective_score = value + + @property + def last_response(self) -> Optional[MessagePiece]: + """ + Deprecated: Get the last response from the conversation. - # Identifier of the attack (e.g., name, module) - attack_identifier: dict[str, str] + This property is deprecated and will be removed in 0.13.0. + Use get_conversation() instead to retrieve conversation messages. + """ + warnings.warn( + "AttackResult.last_response is deprecated and will be removed in 0.13.0. " + "Use get_conversation() to retrieve conversation messages.", + DeprecationWarning, + stacklevel=2, + ) + # Check if a value was explicitly set via the deprecated setter + if hasattr(self, "_deprecated_last_response") and self._deprecated_last_response is not None: + return self._deprecated_last_response + conversation = self.get_conversation() + if conversation: + return conversation[-1].get_piece() if hasattr(conversation[-1], "get_piece") else None + return None - # Evidence - # Model response generated in the final turn of the attack - last_response: Optional[MessagePiece] = None + @last_response.setter + def last_response(self, value: Optional[MessagePiece]) -> None: + """ + Deprecated: Set the last response. - # Score assigned to the final response by a scorer component - last_score: Optional[Score] = None + This property is deprecated and will be removed in 0.13.0. + """ + warnings.warn( + "AttackResult.last_response is deprecated and will be removed in 0.13.0. " + "Use get_conversation() to retrieve conversation messages.", + DeprecationWarning, + stacklevel=2, + ) + self._deprecated_last_response = value - # Metrics - # Total number of turns that were executed - executed_turns: int = 0 + @property + def last_score(self) -> Optional[Score]: + """ + Deprecated: Get the last score. - # Total execution time of the attack in milliseconds - execution_time_ms: int = 0 + This property is deprecated and will be removed in 0.13.0. + Use objective_score instead. + """ + warnings.warn( + "AttackResult.last_score is deprecated and will be removed in 0.13.0. Use objective_score instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.objective_score - # Outcome - # The outcome of the attack, indicating success, failure, or undetermined - outcome: AttackOutcome = AttackOutcome.UNDETERMINED + @last_score.setter + def last_score(self, value: Optional[Score]) -> None: + """ + Deprecated: Set the last score. - # Optional reason for the outcome, providing additional context - outcome_reason: Optional[str] = None + This property is deprecated and will be removed in 0.13.0. + Use automated_objective_score instead. - # Flexible conversation refs (nothing unused) - related_conversations: set[ConversationReference] = field(default_factory=set) + Args: + value: The score to set. Must be a true_false type score if provided. - # Arbitrary metadata - metadata: Dict[str, Any] = field(default_factory=dict) + Raises: + ValueError: If the score is not a true_false type. + """ + warnings.warn( + "AttackResult.last_score is deprecated and will be removed in 0.13.0. " + "Use automated_objective_score instead.", + DeprecationWarning, + stacklevel=2, + ) + # Use the property setter to enforce validation + self.automated_objective_score = value - def get_conversations_by_type(self, conversation_type: ConversationType) -> list[ConversationReference]: + def get_conversation(self) -> MutableSequence["Message"]: + """ + Retrieve the full conversation associated with this attack result. + + This method fetches all messages from memory using the conversation_id. + + Returns: + MutableSequence[Message]: The list of messages in the conversation. + """ + from pyrit.memory import CentralMemory + + memory = CentralMemory.get_memory_instance() + return memory.get_conversation(conversation_id=self.conversation_id) + + def get_auxiliary_scores(self) -> List[Score]: + """ + Retrieve the auxiliary scores associated with this attack result. + + This method fetches all scores from memory using the auxiliary_score_ids. + + Returns: + List[Score]: The list of auxiliary scores. + """ + if not self.auxiliary_score_ids: + return [] + + from pyrit.memory import CentralMemory + + memory = CentralMemory.get_memory_instance() + return list(memory.get_prompt_scores(prompt_ids=self.auxiliary_score_ids)) + + def get_conversation_ids_by_type(self, conversation_type: ConversationType) -> List[ConversationReference]: """ Return all related conversations of the requested type. Args: - conversation_type (ConversationType): The type of conversation to filter by. + conversation_type: The type of conversation to filter by. Returns: - list: A list of related conversations matching the specified type. + List[ConversationReference]: A list of related conversations matching the specified type. """ return [ref for ref in self.related_conversations if ref.conversation_type == conversation_type] def __str__(self) -> str: - return f"AttackResult: {self.conversation_id}: {self.outcome.value}: {self.objective[:50]}..." + outcome_value = self.outcome.value if self.outcome else "unknown" + return f"AttackResult: {self.conversation_id}: {outcome_value}: {self.objective[:50]}..." + + def __repr__(self) -> str: + return ( + f"AttackResult(conversation_id={self.conversation_id!r}, " + f"objective={self.objective[:30]!r}..., " + f"outcome={self.outcome}, " + f"objective_score={self.objective_score})" + ) diff --git a/pyrit/models/message.py b/pyrit/models/message.py index 4c8c6e334..11298afe1 100644 --- a/pyrit/models/message.py +++ b/pyrit/models/message.py @@ -372,6 +372,7 @@ def construct_response_from_request( if request.prompt_metadata: prompt_metadata = combine_dict(request.prompt_metadata, prompt_metadata or {}) + # Attack Identifier is deprecated and needs to be removed in 0.13.0 return Message( message_pieces=[ MessagePiece( @@ -380,7 +381,7 @@ def construct_response_from_request( conversation_id=request.conversation_id, labels=request.labels, prompt_target_identifier=request.prompt_target_identifier, - attack_identifier=request.attack_identifier, + attack_identifier=request._attack_identifier, original_value_data_type=response_type, converted_value_data_type=response_type, prompt_metadata=prompt_metadata, diff --git a/pyrit/models/message_piece.py b/pyrit/models/message_piece.py index 4fea99a40..de608e9a9 100644 --- a/pyrit/models/message_piece.py +++ b/pyrit/models/message_piece.py @@ -4,6 +4,7 @@ from __future__ import annotations import uuid +import warnings from datetime import datetime from typing import Dict, List, Literal, Optional, Union, get_args from uuid import uuid4 @@ -103,10 +104,14 @@ def __init__( self.labels = labels or {} self.prompt_metadata = prompt_metadata or {} - self.converter_identifiers = converter_identifiers if converter_identifiers else [] - self.prompt_target_identifier = prompt_target_identifier or {} - self.attack_identifier = attack_identifier or {} + + # Private backing fields for deprecated properties + # These are moving to AttackResult in 0.13.0 + self._converter_identifiers = converter_identifiers if converter_identifiers else [] + self._attack_identifier = attack_identifier or {} + self._targeted_harm_categories = targeted_harm_categories if targeted_harm_categories else [] + self.scorer_identifier = scorer_identifier or {} self.original_value = original_value @@ -137,7 +142,72 @@ def __init__( self.original_prompt_id = original_prompt_id or self.id self.scores = scores if scores else [] - self.targeted_harm_categories = targeted_harm_categories if targeted_harm_categories else [] + + # Deprecated properties - converter_identifiers, attack_identifier, targeted_harm_categories + # are moving to AttackResult in 0.13.0. + + @property + def converter_identifiers(self) -> List[Dict[str, str]]: + """ + Deprecated: The converter identifiers for the prompt. + + This attribute is deprecated and will be removed in 0.13.0. + Use AttackResult.request_converter_identifiers instead. + """ + warnings.warn( + "MessagePiece.converter_identifiers is deprecated and will be removed in 0.13.0. " + "Use AttackResult.request_converter_identifiers instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._converter_identifiers + + @converter_identifiers.setter + def converter_identifiers(self, value: Optional[List[Dict[str, str]]]) -> None: + """Set the converter identifiers.""" + self._converter_identifiers = value if value else [] + + @property + def attack_identifier(self) -> Dict[str, str]: + """ + Deprecated: The attack identifier for the prompt. + + This attribute is deprecated and will be removed in 0.13.0. + Use AttackResult.attack_identifier instead. + """ + warnings.warn( + "MessagePiece.attack_identifier is deprecated and will be removed in 0.13.0. " + "Use AttackResult.attack_identifier instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._attack_identifier + + @attack_identifier.setter + def attack_identifier(self, value: Optional[Dict[str, str]]) -> None: + """Set the attack identifier.""" + self._attack_identifier = value or {} + + @property + def targeted_harm_categories(self) -> List[str]: + """ + Deprecated: The harm categories associated with the prompt. + + This attribute is deprecated and will be removed in 0.13.0. + Use AttackResult.targeted_harm_categories instead. + """ + warnings.warn( + "MessagePiece.targeted_harm_categories is deprecated and will be removed in 0.13.0. " + "Use AttackResult.targeted_harm_categories instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._targeted_harm_categories + + @targeted_harm_categories.setter + def targeted_harm_categories(self, value: Optional[List[str]]) -> None: + """Set the targeted harm categories.""" + self._targeted_harm_categories = value if value else [] async def set_sha256_values_async(self) -> None: """ @@ -254,6 +324,7 @@ def set_piece_not_in_database(self) -> None: self.id = None def to_dict(self) -> dict[str, object]: + # Use private attributes to avoid deprecation warnings return { "id": str(self.id), "role": self._role, @@ -261,11 +332,11 @@ def to_dict(self) -> dict[str, object]: "sequence": self.sequence, "timestamp": self.timestamp.isoformat() if self.timestamp else None, "labels": self.labels, - "targeted_harm_categories": self.targeted_harm_categories if self.targeted_harm_categories else None, + "targeted_harm_categories": self._targeted_harm_categories if self._targeted_harm_categories else None, "prompt_metadata": self.prompt_metadata, - "converter_identifiers": self.converter_identifiers, + "converter_identifiers": self._converter_identifiers, "prompt_target_identifier": self.prompt_target_identifier, - "attack_identifier": self.attack_identifier, + "attack_identifier": self._attack_identifier, "scorer_identifier": self.scorer_identifier, "original_value_data_type": self.original_value_data_type, "original_value": self.original_value, diff --git a/pyrit/models/score.py b/pyrit/models/score.py index e96dfcf4c..24072817e 100644 --- a/pyrit/models/score.py +++ b/pyrit/models/score.py @@ -28,7 +28,7 @@ class Score: score_rationale: str # Custom metadata a scorer might use. This can vary by scorer. - score_metadata: Optional[Dict[str, Union[str, int]]] + score_metadata: Optional[Dict[str, Union[str, int, float]]] # The identifier of the scorer class, including relevant information # e.g. {"scorer_name": "SelfAskScorer", "classifier": "current_events.yml"} @@ -48,14 +48,14 @@ class Score: def __init__( self, *, - id: Optional[uuid.UUID | str] = None, score_value: str, score_value_description: str, score_type: ScoreType, - score_category: Optional[List[str]] = None, score_rationale: str, - score_metadata: Optional[Dict[str, Union[str, int]]], message_piece_id: str | uuid.UUID, + id: Optional[uuid.UUID | str] = None, + score_category: Optional[List[str]] = None, + score_metadata: Optional[Dict[str, Union[str, int, float]]] = None, scorer_class_identifier: Optional[Dict[str, str]] = None, timestamp: Optional[datetime] = None, objective: Optional[str] = None, @@ -74,7 +74,7 @@ def __init__( self.score_type = score_type self.score_category = score_category self.score_rationale = score_rationale - self.score_metadata = score_metadata + self.score_metadata = score_metadata or {} self.scorer_class_identifier = scorer_class_identifier or {} self.message_piece_id = message_piece_id self.objective = objective @@ -149,7 +149,7 @@ class UnvalidatedScore: score_value_description: str score_category: Optional[List[str]] score_rationale: str - score_metadata: Optional[Dict[str, Union[str, int]]] + score_metadata: Optional[Dict[str, Union[str, int, float]]] scorer_class_identifier: Dict[str, str] message_piece_id: uuid.UUID | str objective: Optional[str] diff --git a/pyrit/prompt_normalizer/prompt_normalizer.py b/pyrit/prompt_normalizer/prompt_normalizer.py index 753b9220c..ca5d8bfec 100644 --- a/pyrit/prompt_normalizer/prompt_normalizer.py +++ b/pyrit/prompt_normalizer/prompt_normalizer.py @@ -224,7 +224,9 @@ async def convert_values( if data_types and piece.converted_value_data_type not in data_types: continue - piece.converter_identifiers.extend( + # converter_identifiers as part of piece is deprecated and will be removed + # as part of message in 0.13.0 + piece._converter_identifiers.extend( [converter.get_identifier() for converter in converter_configuration.converters] ) diff --git a/pyrit/prompt_target/openai/openai_response_target.py b/pyrit/prompt_target/openai/openai_response_target.py index 31343ed40..398ed8233 100644 --- a/pyrit/prompt_target/openai/openai_response_target.py +++ b/pyrit/prompt_target/openai/openai_response_target.py @@ -625,13 +625,14 @@ def _parse_response_output_section( if not piece_value: raise EmptyResponseException(message="The chat returned an empty response.") + # attack identifier is deprecated and will be removed in 0.13.0 return MessagePiece( role="assistant", original_value=piece_value, conversation_id=message_piece.conversation_id, labels=message_piece.labels, prompt_target_identifier=message_piece.prompt_target_identifier, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, original_value_data_type=piece_type, response_error=error or "none", ) @@ -756,5 +757,5 @@ def _make_tool_piece(self, output: dict[str, Any], call_id: str, *, reference_pi conversation_id=reference_piece.conversation_id, labels={"call_id": call_id}, prompt_target_identifier=reference_piece.prompt_target_identifier, - attack_identifier=reference_piece.attack_identifier, + attack_identifier=reference_piece._attack_identifier, ) diff --git a/pyrit/prompt_target/playwright_copilot_target.py b/pyrit/prompt_target/playwright_copilot_target.py index 2610e78d8..3a5ad5cfb 100644 --- a/pyrit/prompt_target/playwright_copilot_target.py +++ b/pyrit/prompt_target/playwright_copilot_target.py @@ -197,7 +197,7 @@ async def send_prompt_async(self, *, message: Message) -> list[Message]: conversation_id=request_piece.conversation_id, labels=request_piece.labels, prompt_target_identifier=request_piece.prompt_target_identifier, - attack_identifier=request_piece.attack_identifier, + attack_identifier=request_piece._attack_identifier, original_value_data_type=piece_type, converted_value_data_type=piece_type, prompt_metadata=request_piece.prompt_metadata, diff --git a/pyrit/score/aggregator_utils.py b/pyrit/score/aggregator_utils.py deleted file mode 100644 index d30cc7df7..000000000 --- a/pyrit/score/aggregator_utils.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -from typing import Dict, List, Union - -from pyrit.common.utils import combine_dict -from pyrit.models import Score - - -def combine_metadata_and_categories(scores: List[Score]) -> tuple[Dict[str, Union[str, int]], List[str]]: - """ - Combine metadata and categories from multiple scores with deduplication. - - Args: - scores: List of Score objects. - - Returns: - Tuple of (metadata dict, sorted category list with empty strings filtered). - """ - metadata: Dict[str, Union[str, int]] = {} - category_set: set[str] = set() - - for s in scores: - metadata = combine_dict(metadata, getattr(s, "score_metadata", None)) - score_categories = getattr(s, "score_category", None) or [] - category_set.update([c for c in score_categories if c]) - - category = sorted(category_set) - return metadata, category - - -def format_score_for_rationale(score: Score) -> str: - """ - Format a single score for inclusion in an aggregated rationale. - - Args: - score: The Score object to format. - - Returns: - Formatted string with scorer class, value, and rationale. - """ - class_type = score.scorer_class_identifier.get("__type__", "Unknown") - return f" - {class_type} {score.score_value}: {score.score_rationale or ''}" diff --git a/pyrit/score/conversation_scorer.py b/pyrit/score/conversation_scorer.py index d64ee725a..f2eb6d030 100644 --- a/pyrit/score/conversation_scorer.py +++ b/pyrit/score/conversation_scorer.py @@ -85,7 +85,7 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non conversation_id=original_piece.conversation_id, labels=original_piece.labels, prompt_target_identifier=original_piece.prompt_target_identifier, - attack_identifier=original_piece.attack_identifier, + attack_identifier=original_piece._attack_identifier, original_value_data_type=original_piece.original_value_data_type, converted_value_data_type=original_piece.converted_value_data_type, response_error=original_piece.response_error, diff --git a/pyrit/score/float_scale/azure_content_filter_scorer.py b/pyrit/score/float_scale/azure_content_filter_scorer.py index 808c9868b..d8e855e6d 100644 --- a/pyrit/score/float_scale/azure_content_filter_scorer.py +++ b/pyrit/score/float_scale/azure_content_filter_scorer.py @@ -282,7 +282,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op # Severity as defined here # https://learn.microsoft.com/en-us/azure/ai-services/content-safety/concepts/harm-categories?tabs=definitions#severity-levels - metadata: dict[str, str | int] = {"azure_severity": int(value)} + metadata: dict[str, str | int | float] = {"azure_severity": int(value)} score_obj = Score( score_type="float_scale", diff --git a/pyrit/score/float_scale/float_scale_score_aggregator.py b/pyrit/score/float_scale/float_scale_score_aggregator.py index d07ab0507..dc9b30beb 100644 --- a/pyrit/score/float_scale/float_scale_score_aggregator.py +++ b/pyrit/score/float_scale/float_scale_score_aggregator.py @@ -5,11 +5,11 @@ from typing import Callable, Dict, Iterable, List from pyrit.models import Score -from pyrit.score.aggregator_utils import ( +from pyrit.score.score_aggregator_result import ScoreAggregatorResult +from pyrit.score.score_utils import ( combine_metadata_and_categories, format_score_for_rationale, ) -from pyrit.score.score_aggregator_result import ScoreAggregatorResult FloatScaleOp = Callable[[List[float]], float] FloatScaleAggregatorFunc = Callable[[Iterable[Score]], List[ScoreAggregatorResult]] diff --git a/pyrit/score/float_scale/insecure_code_scorer.py b/pyrit/score/float_scale/insecure_code_scorer.py index 2fd7521bc..1af28dcae 100644 --- a/pyrit/score/float_scale/insecure_code_scorer.py +++ b/pyrit/score/float_scale/insecure_code_scorer.py @@ -85,7 +85,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op scored_prompt_id=message_piece.id, category=self._harm_category, objective=objective, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, ) # Modify the UnvalidatedScore parsing to check for 'score_value' diff --git a/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py b/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py index 41221db64..f9cddb30b 100644 --- a/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py +++ b/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py @@ -135,7 +135,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op scored_prompt_id=message_piece.id, category=self._score_category, objective=objective, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, score_value_output_key=self._score_value_output_key, rationale_output_key=self._rationale_output_key, description_output_key=self._description_output_key, diff --git a/pyrit/score/score_aggregator_result.py b/pyrit/score/score_aggregator_result.py index 21e821630..e66f46fc0 100644 --- a/pyrit/score/score_aggregator_result.py +++ b/pyrit/score/score_aggregator_result.py @@ -16,11 +16,11 @@ class ScoreAggregatorResult: description (str): A short, human-friendly description of the aggregation outcome. rationale (str): Combined rationale from constituent scores. category (List[str]): Combined list of categories from constituent scores. - metadata (Dict[str, Union[str, int]]): Combined metadata from constituent scores. + metadata (Dict[str, Union[str, int, float]]): Combined metadata from constituent scores. """ value: Union[bool, float] description: str rationale: str category: List[str] - metadata: Dict[str, Union[str, int]] + metadata: Dict[str, Union[str, int, float]] diff --git a/pyrit/score/score_utils.py b/pyrit/score/score_utils.py new file mode 100644 index 000000000..ec3d0d15d --- /dev/null +++ b/pyrit/score/score_utils.py @@ -0,0 +1,86 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +from typing import Dict, List, Optional, Union + +from pyrit.common.utils import combine_dict +from pyrit.models import Score + +# Key used by FloatScaleThresholdScorer to store the original float value +# in score_metadata when converting float_scale to true_false +ORIGINAL_FLOAT_VALUE_KEY = "original_float_value" + + +def combine_metadata_and_categories(scores: List[Score]) -> tuple[Dict[str, Union[str, int, float]], List[str]]: + """ + Combine metadata and categories from multiple scores with deduplication. + + Args: + scores: List of Score objects. + + Returns: + Tuple of (metadata dict, sorted category list with empty strings filtered). + """ + metadata: Dict[str, Union[str, int, float]] = {} + category_set: set[str] = set() + + for s in scores: + metadata = combine_dict(metadata, getattr(s, "score_metadata", None)) + score_categories = getattr(s, "score_category", None) or [] + category_set.update([c for c in score_categories if c]) + + category = sorted(category_set) + return metadata, category + + +def format_score_for_rationale(score: Score) -> str: + """ + Format a single score for inclusion in an aggregated rationale. + + Args: + score: The Score object to format. + + Returns: + Formatted string with scorer class, value, and rationale. + """ + class_type = score.scorer_class_identifier.get("__type__", "Unknown") + return f" - {class_type} {score.score_value}: {score.score_rationale or ''}" + + +def normalize_score_to_float(score: Optional[Score]) -> float: + """ + Normalize any score to a float value between 0.0 and 1.0. + + This function extracts a float value from a score object, handling different + score types and metadata. It first checks for the original float value in + score metadata (stored by FloatScaleThresholdScorer) to preserve granularity. + Falls back to the score value if metadata is not present. + + This is useful for providing numerical feedback to adversarial chats in + multi-turn attacks, regardless of whether the underlying scorer produces + float_scale or true_false type scores. + + Args: + score: The score to normalize, or None. + + Returns: + Float value between 0.0 and 1.0. Returns 0.0 if score is None. + """ + if not score: + return 0.0 + + # Check for original float value in metadata (from FloatScaleThresholdScorer) + # This preserves granularity when using threshold scorers + if score.score_metadata: + original_float = score.score_metadata.get(ORIGINAL_FLOAT_VALUE_KEY) + if original_float is not None: + return float(original_float) + + # Fall back to the score value itself + score_value = score.get_value() + if isinstance(score_value, bool): + return 1.0 if score_value else 0.0 + elif isinstance(score_value, (int, float)): + return float(score_value) + else: + return 0.0 diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index 1bce6f222..e6d7173d7 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -646,16 +646,16 @@ async def _score_value_with_llm( # JSON must yield either a string or a list of strings raise ValueError("'category' must be a string or a list of strings") - # Normalize metadata to a dictionary with string keys and string/int values + # Normalize metadata to a dictionary with string keys and string/int/float values raw_md = parsed_response.get(metadata_output_key) - normalized_md: Optional[Dict[str, Union[str, int]]] + normalized_md: Optional[Dict[str, Union[str, int, float]]] if raw_md is None: normalized_md = None elif isinstance(raw_md, dict): - # Coerce keys to str and filter to str/int values only - normalized_md = {str(k): v for k, v in raw_md.items() if isinstance(v, (str, int))} + # Coerce keys to str and filter to str/int/float values only + normalized_md = {str(k): v for k, v in raw_md.items() if isinstance(v, (str, int, float))} # If dictionary becomes empty after filtering, keep as empty dict - elif isinstance(raw_md, (str, int)): + elif isinstance(raw_md, (str, int, float)): # Wrap primitive metadata into a namespaced field normalized_md = {"metadata": raw_md} else: diff --git a/pyrit/score/true_false/float_scale_threshold_scorer.py b/pyrit/score/true_false/float_scale_threshold_scorer.py index 07135aeae..081a15bf7 100644 --- a/pyrit/score/true_false/float_scale_threshold_scorer.py +++ b/pyrit/score/true_false/float_scale_threshold_scorer.py @@ -10,6 +10,7 @@ FloatScaleScoreAggregator, ) from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer +from pyrit.score.score_utils import ORIGINAL_FLOAT_VALUE_KEY from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_scorer import TrueFalseScorer @@ -17,6 +18,8 @@ class FloatScaleThresholdScorer(TrueFalseScorer): """A scorer that applies a threshold to a float scale score to make it a true/false score.""" + ORIGINAL_FLOAT_VALUE_KEY: str = ORIGINAL_FLOAT_VALUE_KEY + def __init__( self, *, @@ -46,6 +49,11 @@ def __init__( if threshold <= 0 or threshold > 1: raise ValueError("The threshold must be between 0 and 1") + @property + def threshold(self) -> float: + """Get the threshold value used for score comparison.""" + return self._threshold + def _build_scorer_identifier(self) -> None: """Build the scorer evaluation identifier for this scorer.""" self._set_scorer_identifier( @@ -111,6 +119,10 @@ async def _score_async( score.score_value_description = aggregate_score.description score.id = uuid.uuid4() score.scorer_class_identifier = self.get_identifier() + # Store the original float value in metadata for granular comparison + if score.score_metadata is None: + score.score_metadata = {} + score.score_metadata[ORIGINAL_FLOAT_VALUE_KEY] = aggregate_value else: # Create new score from aggregator result (all pieces were filtered out) # Use the first message piece's id if available, otherwise generate a new UUID @@ -130,7 +142,11 @@ async def _score_async( f"{aggregate_score.rationale}" ), score_category=aggregate_score.category, - score_metadata=aggregate_score.metadata, + # Include original float value in metadata for granular comparison + score_metadata={ + **aggregate_score.metadata, + ORIGINAL_FLOAT_VALUE_KEY: aggregate_value, + }, scorer_class_identifier=self.get_identifier(), message_piece_id=piece_id, objective=objective, diff --git a/pyrit/score/true_false/self_ask_category_scorer.py b/pyrit/score/true_false/self_ask_category_scorer.py index ade30e0a3..d63fe8911 100644 --- a/pyrit/score/true_false/self_ask_category_scorer.py +++ b/pyrit/score/true_false/self_ask_category_scorer.py @@ -137,7 +137,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op message_data_type=message_piece.converted_value_data_type, scored_prompt_id=message_piece.id, objective=objective, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, ) score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value, score_type="true_false") diff --git a/pyrit/score/true_false/self_ask_general_true_false_scorer.py b/pyrit/score/true_false/self_ask_general_true_false_scorer.py index a764947eb..e221e19cf 100644 --- a/pyrit/score/true_false/self_ask_general_true_false_scorer.py +++ b/pyrit/score/true_false/self_ask_general_true_false_scorer.py @@ -129,7 +129,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op scored_prompt_id=message_piece.id, category=self._score_category, objective=objective, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, score_value_output_key=self._score_value_output_key, rationale_output_key=self._rationale_output_key, description_output_key=self._description_output_key, diff --git a/pyrit/score/true_false/self_ask_question_answer_scorer.py b/pyrit/score/true_false/self_ask_question_answer_scorer.py index 0cb0e704d..db24e0091 100644 --- a/pyrit/score/true_false/self_ask_question_answer_scorer.py +++ b/pyrit/score/true_false/self_ask_question_answer_scorer.py @@ -87,7 +87,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op scored_prompt_id=message_piece.id, category=self._score_category, objective=objective, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, ) score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value, score_type="true_false") diff --git a/pyrit/score/true_false/self_ask_refusal_scorer.py b/pyrit/score/true_false/self_ask_refusal_scorer.py index a6420fa7d..f0ea18914 100644 --- a/pyrit/score/true_false/self_ask_refusal_scorer.py +++ b/pyrit/score/true_false/self_ask_refusal_scorer.py @@ -139,7 +139,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op scored_prompt_id=message_piece.id, category=self._score_category, objective=objective, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, ) score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value, score_type="true_false") diff --git a/pyrit/score/true_false/self_ask_true_false_scorer.py b/pyrit/score/true_false/self_ask_true_false_scorer.py index 8984aa5c2..f45200225 100644 --- a/pyrit/score/true_false/self_ask_true_false_scorer.py +++ b/pyrit/score/true_false/self_ask_true_false_scorer.py @@ -191,7 +191,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op prepended_text_message_piece=prepended_text, category=self._score_category, objective=objective, - attack_identifier=message_piece.attack_identifier, + attack_identifier=message_piece._attack_identifier, ) score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value, score_type="true_false") diff --git a/pyrit/score/true_false/true_false_score_aggregator.py b/pyrit/score/true_false/true_false_score_aggregator.py index 88bce9e54..6a523eea0 100644 --- a/pyrit/score/true_false/true_false_score_aggregator.py +++ b/pyrit/score/true_false/true_false_score_aggregator.py @@ -6,11 +6,11 @@ from typing import Callable, Iterable, List from pyrit.models import Score -from pyrit.score.aggregator_utils import ( +from pyrit.score.score_aggregator_result import ScoreAggregatorResult +from pyrit.score.score_utils import ( combine_metadata_and_categories, format_score_for_rationale, ) -from pyrit.score.score_aggregator_result import ScoreAggregatorResult BinaryBoolOp = Callable[[bool, bool], bool] TrueFalseAggregatorFunc = Callable[[Iterable[Score]], ScoreAggregatorResult] diff --git a/tests/unit/executor/attack/component/test_conversation_manager.py b/tests/unit/executor/attack/component/test_conversation_manager.py index c13392dd0..235b009f6 100644 --- a/tests/unit/executor/attack/component/test_conversation_manager.py +++ b/tests/unit/executor/attack/component/test_conversation_manager.py @@ -827,73 +827,6 @@ async def test_returns_turn_count_for_multi_turn_attacks( # sample_conversation has 1 assistant message = 1 turn assert state.turn_count == 1 - @pytest.mark.asyncio - async def test_multipart_message_extracts_scores_from_all_pieces( - self, - attack_identifier: Dict[str, str], - mock_chat_target: MagicMock, - sample_score: Score, - ) -> None: - """Test that multi-part assistant messages extract scores from all pieces.""" - manager = ConversationManager(attack_identifier=attack_identifier) - conversation_id = str(uuid.uuid4()) - context = _TestAttackContext(params=AttackParameters(objective="Test objective")) - - # Create a multi-part assistant response (e.g., text + image) - # All pieces in a Message must share the same conversation_id - piece_conversation_id = str(uuid.uuid4()) - piece1 = MessagePiece( - role="assistant", - original_value="Here is the analysis:", - original_value_data_type="text", - conversation_id=piece_conversation_id, - ) - piece2 = MessagePiece( - role="assistant", - original_value="chart_image.png", - original_value_data_type="image_path", - conversation_id=piece_conversation_id, - ) - multipart_response = Message(message_pieces=[piece1, piece2]) - context.prepended_conversation = [ - Message.from_prompt(prompt="Analyze data", role="user"), - multipart_response, - ] - - # Mock get_prompt_scores to verify it's called with all piece IDs - score2 = Score( - score_type="true_false", - score_value="true", - score_category=["test"], - score_value_description="Score for image piece", - score_rationale="Test rationale", - score_metadata={}, - message_piece_id=str(piece2.id), - ) - original_get_prompt_scores = manager._memory.get_prompt_scores - captured_prompt_ids: List[str] = [] - - def mock_get_prompt_scores(prompt_ids: List[str]) -> List[Score]: - captured_prompt_ids.extend(prompt_ids) - return [sample_score, score2] - - manager._memory.get_prompt_scores = mock_get_prompt_scores # type: ignore[assignment, method-assign] - - try: - state = await manager.initialize_context_async( - context=context, - target=mock_chat_target, - conversation_id=conversation_id, - max_turns=10, - ) - - # Verify all piece IDs were passed to get_prompt_scores - assert len(captured_prompt_ids) == 2 - # Verify scores from both pieces are returned - assert len(state.last_assistant_message_scores) == 2 - finally: - manager._memory.get_prompt_scores = original_get_prompt_scores # type: ignore[assignment, method-assign] - # ============================================================================= # Test Class: Prepended Conversation Config Settings @@ -1395,26 +1328,6 @@ async def test_assigns_conversation_id_to_all_pieces( for piece in msg.message_pieces: assert piece.conversation_id == conversation_id - @pytest.mark.asyncio - async def test_assigns_attack_identifier_to_all_pieces( - self, - attack_identifier: Dict[str, str], - sample_conversation: List[Message], - ) -> None: - """Test that attack_identifier is assigned to all message pieces.""" - manager = ConversationManager(attack_identifier=attack_identifier) - conversation_id = str(uuid.uuid4()) - - await manager.add_prepended_conversation_to_memory_async( - prepended_conversation=sample_conversation, - conversation_id=conversation_id, - ) - - stored = manager.get_conversation(conversation_id) - for msg in stored: - for piece in msg.message_pieces: - assert piece.attack_identifier == attack_identifier - @pytest.mark.asyncio async def test_raises_error_when_exceeds_max_turns( self, diff --git a/tests/unit/executor/attack/component/test_simulated_conversation.py b/tests/unit/executor/attack/component/test_simulated_conversation.py index 2e59fb386..cb50d27ea 100644 --- a/tests/unit/executor/attack/component/test_simulated_conversation.py +++ b/tests/unit/executor/attack/component/test_simulated_conversation.py @@ -263,7 +263,17 @@ async def test_returns_simulated_conversation_result( ): """Test that the function returns a list of SeedPrompts.""" conversation_id = str(uuid.uuid4()) - mock_score = MagicMock(spec=Score) + # Create a proper true_false score for automated_objective_score + mock_score = Score( + score_type="true_false", + score_value="true", + score_category=["objective"], + score_value_description="Objective achieved", + score_rationale="Test rationale", + scorer_class_identifier={"__type__": "TrueFalseScorer"}, + message_piece_id=str(uuid.uuid4()), + score_metadata={}, + ) with patch("pyrit.executor.attack.multi_turn.simulated_conversation.RedTeamingAttack") as mock_attack_class: mock_attack = MagicMock() @@ -275,7 +285,7 @@ async def test_returns_simulated_conversation_result( objective="Test objective", outcome=AttackOutcome.SUCCESS, executed_turns=3, - last_score=mock_score, + automated_objective_score=mock_score, ) ) mock_attack_class.return_value = mock_attack diff --git a/tests/unit/executor/attack/core/test_attack_strategy.py b/tests/unit/executor/attack/core/test_attack_strategy.py index dcb9db2d6..2b2f21347 100644 --- a/tests/unit/executor/attack/core/test_attack_strategy.py +++ b/tests/unit/executor/attack/core/test_attack_strategy.py @@ -416,6 +416,8 @@ async def test_on_post_execute_adds_results_to_memory(self, mock_memory): sample_context = MagicMock() sample_context.start_time = 100.0 sample_result = MagicMock(spec=AttackResult) + sample_result.outcome = AttackOutcome.SUCCESS + sample_result.outcome_reason = "Test reason" event_data = StrategyEventData( event=StrategyEvent.ON_POST_EXECUTE, diff --git a/tests/unit/executor/attack/core/test_markdown_printer.py b/tests/unit/executor/attack/core/test_markdown_printer.py index 613784c2d..b56b6fa6f 100644 --- a/tests/unit/executor/attack/core/test_markdown_printer.py +++ b/tests/unit/executor/attack/core/test_markdown_printer.py @@ -64,15 +64,15 @@ def sample_attack_result(): execution_time_ms=1500, outcome=AttackOutcome.SUCCESS, outcome_reason="Test successful", - last_score=Score( - score_type="float_scale", - score_value="0.5", - score_category="other", - score_value_description="Other score", - score_rationale="Other rationale", - score_metadata="{}", + automated_objective_score=Score( + score_type="true_false", + score_value="true", + score_category="objective", + score_value_description="Objective achieved", + score_rationale="Test rationale", + score_metadata={}, message_piece_id=str(uuid.uuid4()), - scorer_class_identifier={"__type__": "OtherScorer", "__module__": "test_module"}, + scorer_class_identifier={"__type__": "TrueFalseScorer", "__module__": "test_module"}, ), ) diff --git a/tests/unit/executor/attack/multi_turn/test_crescendo.py b/tests/unit/executor/attack/multi_turn/test_crescendo.py index 6cfa8937b..28484d24d 100644 --- a/tests/unit/executor/attack/multi_turn/test_crescendo.py +++ b/tests/unit/executor/attack/multi_turn/test_crescendo.py @@ -191,24 +191,24 @@ def no_refusal_score() -> Score: @pytest.fixture def success_objective_score() -> Score: return create_score( - score_type="float_scale", - score_value="0.9", + score_type="true_false", + score_value="true", score_category=["objective"], - scorer_class="FloatScaleThresholdScorer", + scorer_class="SelfAskTrueFalseScorer", score_rationale="The response successfully achieves the objective", - score_value_description="High objective achievement", + score_value_description="Objective achieved", ) @pytest.fixture def failure_objective_score() -> Score: return create_score( - score_type="float_scale", - score_value="0.3", + score_type="true_false", + score_value="false", score_category=["objective"], - scorer_class="FloatScaleThresholdScorer", + scorer_class="SelfAskTrueFalseScorer", score_rationale="The response does not achieve the objective", - score_value_description="Low objective achievement", + score_value_description="Objective not achieved", ) @@ -812,7 +812,7 @@ async def test_build_adversarial_prompt_with_objective_score( assert "This is the turn 3 of 10 turns" in result assert "Test objective" in result assert "Test response" in result # From sample_response - assert "0.30" in result # Score value + assert "0.00" in result # Score value (false = 0.0) assert failure_objective_score.score_rationale in result @pytest.mark.asyncio @@ -1340,7 +1340,7 @@ async def test_perform_attack_success_on_first_turn( assert isinstance(result, CrescendoAttackResult) assert result.outcome == AttackOutcome.SUCCESS assert result.executed_turns == 1 - assert result.last_score == success_objective_score + assert result.objective_score == success_objective_score assert result.outcome_reason is not None assert "Objective achieved in 1 turns" in result.outcome_reason @@ -1396,7 +1396,7 @@ async def test_perform_attack_failure_max_turns_reached( assert isinstance(result, CrescendoAttackResult) assert result.outcome == AttackOutcome.FAILURE assert result.executed_turns == 2 - assert result.last_score == failure_objective_score + assert result.objective_score == failure_objective_score assert result.outcome_reason is not None assert "Max turns (2) reached" in result.outcome_reason @@ -1638,8 +1638,7 @@ async def test_execute_with_context_async_successful_lifecycle( attack_identifier=attack.get_identifier(), outcome=AttackOutcome.SUCCESS, executed_turns=1, - last_response=sample_response.get_piece(), - last_score=success_objective_score, + automated_objective_score=success_objective_score, metadata={"backtrack_count": 0}, ) @@ -1707,8 +1706,7 @@ async def test_execute_async_with_parameters( attack_identifier=attack.get_identifier(), outcome=AttackOutcome.SUCCESS, executed_turns=1, - last_response=sample_response.get_piece(), - last_score=success_objective_score, + automated_objective_score=success_objective_score, metadata={"backtrack_count": 0}, ) @@ -1783,26 +1781,26 @@ async def test_complete_successful_attack_scenario( ] # Create scores that show progression toward the objective - # The increasing scores simulate the model becoming more compliant + # Since objective scorers must be TrueFalseScorer, the final successful turn has score_value="true" scores = [ create_score( - score_type="float_scale", - score_value="0.3", + score_type="true_false", + score_value="false", score_category=["objective"], - scorer_class="FloatScaleThresholdScorer", + scorer_class="SelfAskTrueFalseScorer", ), create_score( - score_type="float_scale", - score_value="0.6", + score_type="true_false", + score_value="false", score_category=["objective"], - scorer_class="FloatScaleThresholdScorer", + scorer_class="SelfAskTrueFalseScorer", ), create_score( - score_type="float_scale", - score_value="0.9", + score_type="true_false", + score_value="true", score_category=["objective"], - scorer_class="FloatScaleThresholdScorer", - ), # Above threshold + scorer_class="SelfAskTrueFalseScorer", + ), # Objective achieved ] # Set up mock behavior to simulate the conversation flow @@ -1843,10 +1841,8 @@ async def test_complete_successful_attack_scenario( assert isinstance(result, CrescendoAttackResult) assert result.outcome == AttackOutcome.SUCCESS assert result.executed_turns == 3 - assert result.last_score is not None - assert result.last_score.get_value() == 0.9 - assert result.last_response is not None - assert "sensitive data" in result.last_response.converted_value + assert result.objective_score is not None + assert result.objective_score.get_value() is True @pytest.mark.asyncio async def test_attack_with_backtracking_scenario( @@ -2156,3 +2152,90 @@ async def test_setup_tracks_adversarial_chat_conversation_id( and ref.conversation_type == ConversationType.ADVERSARIAL for ref in basic_context.related_conversations ) + + +@pytest.mark.usefixtures("patch_central_database") +class TestCrescendoScoringIntegration: + """ + Tests verifying Crescendo correctly uses FloatScaleThresholdScorer and normalize_score_to_float. + + These tests are critical because: + 1. Crescendo needs granular float scores for feedback to the adversarial chat + 2. The adversarial chat uses scores to incrementally improve prompts + 3. Using raw true_false scores would only provide 0 or 1, losing the "how close" information + 4. FloatScaleThresholdScorer preserves the original float in metadata while providing + true_false for AttackResult.automated_objective_score validation + + Historical bug: Without normalize_score_to_float, the adversarial chat would receive + "True"/"False" instead of "0.73", making it impossible to provide graduated feedback. + """ + + def test_default_scorer_is_float_scale_threshold_scorer( + self, mock_objective_target: MagicMock, mock_adversarial_chat: MagicMock + ) -> None: + """ + Crescendo must use FloatScaleThresholdScorer by default for objective scoring. + + This should be updated with care. + + Why FloatScaleThresholdScorer: + - Wraps a float_scale scorer (SelfAskScaleScorer) that returns 0.0-1.0 scores + - Converts to true_false for success/failure determination + - Stores original float in score_metadata[ORIGINAL_FLOAT_VALUE_KEY] + - This allows Crescendo to use the float for feedback while satisfying + AttackResult's requirement for true_false objective scores + """ + # Create attack with no explicit objective scorer - should use default + attack = CrescendoAttack( + objective_target=mock_objective_target, + attack_adversarial_config=AttackAdversarialConfig(target=mock_adversarial_chat), + # No explicit objective_scorer - should create default FloatScaleThresholdScorer + ) + + # Verify the objective scorer is a FloatScaleThresholdScorer + assert isinstance(attack._objective_scorer, FloatScaleThresholdScorer), ( + "Crescendo default objective scorer must be FloatScaleThresholdScorer to preserve " + "float granularity for adversarial feedback while providing true_false for AttackResult" + ) + + def test_normalize_score_to_float_import_in_crescendo(self) -> None: + """ + Verify normalize_score_to_float is imported and available in crescendo module. + + This test ensures the module has the necessary import for extracting float values + from scores, which is essential for correct adversarial feedback and outcome reporting. + """ + from pyrit.executor.attack.multi_turn import crescendo + + assert hasattr(crescendo, "normalize_score_to_float"), ( + "crescendo module must import normalize_score_to_float for " + "extracting float values from scores for adversarial feedback" + ) + + def test_score_feedback_uses_normalized_value( + self, mock_objective_target: MagicMock, mock_adversarial_chat: MagicMock + ) -> None: + """ + Score feedback to adversarial chat should use normalize_score_to_float. + + When providing feedback to the adversarial chat, the score should be the + original float value (e.g., "0.73"), not the true_false conversion ("1"). + """ + from pyrit.score.score_utils import ORIGINAL_FLOAT_VALUE_KEY, normalize_score_to_float + + # Create a score that has metadata with original float + score = create_score( + score_type="true_false", + score_value="True", # Threshold was met + scorer_class="FloatScaleThresholdScorer", + ) + # Manually set metadata (create_score doesn't support this) + score.score_metadata = {ORIGINAL_FLOAT_VALUE_KEY: 0.73} + + # normalize_score_to_float should extract 0.73, not convert True to 1.0 + result = normalize_score_to_float(score) + + assert result == 0.73, ( + f"Expected 0.73 from metadata, got {result}. " + "normalize_score_to_float should extract original float from metadata." + ) diff --git a/tests/unit/executor/attack/multi_turn/test_multi_prompt_sending.py b/tests/unit/executor/attack/multi_turn/test_multi_prompt_sending.py index b3f8dba7b..8048e9106 100644 --- a/tests/unit/executor/attack/multi_turn/test_multi_prompt_sending.py +++ b/tests/unit/executor/attack/multi_turn/test_multi_prompt_sending.py @@ -402,7 +402,8 @@ async def test_perform_async_sends_all_prompts_in_sequence( # Should have called send_prompt_async for each message in sequence assert mock_prompt_normalizer.send_prompt_async.call_count == len(basic_context.params.user_messages) assert result.executed_turns == len(basic_context.params.user_messages) - assert result.last_response is not None + # Verify the last response was recorded in context + assert basic_context.last_response is not None @pytest.mark.asyncio async def test_perform_async_stops_on_failed_prompt(self, mock_target, mock_prompt_normalizer, basic_context): @@ -446,7 +447,7 @@ async def test_perform_async_evaluates_final_response( result = await attack._perform_async(context=basic_context) mock_evaluate.assert_called_once_with(response=sample_response, objective=basic_context.objective) - assert result.last_score == success_score + assert result.objective_score == success_score @pytest.mark.usefixtures("patch_central_database") @@ -641,7 +642,6 @@ async def test_perform_attack_with_empty_messages( result = await attack._perform_async(context=basic_context) assert result.executed_turns == 0 - assert result.last_response is None assert result.outcome == AttackOutcome.FAILURE @pytest.mark.asyncio @@ -661,7 +661,8 @@ async def test_perform_attack_with_single_prompt(self, mock_target, mock_prompt_ result = await attack._perform_async(context=context) assert result.executed_turns == 1 - assert result.last_response is not None + # Verify the last response was recorded in context + assert context.last_response is not None assert mock_prompt_normalizer.send_prompt_async.call_count == 1 def test_attack_has_unique_identifier(self, mock_target): diff --git a/tests/unit/executor/attack/multi_turn/test_red_teaming.py b/tests/unit/executor/attack/multi_turn/test_red_teaming.py index f90ffcf7e..03c691ece 100644 --- a/tests/unit/executor/attack/multi_turn/test_red_teaming.py +++ b/tests/unit/executor/attack/multi_turn/test_red_teaming.py @@ -1285,17 +1285,14 @@ async def test_perform_attack_with_multi_piece_message_uses_first_piece( assert mock_prompt_normalizer.send_prompt_async.call_count == 1 @pytest.mark.parametrize( - "scorer_type,score_value,threshold,expected_achieved", + "scorer_type,score_value,expected_achieved", [ - ("true_false", "true", 0.8, True), - ("true_false", "false", 0.8, False), - ("float_scale", "0.9", 0.8, True), - ("float_scale", "0.7", 0.8, False), - ("float_scale", "0.8", 0.8, True), # Edge case: equal to threshold + ("true_false", "true", True), + ("true_false", "false", False), ], ) @pytest.mark.asyncio - async def test_perform_attack_with_different_scoring_thresholds( + async def test_perform_attack_with_different_score_values( self, mock_objective_target: MagicMock, mock_objective_scorer: MagicMock, @@ -1305,15 +1302,12 @@ async def test_perform_attack_with_different_scoring_thresholds( sample_response: Message, scorer_type: ScoreType, score_value: str, - threshold: float, expected_achieved: bool, ): - """Test attack execution with different scoring thresholds.""" + """Test attack execution with different score values.""" adversarial_config = AttackAdversarialConfig(target=mock_adversarial_chat) - scoring_config = AttackScoringConfig( - objective_scorer=mock_objective_scorer, successful_objective_threshold=threshold - ) + scoring_config = AttackScoringConfig(objective_scorer=mock_objective_scorer) attack = RedTeamingAttack( objective_target=mock_objective_target, @@ -1428,8 +1422,7 @@ async def test_execute_async_successful_lifecycle( attack_identifier=attack.get_identifier(), outcome=AttackOutcome.SUCCESS, executed_turns=1, - last_response=sample_response.get_piece(), - last_score=success_score, + automated_objective_score=success_score, ) # Execute using execute_async @@ -1510,8 +1503,7 @@ async def test_execute_with_context_async_successful( attack_identifier=attack.get_identifier(), outcome=AttackOutcome.SUCCESS, executed_turns=1, - last_response=sample_response.get_piece(), - last_score=success_score, + automated_objective_score=success_score, ) # Execute using execute_with_context_async @@ -1874,7 +1866,7 @@ async def capture_score(*args, **kwargs): result = await attack._perform_async(context=context) # The final result should have a score - assert result.last_score == failure_score + assert result.objective_score == failure_score @pytest.mark.asyncio async def test_score_last_turn_only_can_still_succeed_on_last_turn( @@ -1913,4 +1905,4 @@ async def test_score_last_turn_only_can_still_succeed_on_last_turn( # Should succeed based on final score assert result.outcome == AttackOutcome.SUCCESS - assert result.last_score == success_score + assert result.objective_score == success_score diff --git a/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py b/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py index fc244ecf1..1d5d29cc0 100644 --- a/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py +++ b/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py @@ -17,9 +17,9 @@ AttackAdversarialConfig, AttackConverterConfig, AttackParameters, - AttackScoringConfig, TAPAttackContext, TAPAttackResult, + TAPAttackScoringConfig, TreeOfAttacksWithPruningAttack, ) from pyrit.executor.attack.multi_turn.tree_of_attacks import _TreeOfAttacksNode @@ -34,7 +34,7 @@ ) from pyrit.prompt_normalizer import PromptNormalizer from pyrit.prompt_target import PromptChatTarget, PromptTarget -from pyrit.score import Scorer, TrueFalseScorer +from pyrit.score import FloatScaleThresholdScorer, Scorer logger = logging.getLogger(__name__) @@ -86,7 +86,11 @@ def create_node(config: Optional[NodeMockConfig] = None) -> "_TreeOfAttacksNode" # Set up objective score if config.objective_score_value is not None: - node.objective_score = MagicMock(get_value=MagicMock(return_value=config.objective_score_value)) + mock_score = MagicMock() + mock_score.get_value = MagicMock(return_value=config.objective_score_value) + mock_score.score_metadata = None # Ensure _normalize_score_to_float falls back to get_value() + mock_score.score_type = "true_false" # Required for AttackResult validation + node.objective_score = mock_score else: node.objective_score = None @@ -152,7 +156,7 @@ def with_default_mocks(self) -> "AttackBuilder": """Set up default mocks for all required components.""" self.objective_target = self._create_mock_target() self.adversarial_chat = self._create_mock_chat() - self.objective_scorer = self._create_mock_scorer("MockScorer") + self.objective_scorer = self._create_mock_scorer("MockScorer", self.successful_threshold) return self def with_tree_params(self, **kwargs) -> "AttackBuilder": @@ -181,10 +185,9 @@ def build(self) -> TreeOfAttacksWithPruningAttack: """Build the attack instance.""" assert self.adversarial_chat is not None, "Adversarial chat target must be set." adversarial_config = AttackAdversarialConfig(target=self.adversarial_chat) - scoring_config = AttackScoringConfig( - objective_scorer=cast(TrueFalseScorer, self.objective_scorer), + scoring_config = TAPAttackScoringConfig( + objective_scorer=cast(FloatScaleThresholdScorer, self.objective_scorer), auxiliary_scorers=self.auxiliary_scorers, - successful_objective_threshold=self.successful_threshold, ) kwargs = { @@ -215,12 +218,13 @@ def _create_mock_chat() -> PromptChatTarget: return cast(PromptChatTarget, chat) @staticmethod - def _create_mock_scorer(name: str) -> TrueFalseScorer: - scorer = MagicMock(spec=TrueFalseScorer) + def _create_mock_scorer(name: str, threshold: float = 0.8) -> FloatScaleThresholdScorer: + scorer = MagicMock(spec=FloatScaleThresholdScorer) scorer.scorer_type = "true_false" scorer.score_async = AsyncMock(return_value=[]) scorer.get_identifier.return_value = {"__type__": name, "__module__": "test_module"} - return cast(TrueFalseScorer, scorer) + scorer.threshold = threshold + return cast(FloatScaleThresholdScorer, scorer) @staticmethod def _create_mock_aux_scorer(name: str) -> Scorer: @@ -245,12 +249,12 @@ def create_basic_context() -> TAPAttackContext: return context @staticmethod - def create_score(value: float = 0.9) -> Score: + def create_score(value: bool = True) -> Score: """Create a mock Score object.""" return Score( id=None, - score_type="float_scale", - score_value=str(value), + score_type="true_false", + score_value="true" if value else "false", score_category=["test"], score_value_description="Test score", score_rationale="Test rationale", @@ -361,14 +365,14 @@ def test_get_objective_target_returns_correct_target(self, attack_builder): def test_get_attack_scoring_config_returns_config(self, attack_builder): """Test that get_attack_scoring_config returns the scoring configuration""" - attack = attack_builder.with_default_mocks().with_auxiliary_scorers(1).with_threshold(0.75).build() + attack = attack_builder.with_threshold(0.75).with_default_mocks().with_auxiliary_scorers(1).build() result = attack.get_attack_scoring_config() assert result is not None assert result.objective_scorer == attack_builder.objective_scorer assert len(result.auxiliary_scorers) == 1 - assert result.successful_objective_threshold == 0.75 + assert result.threshold == 0.75 # TAPAttackScoringConfig.threshold gets it from the scorer @pytest.mark.asyncio async def test_tree_depth_validation_with_prepended_conversation(self, attack_builder, helpers): @@ -522,7 +526,7 @@ def test_update_best_performing_node_preserves_existing_best_when_no_valid_nodes context = helpers.create_basic_context() # Set existing best - existing_score = helpers.create_score(0.8) + existing_score = helpers.create_score(True) context.best_objective_score = existing_score context.best_conversation_id = "existing_conv_id" @@ -675,23 +679,30 @@ def test_format_node_result(self, basic_attack, node_factory): def test_is_objective_achieved(self, attack_builder, helpers): """Test _is_objective_achieved logic.""" - attack = attack_builder.with_default_mocks().with_threshold(0.8).build() + attack = attack_builder.with_threshold(0.8).with_default_mocks().build() context = helpers.create_basic_context() + def create_mock_score(value: float) -> MagicMock: + """Create a mock score with score_metadata set to None.""" + score = MagicMock() + score.get_value = MagicMock(return_value=value) + score.score_metadata = None + return score + # Test 1: No score available context.best_objective_score = None assert attack._is_objective_achieved(context=context) is False # Test 2: Score below threshold - context.best_objective_score = MagicMock(get_value=MagicMock(return_value=0.5)) + context.best_objective_score = create_mock_score(0.5) assert attack._is_objective_achieved(context=context) is False # Test 3: Score at threshold - context.best_objective_score = MagicMock(get_value=MagicMock(return_value=0.8)) + context.best_objective_score = create_mock_score(0.8) assert attack._is_objective_achieved(context=context) is True # Test 4: Score above threshold - context.best_objective_score = MagicMock(get_value=MagicMock(return_value=0.9)) + context.best_objective_score = create_mock_score(0.9) assert attack._is_objective_achieved(context=context) is True @@ -720,8 +731,7 @@ async def test_execute_async_with_message_uses_it_for_root_node(self, attack_bui conversation_id="test_conv_id", objective="Test objective", attack_identifier=attack.get_identifier(), - last_response=None, - last_score=helpers.create_score(0.5), + automated_objective_score=helpers.create_score(False), executed_turns=1, execution_time_ms=100, outcome=AttackOutcome.FAILURE, @@ -767,8 +777,7 @@ async def test_execute_async_success_flow(self, attack_builder, helpers): conversation_id="success_conv_id", objective="Test objective", attack_identifier=attack.get_identifier(), - last_response=None, - last_score=helpers.create_score(0.9), + automated_objective_score=helpers.create_score(True), executed_turns=1, execution_time_ms=100, outcome=AttackOutcome.SUCCESS, @@ -1189,7 +1198,10 @@ def test_format_node_result_with_scores(self, basic_attack): node = MagicMock() node.off_topic = False node.completed = True - node.objective_score = MagicMock(get_value=MagicMock(return_value=0.7)) + mock_score = MagicMock() + mock_score.get_value = MagicMock(return_value=0.7) + mock_score.score_metadata = None # Ensure _normalize_score_to_float uses get_value() + node.objective_score = mock_score result = basic_attack._format_node_result(node) @@ -1349,7 +1361,7 @@ def test_attack_result_includes_adversarial_chat_conversation_ids(self, attack_b ConversationReference(conversation_id="adv_conv_2", conversation_type=ConversationType.ADVERSARIAL), } context.best_conversation_id = "best_conv" - context.best_objective_score = helpers.create_score(0.9) + context.best_objective_score = helpers.create_score(True) # Create the result result = attack._create_attack_result( @@ -1431,3 +1443,95 @@ def test_add_adversarial_chat_conversation_id_ensures_uniqueness(self, basic_att ) in context.related_conversations ) + + +@pytest.mark.usefixtures("patch_central_database") +class TestTAPAttackScoringIntegration: + """ + Tests verifying TAP attack correctly uses FloatScaleThresholdScorer and normalize_score_to_float. + + These tests are critical because: + 1. TAP needs granular float scores (0.0-1.0) to make intelligent pruning decisions + 2. The adversarial chat needs numerical feedback to improve prompts incrementally + 3. Using raw true_false scores would only provide 0 or 1, losing the "how close" information + 4. FloatScaleThresholdScorer preserves the original float in metadata while providing + true_false for AttackResult.automated_objective_score validation + + Historical bug: Without normalize_score_to_float, outcome_reason reported incorrect + scores because it used get_value() on true_false scores (returning True/False). + """ + + def test_default_scorer_is_float_scale_threshold_scorer(self) -> None: + """ + TAP must use FloatScaleThresholdScorer by default for objective scoring. + + Why FloatScaleThresholdScorer: + - Wraps a float_scale scorer (SelfAskScaleScorer) that returns 0.0-1.0 scores + - Converts to true_false for success/failure determination + - Stores original float in score_metadata[ORIGINAL_FLOAT_VALUE_KEY] + - This allows TAP to use the float for pruning decisions while still + satisfying AttackResult's requirement for true_false objective scores + """ + mock_target = MagicMock(spec=PromptChatTarget) + mock_target.send_prompt_async = AsyncMock() + mock_target.get_identifier.return_value = {"__type__": "MockTarget", "__module__": "test"} + + mock_adversarial = MagicMock(spec=PromptChatTarget) + mock_adversarial.send_prompt_async = AsyncMock() + mock_adversarial.get_identifier.return_value = {"__type__": "MockAdversarial", "__module__": "test"} + + # Create attack with no explicit scoring config - should use default + with patch("pyrit.executor.attack.multi_turn.tree_of_attacks.SelfAskScaleScorer"): + attack = TreeOfAttacksWithPruningAttack( + objective_target=mock_target, + attack_adversarial_config=AttackAdversarialConfig(target=mock_adversarial), + # No attack_scoring_config - should create default FloatScaleThresholdScorer + ) + + # Verify the default scorer is a FloatScaleThresholdScorer + assert isinstance(attack._objective_scorer, FloatScaleThresholdScorer), ( + "TAP default objective scorer must be FloatScaleThresholdScorer to preserve " + "float granularity for pruning while providing true_false for AttackResult" + ) + + def test_tap_attack_requires_float_scale_threshold_scorer(self) -> None: + """ + TAPAttackScoringConfig validates that objective_scorer is FloatScaleThresholdScorer. + + This constraint exists because TAP's algorithm depends on: + 1. Float scores for intelligent node ranking and pruning + 2. Threshold for determining objective success + 3. Metadata preservation for normalize_score_to_float to extract original floats + """ + mock_scorer = MagicMock(spec=Scorer) + mock_scorer.get_identifier.return_value = {"__type__": "MockScorer"} + + # Non-FloatScaleThresholdScorer should raise ValueError + with pytest.raises(ValueError, match="FloatScaleThresholdScorer"): + TAPAttackScoringConfig(objective_scorer=mock_scorer) + + def test_is_objective_achieved_uses_normalized_score(self, attack_builder, helpers) -> None: + """ + _is_objective_achieved should use normalize_score_to_float for threshold comparison. + + The threshold comparison must use the original float value from metadata, + not the true_false conversion, to correctly determine success. + """ + attack = attack_builder.with_threshold(0.7).with_default_mocks().build() + context = helpers.create_basic_context() + + # Score of 0.75 should be above threshold (0.7) + mock_score = MagicMock() + mock_score.score_metadata = {"original_float_value": 0.75} + mock_score.get_value.return_value = True + context.best_objective_score = mock_score + + # Should be True because 0.75 >= 0.7 + assert attack._is_objective_achieved(context) is True + + # Now test below threshold + mock_score.score_metadata = {"original_float_value": 0.65} + context.best_objective_score = mock_score + + # Should be False because 0.65 < 0.7 + assert attack._is_objective_achieved(context) is False diff --git a/tests/unit/executor/attack/single_turn/test_prompt_sending.py b/tests/unit/executor/attack/single_turn/test_prompt_sending.py index 7abf60b99..a7b2457ee 100644 --- a/tests/unit/executor/attack/single_turn/test_prompt_sending.py +++ b/tests/unit/executor/attack/single_turn/test_prompt_sending.py @@ -645,8 +645,9 @@ async def test_perform_attack_without_scorer_completes_after_first_response( assert result.outcome == AttackOutcome.UNDETERMINED assert result.outcome_reason == "No objective scorer configured" assert result.executed_turns == 1 - assert result.last_response == sample_response.get_piece() - assert result.last_score is None + # Note: last_response is deprecated and gets data from get_conversation() which + # returns None in this test since there's no real conversation stored + assert result.objective_score is None # Verify only one attempt was made (no retries without scorer) attack._send_prompt_to_objective_target_async.assert_called_once() @@ -675,8 +676,9 @@ async def test_perform_attack_without_scorer_retries_on_filtered_response( result = await attack._perform_async(context=basic_context) # Verify completion after retry - assert result.last_response == sample_response.get_piece() + # Note: last_response is deprecated, verify via send call count assert attack._send_prompt_to_objective_target_async.call_count == 2 + assert result.outcome == AttackOutcome.UNDETERMINED @pytest.mark.usefixtures("patch_central_database") @@ -1018,7 +1020,6 @@ async def test_execute_async_with_parameters(self, mock_target, sample_response) objective="Test objective", attack_identifier=attack.get_identifier(), outcome=AttackOutcome.SUCCESS, - last_response=sample_response.get_piece(), ) attack._perform_async = AsyncMock(return_value=mock_result) attack._teardown_async = AsyncMock() diff --git a/tests/unit/executor/attack/single_turn/test_skeleton_key.py b/tests/unit/executor/attack/single_turn/test_skeleton_key.py index 779c1d0da..39d91df5b 100644 --- a/tests/unit/executor/attack/single_turn/test_skeleton_key.py +++ b/tests/unit/executor/attack/single_turn/test_skeleton_key.py @@ -313,8 +313,7 @@ def test_create_skeleton_key_failure_result(self, mock_target, basic_context): assert result.outcome == AttackOutcome.FAILURE assert result.outcome_reason == "Skeleton key prompt was filtered or failed" assert result.executed_turns == 1 - assert result.last_response is None - assert result.last_score is None + assert result.objective_score is None assert result.attack_identifier == attack.get_identifier() @@ -344,8 +343,7 @@ async def test_perform_attack_skeleton_key_success_objective_success( conversation_id=basic_context.conversation_id, objective=basic_context.objective, attack_identifier=attack.get_identifier(), - last_response=sample_response, - last_score=success_score, + automated_objective_score=success_score, outcome=AttackOutcome.SUCCESS, executed_turns=1, ) @@ -361,8 +359,7 @@ async def test_perform_attack_skeleton_key_success_objective_success( # Verify result properties assert result.outcome == AttackOutcome.SUCCESS assert result.executed_turns == 2 # Should be updated to 2 turns - assert result.last_response == sample_response - assert result.last_score == success_score + assert result.objective_score == success_score @pytest.mark.asyncio async def test_perform_attack_skeleton_key_failure(self, mock_target, basic_context): @@ -413,8 +410,7 @@ async def test_perform_attack_skeleton_key_success_objective_failure( conversation_id=basic_context.conversation_id, objective=basic_context.objective, attack_identifier=attack.get_identifier(), - last_response=sample_response, - last_score=failure_score, + automated_objective_score=failure_score, outcome=AttackOutcome.FAILURE, executed_turns=1, ) @@ -424,7 +420,7 @@ async def test_perform_attack_skeleton_key_success_objective_failure( # Verify result shows overall failure but 2 turns were executed assert result.outcome == AttackOutcome.FAILURE assert result.executed_turns == 2 - assert result.last_score == failure_score + assert result.objective_score == failure_score @pytest.mark.usefixtures("patch_central_database") diff --git a/tests/unit/executor/attack/test_attack_parameter_consistency.py b/tests/unit/executor/attack/test_attack_parameter_consistency.py index c89871c85..4156d4fde 100644 --- a/tests/unit/executor/attack/test_attack_parameter_consistency.py +++ b/tests/unit/executor/attack/test_attack_parameter_consistency.py @@ -20,6 +20,7 @@ CrescendoAttack, PromptSendingAttack, RedTeamingAttack, + TAPAttackScoringConfig, TreeOfAttacksWithPruningAttack, ) from pyrit.memory import CentralMemory @@ -32,7 +33,7 @@ ) from pyrit.prompt_normalizer import PromptNormalizer from pyrit.prompt_target import PromptChatTarget, PromptTarget -from pyrit.score import TrueFalseScorer +from pyrit.score import FloatScaleThresholdScorer, TrueFalseScorer # ============================================================================= # Multi-Modal Message Fixtures @@ -156,6 +157,16 @@ def mock_objective_scorer() -> MagicMock: return scorer +@pytest.fixture +def mock_tap_objective_scorer() -> MagicMock: + """Create a mock FloatScaleThresholdScorer for TAP attacks.""" + scorer = MagicMock(spec=FloatScaleThresholdScorer) + scorer.score_async = AsyncMock() + scorer.get_identifier.return_value = {"__type__": "MockFloatScaleThresholdScorer", "__module__": "test_module"} + scorer.threshold = 0.7 + return scorer + + @pytest.fixture def mock_prompt_normalizer() -> MagicMock: """Create a mock prompt normalizer.""" @@ -288,15 +299,15 @@ def crescendo_attack( def tap_attack( mock_chat_target: MagicMock, mock_adversarial_chat: MagicMock, - mock_objective_scorer: MagicMock, + mock_tap_objective_scorer: MagicMock, sample_response: Message, success_score: Score, ) -> TreeOfAttacksWithPruningAttack: """Create a pre-configured TreeOfAttacksWithPruningAttack with mocked normalizer.""" - mock_objective_scorer.score_async.return_value = [success_score] + mock_tap_objective_scorer.score_async.return_value = [success_score] adversarial_config = AttackAdversarialConfig(target=mock_adversarial_chat) - scoring_config = AttackScoringConfig(objective_scorer=mock_objective_scorer) + scoring_config = TAPAttackScoringConfig(objective_scorer=mock_tap_objective_scorer) attack = TreeOfAttacksWithPruningAttack( objective_target=mock_chat_target, @@ -473,16 +484,16 @@ async def test_tree_of_attacks_uses_next_message_first_turn( self, mock_chat_target: MagicMock, mock_adversarial_chat: MagicMock, - mock_objective_scorer: MagicMock, + mock_tap_objective_scorer: MagicMock, sample_response: Message, success_score: Score, multimodal_image_message: Message, ) -> None: """Test that TreeOfAttacksWithPruningAttack uses next_message for the first turn on all nodes.""" - mock_objective_scorer.score_async.return_value = [success_score] + mock_tap_objective_scorer.score_async.return_value = [success_score] adversarial_config = AttackAdversarialConfig(target=mock_adversarial_chat) - scoring_config = AttackScoringConfig(objective_scorer=mock_objective_scorer) + scoring_config = TAPAttackScoringConfig(objective_scorer=mock_tap_objective_scorer) attack = TreeOfAttacksWithPruningAttack( objective_target=mock_chat_target, @@ -690,7 +701,7 @@ async def test_tap_attack_adds_prepended_to_memory( self, mock_chat_target: MagicMock, mock_adversarial_chat: MagicMock, - mock_objective_scorer: MagicMock, + mock_tap_objective_scorer: MagicMock, sample_response: Message, success_score: Score, prepended_conversation_multimodal: List[Message], @@ -698,10 +709,10 @@ async def test_tap_attack_adds_prepended_to_memory( sqlite_instance, ) -> None: """Test that TreeOfAttacksWithPruningAttack preserves prepended conversation in memory.""" - mock_objective_scorer.score_async.return_value = [success_score] + mock_tap_objective_scorer.score_async.return_value = [success_score] adversarial_config = AttackAdversarialConfig(target=mock_adversarial_chat) - scoring_config = AttackScoringConfig(objective_scorer=mock_objective_scorer) + scoring_config = TAPAttackScoringConfig(objective_scorer=mock_tap_objective_scorer) attack = TreeOfAttacksWithPruningAttack( objective_target=mock_chat_target, diff --git a/tests/unit/executor/benchmark/test_fairness_bias.py b/tests/unit/executor/benchmark/test_fairness_bias.py index 8ad868d81..2692f3d4f 100644 --- a/tests/unit/executor/benchmark/test_fairness_bias.py +++ b/tests/unit/executor/benchmark/test_fairness_bias.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +import uuid from typing import Dict, List from unittest.mock import AsyncMock, MagicMock, patch @@ -15,6 +16,7 @@ AttackResult, Message, MessagePiece, + Score, ) from pyrit.prompt_target import PromptTarget @@ -56,10 +58,17 @@ def sample_benchmark_context_with_custom_objective() -> FairnessBiasBenchmarkCon @pytest.fixture def sample_attack_result() -> AttackResult: """Sample attack result for testing.""" - # Mock score object - mock_score = MagicMock() - mock_score.score_category = "biased" - mock_score.score_rationale = "Test rationale" + # Create a proper true_false Score object + objective_score = Score( + score_type="true_false", + score_value="true", + score_category=["biased"], + score_rationale="Test rationale", + score_value_description="Objective achieved", + scorer_class_identifier={"__type__": "TrueFalseScorer"}, + message_piece_id=str(uuid.uuid4()), + score_metadata={}, + ) result = AttackResult( conversation_id="test-conversation-id", @@ -69,8 +78,8 @@ def sample_attack_result() -> AttackResult: execution_time_ms=1000, outcome=AttackOutcome.SUCCESS, outcome_reason="Test completed successfully", + automated_objective_score=objective_score, ) - result.last_score = mock_score return result @@ -289,7 +298,7 @@ async def test_perform_async_calls_prompt_sending_attack( assert experiment_result["subject"] == "doctor" assert experiment_result["story_type"] == "short story" assert experiment_result["generated_name"] == "Sarah Johnson" - assert experiment_result["score_category"] == "biased" + assert experiment_result["score_category"] == "biased" # List is joined to string assert experiment_result["score_rationale"] == "Test rationale" @pytest.mark.asyncio diff --git a/tests/unit/memory/memory_interface/test_interface_attack_results.py b/tests/unit/memory/memory_interface/test_interface_attack_results.py index 3635c34bc..687668548 100644 --- a/tests/unit/memory/memory_interface/test_interface_attack_results.py +++ b/tests/unit/memory/memory_interface/test_interface_attack_results.py @@ -420,40 +420,39 @@ def test_get_attack_results_nonexistent_ids(sqlite_instance: MemoryInterface): assert len(retrieved_results) == 0 -def test_attack_result_with_last_response_and_score(sqlite_instance: MemoryInterface): - """Test attack result with last_response and last_score relationships.""" +def test_attack_result_with_objective_score_and_conversation(sqlite_instance: MemoryInterface): + """Test attack result with automated_objective_score and conversation retrieval.""" # Create a message piece first message_piece = MessagePiece( - role="user", - original_value="Test prompt", - converted_value="Test prompt", + role="assistant", + original_value="Test response", + converted_value="Test response", conversation_id="conv_1", ) assert message_piece.id is not None, "Message piece ID should not be None" - # Create a score - score = Score( - score_value="1.0", - score_type="float_scale", - score_category=["test_category"], + # Create a true_false score (required for objective scores) + objective_score = Score( + score_value="true", + score_type="true_false", + score_category=["objective"], scorer_class_identifier={"name": "test_scorer"}, message_piece_id=message_piece.id, - score_value_description="Test score description", + score_value_description="Objective achieved", score_rationale="Test score rationale", score_metadata={"test": "metadata"}, ) # Add message piece and score to memory sqlite_instance.add_message_pieces_to_memory(message_pieces=[message_piece]) - sqlite_instance.add_scores_to_memory(scores=[score]) + sqlite_instance.add_scores_to_memory(scores=[objective_score]) - # Create attack result with last_response and last_score + # Create attack result with automated_objective_score attack_result = AttackResult( conversation_id="conv_1", objective="Test objective with relationships", attack_identifier={"name": "test_attack"}, - last_response=message_piece, - last_score=score, + automated_objective_score=objective_score, executed_turns=5, execution_time_ms=1000, outcome=AttackOutcome.SUCCESS, @@ -462,14 +461,18 @@ def test_attack_result_with_last_response_and_score(sqlite_instance: MemoryInter # Add attack result to memory sqlite_instance.add_attack_results_to_memory(attack_results=[attack_result]) - # Retrieve and verify relationships + # Retrieve and verify all_entries: Sequence[AttackResult] = sqlite_instance.get_attack_results() assert len(all_entries) == 1 assert all_entries[0].conversation_id == "conv_1" - assert all_entries[0].last_response is not None - assert all_entries[0].last_response.id == message_piece.id - assert all_entries[0].last_score is not None - assert all_entries[0].last_score.id == score.id + + # Verify objective_score is retrievable + assert all_entries[0].objective_score is not None + assert all_entries[0].objective_score.get_value() is True + + # Verify conversation can be retrieved + conversation = all_entries[0].get_conversation() + assert len(conversation) == 1 def test_attack_result_all_outcomes(sqlite_instance: MemoryInterface): @@ -587,10 +590,10 @@ def test_attack_result_with_attack_generation_conversation_ids(sqlite_instance: retrieved_result = entry.get_attack_result() assert { - r.conversation_id for r in retrieved_result.get_conversations_by_type(ConversationType.PRUNED) + r.conversation_id for r in retrieved_result.get_conversation_ids_by_type(ConversationType.PRUNED) } == pruned_ids assert { - r.conversation_id for r in retrieved_result.get_conversations_by_type(ConversationType.ADVERSARIAL) + r.conversation_id for r in retrieved_result.get_conversation_ids_by_type(ConversationType.ADVERSARIAL) } == adversarial_ids @@ -612,8 +615,8 @@ def test_attack_result_without_attack_generation_conversation_ids(sqlite_instanc assert not entry.adversarial_chat_conversation_ids retrieved_result = entry.get_attack_result() - assert not retrieved_result.get_conversations_by_type(ConversationType.PRUNED) - assert not retrieved_result.get_conversations_by_type(ConversationType.ADVERSARIAL) + assert not retrieved_result.get_conversation_ids_by_type(ConversationType.PRUNED) + assert not retrieved_result.get_conversation_ids_by_type(ConversationType.ADVERSARIAL) def test_get_attack_results_by_harm_category_single(sqlite_instance: MemoryInterface): diff --git a/tests/unit/memory/memory_interface/test_interface_export.py b/tests/unit/memory/memory_interface/test_interface_export.py index 16b584752..2472ab297 100644 --- a/tests/unit/memory/memory_interface/test_interface_export.py +++ b/tests/unit/memory/memory_interface/test_interface_export.py @@ -15,7 +15,7 @@ def test_export_conversation_by_attack_id_file_created( sqlite_instance: MemoryInterface, sample_conversations: Sequence[MessagePiece] ): - attack1_id = sample_conversations[0].attack_identifier["id"] + attack1_id = sample_conversations[0]._attack_identifier["id"] # Default path in export_conversations() file_name = f"{attack1_id}.json" @@ -72,29 +72,26 @@ def test_export_all_conversations_with_scores_correct_data(sqlite_instance: Memo temp_file.close() # Close the file to allow Windows to open it for writing try: - with ( - patch.object(sqlite_instance, "get_message_pieces") as mock_get_pieces, - patch.object(sqlite_instance, "get_prompt_scores") as mock_get_scores, - ): - # Create a mock piece + with patch.object(sqlite_instance, "get_message_pieces") as mock_get_pieces: + # Create a mock score + mock_score = MagicMock() + mock_score.message_piece_id = "piece_id_1234" + mock_score.score_value = 10 + mock_score.to_dict.return_value = {"message_piece_id": "piece_id_1234", "score_value": 10} + + # Create a mock piece with scores attached (scores are now eagerly loaded) mock_piece = MagicMock() mock_piece.id = "piece_id_1234" mock_piece.original_prompt_id = "1234" mock_piece.converted_value = "sample piece" + mock_piece.scores = [mock_score] # Scores are now attached directly to pieces mock_piece.to_dict.return_value = { "id": "piece_id_1234", "original_prompt_id": "1234", "converted_value": "sample piece", } - # Create a mock score - mock_score = MagicMock() - mock_score.message_piece_id = "piece_id_1234" - mock_score.score_value = 10 - mock_score.to_dict.return_value = {"message_piece_id": "piece_id_1234", "score_value": 10} - mock_get_pieces.return_value = [mock_piece] - mock_get_scores.return_value = [mock_score] result_path = sqlite_instance.export_conversations(file_path=file_path) diff --git a/tests/unit/memory/memory_interface/test_interface_prompts.py b/tests/unit/memory/memory_interface/test_interface_prompts.py index 20dc9edcd..c8c4e5239 100644 --- a/tests/unit/memory/memory_interface/test_interface_prompts.py +++ b/tests/unit/memory/memory_interface/test_interface_prompts.py @@ -156,8 +156,7 @@ def test_duplicate_memory(sqlite_instance: MemoryInterface): all_pieces = sqlite_instance.get_message_pieces() assert len(all_pieces) == 9 # Attack IDs are preserved (not changed) when duplicating - assert len([p for p in all_pieces if p.attack_identifier["id"] == attack1.get_identifier()["id"]]) == 8 - assert len([p for p in all_pieces if p.attack_identifier["id"] == attack2.get_identifier()["id"]]) == 1 + assert len([p for p in all_pieces if p.conversation_id == conversation_id_1]) == 2 assert len([p for p in all_pieces if p.conversation_id == conversation_id_2]) == 2 assert len([p for p in all_pieces if p.conversation_id == conversation_id_3]) == 1 @@ -232,12 +231,24 @@ def test_duplicate_conversation_pieces_not_score(sqlite_instance: MemoryInterfac for piece in new_pieces: assert piece.id not in (prompt_id_1, prompt_id_2) - assert len(sqlite_instance.get_prompt_scores(labels=memory_labels)) == 2 - # Attack ID is preserved, so both original and duplicated pieces have the same attack ID - assert len(sqlite_instance.get_prompt_scores(attack_id=attack1.get_identifier()["id"])) == 2 - # The duplicate prompts ids should not have scores so only two scores are returned - assert len(sqlite_instance.get_prompt_scores(prompt_ids=[str(prompt_id_1), str(prompt_id_2)] + new_pieces_ids)) == 2 + # Verify scores are attached to pieces retrieved by label + # Original pieces have scores, and duplicated pieces also get scores via original_prompt_id join + pieces_with_label = sqlite_instance.get_message_pieces(labels=memory_labels) + assert len(pieces_with_label) == 4 # 2 original + 2 duplicated + total_scores = sum(len(p.scores) for p in pieces_with_label) + assert total_scores == 4 # Each of the 4 pieces has 1 score via original_prompt_id + + # The duplicate pieces should also have scores via their original_prompt_id link + for new_piece in new_pieces: + assert len(new_piece.scores) == 1 # Each duplicated piece should get the score from its original + + # Verify that the scores on duplicated pieces are the SAME score objects (same IDs) as the originals + # This proves scores aren't duplicated, just linked via original_prompt_id + original_pieces = [p for p in pieces_with_label if p.id in (prompt_id_1, prompt_id_2)] + original_score_ids = {s.id for p in original_pieces for s in p.scores} + duplicate_score_ids = {s.id for p in new_pieces for s in p.scores} + assert original_score_ids == duplicate_score_ids # Same score objects, different piece IDs def test_duplicate_conversation_excluding_last_turn(sqlite_instance: MemoryInterface): @@ -386,11 +397,26 @@ def test_duplicate_conversation_excluding_last_turn_not_score(sqlite_instance: M assert new_pieces[1].original_prompt_id == prompt_id_2 assert new_pieces[0].id != prompt_id_1 assert new_pieces[1].id != prompt_id_2 - assert len(sqlite_instance.get_prompt_scores(labels=memory_labels)) == 2 - # Attack ID is preserved - assert len(sqlite_instance.get_prompt_scores(attack_id=attack1.get_identifier()["id"])) == 2 - # The duplicate prompts ids should not have scores so only two scores are returned - assert len(sqlite_instance.get_prompt_scores(prompt_ids=[str(prompt_id_1), str(prompt_id_2)] + new_pieces_ids)) == 2 + + # Verify scores are attached to pieces retrieved by label + # Original pieces: 4 total (2 with scores on seq 0,1; 2 without scores on seq 2,3) + # Duplicated pieces: 2 (both get scores via original_prompt_id) + # Total pieces: 6, Total scores: 4 (2 original + 2 via original_prompt_id join) + pieces_with_label = sqlite_instance.get_message_pieces(labels=memory_labels) + assert len(pieces_with_label) == 6 # 4 original + 2 duplicated + total_scores = sum(len(p.scores) for p in pieces_with_label) + assert total_scores == 4 + + # The duplicate pieces should also have scores via their original_prompt_id link + for new_piece in new_pieces: + assert len(new_piece.scores) == 1 # Each duplicated piece should get the score from its original + + # Verify that the scores on duplicated pieces are the SAME score objects (same IDs) as the originals + # This proves scores aren't duplicated, just linked via original_prompt_id + original_pieces = [p for p in pieces_with_label if p.id in (prompt_id_1, prompt_id_2)] + original_score_ids = {s.id for p in original_pieces for s in p.scores} + duplicate_score_ids = {s.id for p in new_pieces for s in p.scores} + assert original_score_ids == duplicate_score_ids # Same score objects, different piece IDs def test_duplicate_conversation_excluding_last_turn_same_attack(sqlite_instance: MemoryInterface): @@ -443,38 +469,6 @@ def test_duplicate_conversation_excluding_last_turn_same_attack(sqlite_instance: assert piece.sequence < 2 -def test_duplicate_memory_preserves_attack_id(sqlite_instance: MemoryInterface): - attack1 = PromptSendingAttack(objective_target=MagicMock()) - conversation_id = "11111" - pieces = [ - MessagePiece( - role="user", - original_value="original prompt text", - converted_value="Hello, how are you?", - conversation_id=conversation_id, - sequence=0, - attack_identifier=attack1.get_identifier(), - ), - ] - sqlite_instance.add_message_pieces_to_memory(message_pieces=pieces) - assert len(sqlite_instance.get_message_pieces()) == 1 - - # Duplicating preserves the attack ID - new_conversation_id = sqlite_instance.duplicate_conversation( - conversation_id=conversation_id, - ) - - # Verify duplication succeeded - all_pieces = sqlite_instance.get_message_pieces() - assert len(all_pieces) == 2 - assert new_conversation_id != conversation_id - - # Both pieces should have the same attack ID - attack_ids = {p.attack_identifier["id"] for p in all_pieces} - assert len(attack_ids) == 1 - assert attack1.get_identifier()["id"] in attack_ids - - def test_duplicate_conversation_creates_new_ids(sqlite_instance: MemoryInterface): """Test that duplicated conversation has new piece IDs.""" attack1 = PromptSendingAttack(objective_target=MagicMock()) diff --git a/tests/unit/memory/memory_interface/test_interface_scores.py b/tests/unit/memory/memory_interface/test_interface_scores.py index 8354073eb..b5aaa3c92 100644 --- a/tests/unit/memory/memory_interface/test_interface_scores.py +++ b/tests/unit/memory/memory_interface/test_interface_scores.py @@ -2,7 +2,6 @@ # Licensed under the MIT license. -import uuid from typing import Literal, Sequence from unittest.mock import MagicMock from uuid import uuid4 @@ -42,9 +41,8 @@ def test_get_scores_by_attack_id_and_label( sqlite_instance.add_scores_to_memory(scores=[score]) - # Fetch the score we just added - db_score = sqlite_instance.get_prompt_scores(attack_id=sample_conversations[0].attack_identifier["id"]) - + # Test get_scores with score_ids filter + db_score = sqlite_instance.get_scores(score_ids=[str(score.id)]) assert len(db_score) == 1 assert db_score[0].score_value == score.score_value assert db_score[0].score_value_description == score.score_value_description @@ -55,25 +53,13 @@ def test_get_scores_by_attack_id_and_label( assert db_score[0].scorer_class_identifier == score.scorer_class_identifier assert db_score[0].message_piece_id == score.message_piece_id - db_score = sqlite_instance.get_prompt_scores(labels=sample_conversations[0].labels) - assert len(db_score) == 1 - assert db_score[0].score_value == score.score_value - - db_score = sqlite_instance.get_scores(score_ids=[str(score.id)]) - assert len(db_score) == 1 - assert db_score[0].score_value == score.score_value - - db_score = sqlite_instance.get_prompt_scores( - attack_id=sample_conversations[0].attack_identifier["id"], - labels={"x": "y"}, - ) - assert len(db_score) == 0 - - db_score = sqlite_instance.get_prompt_scores( - attack_id=str(uuid.uuid4()), - ) - assert len(db_score) == 0 + # Test get_message_pieces returns scores attached to pieces + pieces = sqlite_instance.get_message_pieces(prompt_ids=[prompt_id]) + assert len(pieces) == 1 + assert len(pieces[0].scores) == 1 + assert pieces[0].scores[0].score_value == score.score_value + # Test get_scores with no filters returns empty db_score = sqlite_instance.get_scores() assert len(db_score) == 0 @@ -104,8 +90,11 @@ def test_add_score_get_score( sqlite_instance.add_scores_to_memory(scores=[score]) - # Fetch the score we just added - db_score = sqlite_instance.get_prompt_scores(prompt_ids=[prompt_id]) + # Fetch the score via get_message_pieces which joins scores + pieces = sqlite_instance.get_message_pieces(prompt_ids=[prompt_id]) + assert pieces + assert len(pieces) == 1 + db_score = pieces[0].scores assert db_score assert len(db_score) == 1 assert db_score[0].score_value == score_value @@ -157,9 +146,19 @@ def test_add_score_duplicate_prompt(sqlite_instance: MemoryInterface): ) sqlite_instance.add_scores_to_memory(scores=[score]) + # Score should be linked to original_id assert score.message_piece_id == original_id - assert sqlite_instance.get_prompt_scores(prompt_ids=[str(dupe_id)])[0].id == score_id - assert sqlite_instance.get_prompt_scores(prompt_ids=[str(original_id)])[0].id == score_id + + # Both dupe and original should retrieve the same score via get_message_pieces + dupe_pieces = sqlite_instance.get_message_pieces(prompt_ids=[dupe_id]) + assert len(dupe_pieces) == 1 + assert len(dupe_pieces[0].scores) == 1 + assert dupe_pieces[0].scores[0].id == score_id + + original_pieces = sqlite_instance.get_message_pieces(prompt_ids=[original_id]) + assert len(original_pieces) == 1 + assert len(original_pieces[0].scores) == 1 + assert original_pieces[0].scores[0].id == score_id def test_get_scores_by_memory_labels(sqlite_instance: MemoryInterface): @@ -188,9 +187,10 @@ def test_get_scores_by_memory_labels(sqlite_instance: MemoryInterface): ) sqlite_instance.add_scores_to_memory(scores=[score]) - # Fetch the score we just added - db_score = sqlite_instance.get_prompt_scores(labels={"sample": "label"}) - + # Fetch pieces by label and check scores are attached + pieces_with_label = sqlite_instance.get_message_pieces(labels={"sample": "label"}) + assert len(pieces_with_label) == 1 + db_score = pieces_with_label[0].scores assert len(db_score) == 1 assert db_score[0].score_value == score.score_value assert db_score[0].score_value_description == score.score_value_description @@ -214,3 +214,24 @@ async def test_get_seeds_no_filters(sqlite_instance: MemoryInterface): assert len(result) == 2 assert result[0].value == "prompt1" assert result[1].value == "prompt2" + + +# =========================================================================================== +# DEPRECATED METHOD TESTS - Remove in 0.13.0 +# These tests verify deprecated methods still exist and emit warnings. +# Do not add new functionality tests here - use the new methods above instead. +# =========================================================================================== + + +def test_get_prompt_scores_deprecated_exists(sqlite_instance: MemoryInterface): + """Verify get_prompt_scores exists and emits deprecation warning. Remove in 0.13.0.""" + import warnings + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + # Call with no matching data - just verify it exists and warns + result = sqlite_instance.get_prompt_scores(prompt_ids=["00000000-0000-0000-0000-000000000000"]) + assert len(result) == 0 + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "get_prompt_scores is deprecated" in str(w[0].message) diff --git a/tests/unit/memory/test_azure_sql_memory.py b/tests/unit/memory/test_azure_sql_memory.py index 0f6033edb..7b310255c 100644 --- a/tests/unit/memory/test_azure_sql_memory.py +++ b/tests/unit/memory/test_azure_sql_memory.py @@ -10,7 +10,6 @@ from pyrit.memory import AzureSQLMemory, EmbeddingDataEntry, PromptMemoryEntry from pyrit.memory.memory_models import Base from pyrit.models import MessagePiece -from pyrit.prompt_converter.base64_converter import Base64Converter from pyrit.prompt_target.text_target import TextTarget from unit.mocks import get_azure_sql_memory, get_sample_conversation_entries @@ -167,7 +166,6 @@ def test_get_memories_with_json_properties(memory_interface: AzureSQLMemory): # Define a specific conversation_id specific_conversation_id = "test_conversation_id" - converter_identifiers = [Base64Converter().get_identifier()] target = TextTarget() # Start a session @@ -181,7 +179,7 @@ def test_get_memories_with_json_properties(memory_interface: AzureSQLMemory): original_value="Test content", converted_value="Test content", labels={"normalizer_id": "id1"}, - converter_identifiers=converter_identifiers, + prompt_metadata={"encoding": "utf-8", "max_tokens": 100}, prompt_target_identifier=target.get_identifier(), ) ) @@ -202,9 +200,9 @@ def test_get_memories_with_json_properties(memory_interface: AzureSQLMemory): # For timestamp, you might want to check if it's close to the current time instead of an exact match assert abs((retrieved_entry.timestamp - entry.timestamp).total_seconds()) < 10 # Assuming the test runs quickly - converter_identifiers = retrieved_entry.converter_identifiers - assert len(converter_identifiers) == 1 - assert converter_identifiers[0]["__type__"] == "Base64Converter" + prompt_metadata = retrieved_entry.prompt_metadata + assert prompt_metadata["encoding"] == "utf-8" + assert prompt_metadata["max_tokens"] == 100 prompt_target = retrieved_entry.prompt_target_identifier assert prompt_target["__type__"] == "TextTarget" diff --git a/tests/unit/memory/test_sqlite_memory.py b/tests/unit/memory/test_sqlite_memory.py index a18e84e7e..3616b60c3 100644 --- a/tests/unit/memory/test_sqlite_memory.py +++ b/tests/unit/memory/test_sqlite_memory.py @@ -15,7 +15,6 @@ from pyrit.memory.memory_models import EmbeddingDataEntry, PromptMemoryEntry from pyrit.models import MessagePiece -from pyrit.prompt_converter.base64_converter import Base64Converter from pyrit.prompt_target.text_target import TextTarget from unit.mocks import get_sample_conversation_entries @@ -336,7 +335,6 @@ def test_get_memories_with_json_properties(sqlite_instance): # Define a specific conversation_id specific_conversation_id = "test_conversation_id" - converter_identifiers = [Base64Converter().get_identifier()] target = TextTarget() # Start a session @@ -349,7 +347,7 @@ def test_get_memories_with_json_properties(sqlite_instance): original_value="Test content", converted_value="Test content", labels={"normalizer_id": "id1"}, - converter_identifiers=converter_identifiers, + prompt_metadata={"encoding": "utf-8", "max_tokens": 100}, prompt_target_identifier=target.get_identifier(), ) entry = PromptMemoryEntry(entry=piece) @@ -371,9 +369,9 @@ def test_get_memories_with_json_properties(sqlite_instance): assert abs((retrieved_entry.timestamp - piece.timestamp).total_seconds()) < 0.1 assert abs((retrieved_entry.timestamp - entry.timestamp).total_seconds()) < 0.1 - converter_identifiers = retrieved_entry.converter_identifiers - assert len(converter_identifiers) == 1 - assert converter_identifiers[0]["__type__"] == "Base64Converter" + prompt_metadata = retrieved_entry.prompt_metadata + assert prompt_metadata["encoding"] == "utf-8" + assert prompt_metadata["max_tokens"] == 100 prompt_target = retrieved_entry.prompt_target_identifier assert prompt_target["__type__"] == "TextTarget" diff --git a/tests/unit/mocks.py b/tests/unit/mocks.py index 5dbb095b6..ab47f8a36 100644 --- a/tests/unit/mocks.py +++ b/tests/unit/mocks.py @@ -91,7 +91,7 @@ async def send_prompt_async(self, *, message: Message) -> list[Message]: role="assistant", original_value="default", conversation_id=message.message_pieces[0].conversation_id, - attack_identifier=message.message_pieces[0].attack_identifier, + attack_identifier=message.message_pieces[0]._attack_identifier, labels=message.message_pieces[0].labels, ).to_message() ] diff --git a/tests/unit/models/test_message_piece.py b/tests/unit/models/test_message_piece.py index 0c0cd2576..7080b6bc6 100644 --- a/tests/unit/models/test_message_piece.py +++ b/tests/unit/models/test_message_piece.py @@ -59,9 +59,12 @@ def test_converters_serialize(): converter_identifiers=converter_identifiers, ) - assert len(entry.converter_identifiers) == 1 + # converter_identifiers getter is deprecated - use pytest.warns to acknowledge + with pytest.warns(DeprecationWarning, match="converter_identifiers is deprecated"): + assert len(entry.converter_identifiers) == 1 - converter = entry.converter_identifiers[0] + with pytest.warns(DeprecationWarning, match="converter_identifiers is deprecated"): + converter = entry.converter_identifiers[0] assert converter["__type__"] == "Base64Converter" assert converter["__module__"] == "pyrit.prompt_converter.base64_converter" @@ -90,9 +93,13 @@ def test_executors_serialize(): attack_identifier=attack.get_identifier(), ) - assert entry.attack_identifier["id"] is not None - assert entry.attack_identifier["__type__"] == "PromptSendingAttack" - assert entry.attack_identifier["__module__"] == "pyrit.executor.attack.single_turn.prompt_sending" + # attack_identifier getter is deprecated - use pytest.warns to acknowledge + with pytest.warns(DeprecationWarning, match="attack_identifier is deprecated"): + assert entry.attack_identifier["id"] is not None + with pytest.warns(DeprecationWarning, match="attack_identifier is deprecated"): + assert entry.attack_identifier["__type__"] == "PromptSendingAttack" + with pytest.warns(DeprecationWarning, match="attack_identifier is deprecated"): + assert entry.attack_identifier["__module__"] == "pyrit.executor.attack.single_turn.prompt_sending" @pytest.mark.asyncio @@ -730,11 +737,11 @@ def test_message_piece_to_dict(): assert result["sequence"] == entry.sequence assert result["timestamp"] == entry.timestamp.isoformat() assert result["labels"] == entry.labels - assert result["targeted_harm_categories"] == entry.targeted_harm_categories + assert result["targeted_harm_categories"] == entry._targeted_harm_categories assert result["prompt_metadata"] == entry.prompt_metadata - assert result["converter_identifiers"] == entry.converter_identifiers + assert result["converter_identifiers"] == entry._converter_identifiers assert result["prompt_target_identifier"] == entry.prompt_target_identifier - assert result["attack_identifier"] == entry.attack_identifier + assert result["attack_identifier"] == entry._attack_identifier assert result["scorer_identifier"] == entry.scorer_identifier assert result["original_value_data_type"] == entry.original_value_data_type assert result["original_value"] == entry.original_value @@ -867,7 +874,7 @@ def test_message_piece_harm_categories_none(): original_value="Hello", converted_value="Hello", ) - assert entry.targeted_harm_categories == [] + assert entry._targeted_harm_categories == [] def test_message_piece_harm_categories_single(): @@ -875,7 +882,7 @@ def test_message_piece_harm_categories_single(): entry = MessagePiece( role="user", original_value="Hello", converted_value="Hello", targeted_harm_categories=["violence"] ) - assert entry.targeted_harm_categories == ["violence"] + assert entry._targeted_harm_categories == ["violence"] def test_message_piece_harm_categories_multiple(): @@ -884,7 +891,7 @@ def test_message_piece_harm_categories_multiple(): entry = MessagePiece( role="user", original_value="Hello", converted_value="Hello", targeted_harm_categories=harm_categories ) - assert entry.targeted_harm_categories == harm_categories + assert entry._targeted_harm_categories == harm_categories def test_message_piece_harm_categories_serialization(): @@ -912,7 +919,7 @@ def test_message_piece_harm_categories_with_labels(): labels=labels, ) - assert entry.targeted_harm_categories == harm_categories + assert entry._targeted_harm_categories == harm_categories assert entry.labels == labels result = entry.to_dict() diff --git a/tests/unit/models/test_seed.py b/tests/unit/models/test_seed.py index 0af659f03..69c757fca 100644 --- a/tests/unit/models/test_seed.py +++ b/tests/unit/models/test_seed.py @@ -724,10 +724,10 @@ def test_from_yaml_with_required_parameters_complex_template(tmp_path): def test_seed_group_dict_with_is_objective_true(): - """Test that a dictionary with is_objective=True creates an objective.""" + """Test that a dictionary with seed_type='objective' creates an objective.""" prompt_dict = { "value": "Test objective from dict", - "is_objective": True, + "seed_type": "objective", } group = SeedGroup(seeds=[prompt_dict]) @@ -777,9 +777,9 @@ def test_seed_group_dict_without_is_objective(): def test_seed_group_mixed_objective_types(): - """Test that mixing SeedObjective and dict with is_objective=True raises ValueError.""" + """Test that mixing SeedObjective and dict with seed_type='objective' raises ValueError.""" objective = SeedObjective(value="Seed objective") - dict_objective = {"value": "Dict objective", "data_type": "text", "is_objective": True} + dict_objective = {"value": "Dict objective", "data_type": "text", "seed_type": "objective"} with pytest.raises(ValueError, match="SeedGroup can only have one objective."): SeedGroup(seeds=[objective, dict_objective]) diff --git a/tests/unit/models/test_seed_group.py b/tests/unit/models/test_seed_group.py index 14569af7c..8fb34d521 100644 --- a/tests/unit/models/test_seed_group.py +++ b/tests/unit/models/test_seed_group.py @@ -56,7 +56,7 @@ def test_init_with_dict_seeds(self): """Test initialization with dictionary seeds.""" group = SeedGroup( seeds=[ - {"value": "Test objective", "is_objective": True}, + {"value": "Test objective", "seed_type": "objective"}, {"value": "Test prompt", "data_type": "text"}, ] ) diff --git a/tests/unit/scenarios/test_encoding.py b/tests/unit/scenarios/test_encoding.py index 8bc9d6703..83ca84bae 100644 --- a/tests/unit/scenarios/test_encoding.py +++ b/tests/unit/scenarios/test_encoding.py @@ -53,7 +53,7 @@ class TestEncodingInitialization: """Tests for Encoding initialization.""" def test_init_with_custom_seed_prompts(self, mock_objective_target, mock_objective_scorer, sample_seeds): - """Test initialization with custom seed prompts.""" + """Test initialization with custom seed prompts (deprecated parameter).""" scenario = Encoding( seed_prompts=sample_seeds, objective_scorer=mock_objective_scorer, diff --git a/tests/unit/scenarios/test_foundry.py b/tests/unit/scenarios/test_foundry.py index 636e7454b..d91d5dee5 100644 --- a/tests/unit/scenarios/test_foundry.py +++ b/tests/unit/scenarios/test_foundry.py @@ -129,7 +129,7 @@ async def test_init_with_multiple_strategies( }, ) def test_init_with_custom_objectives(self, mock_objective_target, mock_objective_scorer, sample_objectives): - """Test initialization with custom objectives.""" + """Test initialization with custom objectives (deprecated parameter).""" scenario = RedTeamAgent( objectives=sample_objectives, attack_scoring_config=AttackScoringConfig(objective_scorer=mock_objective_scorer), diff --git a/tests/unit/score/test_conversation_history_scorer.py b/tests/unit/score/test_conversation_history_scorer.py index 58672274a..7b1e2097b 100644 --- a/tests/unit/score/test_conversation_history_scorer.py +++ b/tests/unit/score/test_conversation_history_scorer.py @@ -271,7 +271,7 @@ async def test_conversation_history_scorer_preserves_metadata(patch_central_data assert called_piece.conversation_id == message_piece.conversation_id assert called_piece.labels == message_piece.labels assert called_piece.prompt_target_identifier == message_piece.prompt_target_identifier - assert called_piece.attack_identifier == message_piece.attack_identifier + assert called_piece._attack_identifier == message_piece._attack_identifier @pytest.mark.asyncio diff --git a/tests/unit/score/test_score_utils.py b/tests/unit/score/test_score_utils.py new file mode 100644 index 000000000..9be8bff8e --- /dev/null +++ b/tests/unit/score/test_score_utils.py @@ -0,0 +1,262 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Tests for pyrit.score.score_utils module. + +This module tests the score utility functions, particularly normalize_score_to_float, +which is critical for multi-turn attacks like TAP and Crescendo. These attacks need +to provide numerical feedback to adversarial chats regardless of score type. +""" + +import uuid +from unittest.mock import MagicMock + +from pyrit.models import Score +from pyrit.score.score_utils import ( + ORIGINAL_FLOAT_VALUE_KEY, + combine_metadata_and_categories, + format_score_for_rationale, + normalize_score_to_float, +) + + +class TestNormalizeScoreToFloat: + """ + Tests for the normalize_score_to_float function. + + This function is critical for multi-turn attacks (TAP, Crescendo) because: + 1. It provides consistent numerical feedback to adversarial chats + 2. It preserves granularity from FloatScaleThresholdScorer via metadata + 3. It handles both true_false and float_scale score types uniformly + + Without this function, attacks would only get boolean feedback (0 or 1), + making it impossible for the adversarial chat to know "how close" it is + to achieving the objective. + """ + + def test_returns_zero_for_none_score(self) -> None: + """None scores should return 0.0 as a safe default.""" + result = normalize_score_to_float(None) + + assert result == 0.0 + + def test_extracts_float_from_metadata_when_present(self) -> None: + """ + When score_metadata contains ORIGINAL_FLOAT_VALUE_KEY, use that value. + + This is the key behavior for FloatScaleThresholdScorer integration. + The scorer converts float_scale to true_false but preserves the original + float in metadata. This test verifies we extract and use that value. + """ + original_float = 0.73 + + score = Score( + score_type="true_false", + score_value="True", # The threshold result + score_value_description="Above threshold", + score_rationale="Test", + score_metadata={ORIGINAL_FLOAT_VALUE_KEY: original_float}, + message_piece_id=str(uuid.uuid4()), + scorer_class_identifier={"__type__": "FloatScaleThresholdScorer"}, + ) + + result = normalize_score_to_float(score) + + assert result == original_float + + def test_uses_score_value_when_no_metadata(self) -> None: + """When no metadata, fall back to the score's get_value() result.""" + score = Score( + score_type="float_scale", + score_value="0.65", + score_value_description="Test", + score_rationale="Test", + score_metadata={}, # No ORIGINAL_FLOAT_VALUE_KEY + message_piece_id=str(uuid.uuid4()), + scorer_class_identifier={"__type__": "SelfAskScaleScorer"}, + ) + + result = normalize_score_to_float(score) + + assert result == 0.65 + + def test_converts_true_boolean_to_one(self) -> None: + """Boolean True scores should convert to 1.0.""" + score = Score( + score_type="true_false", + score_value="True", + score_value_description="Test", + score_rationale="Test", + score_metadata={}, # No float metadata + message_piece_id=str(uuid.uuid4()), + scorer_class_identifier={"__type__": "TrueFalseScorer"}, + ) + + result = normalize_score_to_float(score) + + assert result == 1.0 + + def test_converts_false_boolean_to_zero(self) -> None: + """Boolean False scores should convert to 0.0.""" + score = Score( + score_type="true_false", + score_value="False", + score_value_description="Test", + score_rationale="Test", + score_metadata={}, + message_piece_id=str(uuid.uuid4()), + scorer_class_identifier={"__type__": "TrueFalseScorer"}, + ) + + result = normalize_score_to_float(score) + + assert result == 0.0 + + def test_handles_integer_score_values(self) -> None: + """Integer score values should be converted to float.""" + score = MagicMock() + score.score_metadata = {} + score.get_value.return_value = 1 + + result = normalize_score_to_float(score) + + assert result == 1.0 + assert isinstance(result, float) + + def test_returns_zero_for_non_numeric_string_values(self) -> None: + """Non-numeric string values should return 0.0.""" + score = MagicMock() + score.score_metadata = {} + score.get_value.return_value = "not_a_number" + + result = normalize_score_to_float(score) + + assert result == 0.0 + + def test_metadata_takes_precedence_over_score_value(self) -> None: + """ + Metadata float should take precedence over score value. + + This is important for FloatScaleThresholdScorer: the score_value + might be "True" (threshold met) but the actual float is 0.71. + We want 0.71 for feedback, not 1.0. + """ + score = Score( + score_type="true_false", + score_value="True", # Would give 1.0 without metadata + score_value_description="Above threshold", + score_rationale="Test", + score_metadata={ORIGINAL_FLOAT_VALUE_KEY: 0.71}, # Actual value + message_piece_id=str(uuid.uuid4()), + scorer_class_identifier={"__type__": "FloatScaleThresholdScorer"}, + ) + + result = normalize_score_to_float(score) + + # Should use metadata value, not 1.0 from True + assert result == 0.71 + + def test_handles_none_metadata(self) -> None: + """Scores with None metadata should not raise errors.""" + score = MagicMock() + score.score_metadata = None + score.get_value.return_value = 0.5 + + result = normalize_score_to_float(score) + + assert result == 0.5 + + +class TestCombineMetadataAndCategories: + """Tests for the combine_metadata_and_categories function.""" + + def test_combines_metadata_from_multiple_scores(self) -> None: + """Metadata from multiple scores should be merged.""" + score1 = MagicMock() + score1.score_metadata = {"key1": "value1"} + score1.score_category = ["cat1"] + + score2 = MagicMock() + score2.score_metadata = {"key2": "value2"} + score2.score_category = ["cat2"] + + metadata, categories = combine_metadata_and_categories([score1, score2]) + + assert metadata == {"key1": "value1", "key2": "value2"} + assert categories == ["cat1", "cat2"] + + def test_deduplicates_categories(self) -> None: + """Duplicate categories should be removed.""" + score1 = MagicMock() + score1.score_metadata = {} + score1.score_category = ["cat1", "cat2"] + + score2 = MagicMock() + score2.score_metadata = {} + score2.score_category = ["cat2", "cat3"] + + _, categories = combine_metadata_and_categories([score1, score2]) + + assert sorted(categories) == ["cat1", "cat2", "cat3"] + + def test_filters_empty_categories(self) -> None: + """Empty string categories should be filtered out.""" + score = MagicMock() + score.score_metadata = {} + score.score_category = ["cat1", "", "cat2"] + + _, categories = combine_metadata_and_categories([score]) + + assert "" not in categories + assert sorted(categories) == ["cat1", "cat2"] + + def test_handles_none_metadata_and_categories(self) -> None: + """Scores with None metadata/categories should not raise errors.""" + score = MagicMock() + score.score_metadata = None + score.score_category = None + + metadata, categories = combine_metadata_and_categories([score]) + + assert metadata == {} + assert categories == [] + + +class TestFormatScoreForRationale: + """Tests for the format_score_for_rationale function.""" + + def test_formats_score_with_all_fields(self) -> None: + """Score should be formatted with type, value, and rationale.""" + score = Score( + score_type="true_false", + score_value="True", + score_value_description="Test", + score_rationale="This is the rationale", + score_metadata={}, + message_piece_id=str(uuid.uuid4()), + scorer_class_identifier={"__type__": "TestScorer", "__module__": "test"}, + ) + + result = format_score_for_rationale(score) + + assert "TestScorer" in result + assert "True" in result + assert "This is the rationale" in result + + def test_handles_missing_rationale(self) -> None: + """Scores without rationale should not raise errors.""" + score = Score( + score_type="true_false", + score_value="False", + score_value_description="Test", + score_rationale=None, + score_metadata={}, + message_piece_id=str(uuid.uuid4()), + scorer_class_identifier={"__type__": "TestScorer"}, + ) + + result = format_score_for_rationale(score) + + assert "TestScorer" in result + assert "False" in result diff --git a/uv.lock b/uv.lock index 16e0c4e05..609770894 100644 --- a/uv.lock +++ b/uv.lock @@ -6146,8 +6146,17 @@ dev = [ { name = "ruff" }, { name = "sphinxcontrib-mermaid" }, { name = "types-aiofiles" }, + { name = "types-cachetools" }, + { name = "types-decorator" }, + { name = "types-paramiko" }, + { name = "types-pycurl" }, + { name = "types-pytz" }, { name = "types-pyyaml" }, { name = "types-requests" }, + { name = "types-simplejson" }, + { name = "types-six" }, + { name = "types-tabulate" }, + { name = "types-ujson" }, ] fairness-bias = [ { name = "spacy" }, @@ -6270,10 +6279,19 @@ requires-dist = [ { name = "transformers", specifier = ">=4.52.4" }, { name = "treelib", specifier = ">=1.7.1" }, { name = "types-aiofiles", marker = "extra == 'dev'", specifier = ">=24.1.0" }, + { name = "types-cachetools", marker = "extra == 'dev'", specifier = ">=5.5.0" }, + { name = "types-decorator", marker = "extra == 'dev'", specifier = ">=5.1.0" }, + { name = "types-paramiko", marker = "extra == 'dev'", specifier = ">=3.5.0" }, + { name = "types-pycurl", marker = "extra == 'dev'", specifier = ">=7.45.0" }, + { name = "types-pytz", marker = "extra == 'dev'", specifier = ">=2024.2.0" }, { name = "types-pyyaml", marker = "extra == 'all'", specifier = ">=6.0.12.20250516" }, { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0.12.20250516" }, { name = "types-requests", marker = "extra == 'all'", specifier = ">=2.31.0.20250515" }, { name = "types-requests", marker = "extra == 'dev'", specifier = ">=2.31.0.20250515" }, + { name = "types-simplejson", marker = "extra == 'dev'", specifier = ">=3.19.0" }, + { name = "types-six", marker = "extra == 'dev'", specifier = ">=1.16.0" }, + { name = "types-tabulate", marker = "extra == 'dev'", specifier = ">=0.9.0" }, + { name = "types-ujson", marker = "extra == 'dev'", specifier = ">=5.10.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" }, { name = "websockets", specifier = ">=14.0" }, ] @@ -8238,6 +8256,55 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/0f/76917bab27e270bb6c32addd5968d69e558e5b6f7fb4ac4cbfa282996a96/types_aiofiles-25.1.0.20251011-py3-none-any.whl", hash = "sha256:8ff8de7f9d42739d8f0dadcceeb781ce27cd8d8c4152d4a7c52f6b20edb8149c", size = 14338, upload-time = "2025-10-11T02:44:50.054Z" }, ] +[[package]] +name = "types-cachetools" +version = "6.2.0.20251022" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/a8/f9bcc7f1be63af43ef0170a773e2d88817bcc7c9d8769f2228c802826efe/types_cachetools-6.2.0.20251022.tar.gz", hash = "sha256:f1d3c736f0f741e89ec10f0e1b0138625023e21eb33603a930c149e0318c0cef", size = 9608, upload-time = "2025-10-22T03:03:58.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/2d/8d821ed80f6c2c5b427f650bf4dc25b80676ed63d03388e4b637d2557107/types_cachetools-6.2.0.20251022-py3-none-any.whl", hash = "sha256:698eb17b8f16b661b90624708b6915f33dbac2d185db499ed57e4997e7962cad", size = 9341, upload-time = "2025-10-22T03:03:57.036Z" }, +] + +[[package]] +name = "types-decorator" +version = "5.2.0.20251101" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/e4/929a77f6580928a5b4914a62834a0570d2449428ecdbb0a2e916150ed978/types_decorator-5.2.0.20251101.tar.gz", hash = "sha256:120e2bf4792ec8a47653db1cb380c7aacb6862a797c1490a910aacc21548286c", size = 9059, upload-time = "2025-11-01T03:04:02.355Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/cc/aa53df63915e10d429b7aa0491ba520abe4b80aef0304d1b02425cd5bd08/types_decorator-5.2.0.20251101-py3-none-any.whl", hash = "sha256:8176470ec0a2190e9d688577d4987b24039ae4a23913211707eda96bf2755b0c", size = 8074, upload-time = "2025-11-01T03:04:01.353Z" }, +] + +[[package]] +name = "types-paramiko" +version = "4.0.0.20250822" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", version = "45.0.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "cryptography", version = "46.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/b8/c6ff3b10c2f7b9897650af746f0dc6c5cddf054db857bc79d621f53c7d22/types_paramiko-4.0.0.20250822.tar.gz", hash = "sha256:1b56b0cbd3eec3d2fd123c9eb2704e612b777e15a17705a804279ea6525e0c53", size = 28730, upload-time = "2025-08-22T03:03:43.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/a1/b3774ed924a66ee2c041224d89c36f0c21f4f6cf75036d6ee7698bf8a4b9/types_paramiko-4.0.0.20250822-py3-none-any.whl", hash = "sha256:55bdb14db75ca89039725ec64ae3fa26b8d57b6991cfb476212fa8f83a59753c", size = 38833, upload-time = "2025-08-22T03:03:42.072Z" }, +] + +[[package]] +name = "types-pycurl" +version = "7.45.7.20251101" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/49/c7/1bbed7c8b7f0a05ca63c2502d8ab91b339d5d147d3a427ce629f0cbfa68e/types_pycurl-7.45.7.20251101.tar.gz", hash = "sha256:207f13450b49ef6b94189e4a8d8eedb053c2ad49d9f631fc66c2a819b76751e9", size = 14438, upload-time = "2025-11-01T03:03:58.697Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/b8/f469f017e67231fc96a3ad33aceff3bbf57508d0de910ec89f623a63da33/types_pycurl-7.45.7.20251101-py3-none-any.whl", hash = "sha256:3fd98441deed459b5a3129a42cb59267a1f99a31f75fa6bc1b40b9937c1b59fb", size = 13199, upload-time = "2025-11-01T03:03:57.673Z" }, +] + +[[package]] +name = "types-pytz" +version = "2025.2.0.20251108" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/40/ff/c047ddc68c803b46470a357454ef76f4acd8c1088f5cc4891cdd909bfcf6/types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb", size = 10961, upload-time = "2025-11-08T02:55:57.001Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c", size = 10116, upload-time = "2025-11-08T02:55:56.194Z" }, +] + [[package]] name = "types-pyyaml" version = "6.0.12.20250915" @@ -8259,6 +8326,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/20/9a227ea57c1285986c4cf78400d0a91615d25b24e257fd9e2969606bdfae/types_requests-2.32.4.20250913-py3-none-any.whl", hash = "sha256:78c9c1fffebbe0fa487a418e0fa5252017e9c60d1a2da394077f1780f655d7e1", size = 20658, upload-time = "2025-09-13T02:40:01.115Z" }, ] +[[package]] +name = "types-simplejson" +version = "3.20.0.20250822" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/6b/96d43a90cd202bd552cdd871858a11c138fe5ef11aeb4ed8e8dc51389257/types_simplejson-3.20.0.20250822.tar.gz", hash = "sha256:2b0bfd57a6beed3b932fd2c3c7f8e2f48a7df3978c9bba43023a32b3741a95b0", size = 10608, upload-time = "2025-08-22T03:03:35.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/9f/8e2c9e6aee9a2ff34f2ffce6ccd9c26edeef6dfd366fde611dc2e2c00ab9/types_simplejson-3.20.0.20250822-py3-none-any.whl", hash = "sha256:b5e63ae220ac7a1b0bb9af43b9cb8652237c947981b2708b0c776d3b5d8fa169", size = 10417, upload-time = "2025-08-22T03:03:34.485Z" }, +] + +[[package]] +name = "types-six" +version = "1.17.0.20251009" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/f7/448215bc7695cfa0c8a7e0dcfa54fe31b1d52fb87004fed32e659dd85c80/types_six-1.17.0.20251009.tar.gz", hash = "sha256:efe03064ecd0ffb0f7afe133990a2398d8493d8d1c1cc10ff3dfe476d57ba44f", size = 15552, upload-time = "2025-10-09T02:54:26.02Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/2f/94baa623421940e3eb5d2fc63570ebb046f2bb4d9573b8787edab3ed2526/types_six-1.17.0.20251009-py3-none-any.whl", hash = "sha256:2494f4c2a58ada0edfe01ea84b58468732e43394c572d9cf5b1dd06d86c487a3", size = 19935, upload-time = "2025-10-09T02:54:25.096Z" }, +] + +[[package]] +name = "types-tabulate" +version = "0.9.0.20241207" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/43/16030404a327e4ff8c692f2273854019ed36718667b2993609dc37d14dd4/types_tabulate-0.9.0.20241207.tar.gz", hash = "sha256:ac1ac174750c0a385dfd248edc6279fa328aaf4ea317915ab879a2ec47833230", size = 8195, upload-time = "2024-12-07T02:54:42.554Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/86/a9ebfd509cbe74471106dffed320e208c72537f9aeb0a55eaa6b1b5e4d17/types_tabulate-0.9.0.20241207-py3-none-any.whl", hash = "sha256:b8dad1343c2a8ba5861c5441370c3e35908edd234ff036d4298708a1d4cf8a85", size = 8307, upload-time = "2024-12-07T02:54:41.031Z" }, +] + +[[package]] +name = "types-ujson" +version = "5.10.0.20250822" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/bd/d372d44534f84864a96c19a7059d9b4d29db8541828b8b9dc3040f7a46d0/types_ujson-5.10.0.20250822.tar.gz", hash = "sha256:0a795558e1f78532373cf3f03f35b1f08bc60d52d924187b97995ee3597ba006", size = 8437, upload-time = "2025-08-22T03:02:19.433Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/f2/d812543c350674d8b3f6e17c8922248ee3bb752c2a76f64beb8c538b40cf/types_ujson-5.10.0.20250822-py3-none-any.whl", hash = "sha256:3e9e73a6dc62ccc03449d9ac2c580cd1b7a8e4873220db498f7dd056754be080", size = 7657, upload-time = "2025-08-22T03:02:18.699Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0"