diff --git a/doc/code/scenarios/0_scenarios.ipynb b/doc/code/scenarios/0_scenarios.ipynb index 7cb24539a8..afe9337c21 100644 --- a/doc/code/scenarios/0_scenarios.ipynb +++ b/doc/code/scenarios/0_scenarios.ipynb @@ -74,7 +74,6 @@ " - `version`: Integer version number\n", " - `strategy_class`: The strategy enum class for this scenario\n", " - `objective_scorer_identifier`: Identifier dict for the scoring mechanism (optional)\n", - " - `include_default_baseline`: Whether to include a baseline attack (default: True)\n", " - `scenario_result_id`: Optional ID to resume an existing scenario (optional)\n", "\n", "5. **Initialization**: Call `await scenario.initialize_async()` to populate atomic attacks:\n", @@ -83,6 +82,8 @@ " - `max_concurrency`: Number of concurrent operations (default: 1)\n", " - `max_retries`: Number of retry attempts on failure (default: 0)\n", " - `memory_labels`: Optional labels for tracking (optional)\n", + " - `include_baseline`: Whether to prepend a baseline attack (defaults to the scenario type's\n", + " `DEFAULT_INCLUDE_BASELINE`; most scenarios default it on, `Jailbreak` defaults it off)\n", "\n", "### Example Structure\n", "\n", @@ -101,9 +102,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n", - "Loaded environment file: ./.pyrit/.env\n", - "Loaded environment file: ./.pyrit/.env.local\n" + "Found default environment files: ['./.pyrit/.env']\n", + "Loaded environment file: ./.pyrit/.env\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No new upgrade operations detected.\n" ] } ], @@ -193,34 +200,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n", + "Loading default configuration file: ./.pyrit/.pyrit_conf\n", + "Found default environment files: ['./.pyrit/.env']\n", "Loaded environment file: ./.pyrit/.env\n", - "Loaded environment file: 
./.pyrit/.env.local\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ "\n", "Available Scenarios:\n", "================================================================================\n", "\u001b[1m\u001b[36m\n", - " airt.content_harms\u001b[0m\n", - " Class: ContentHarms\n", - " Description:\n", - " Content Harms Scenario implementation for PyRIT. This scenario contains\n", - " various harm-based checks that you can run to get a quick idea about\n", - " model behavior with respect to certain harm categories.\n", - " Aggregate Strategies:\n", - " - all\n", - " Available Strategies (7):\n", - " hate, fairness, violence, sexual, harassment, misinformation, leakage\n", - " Default Strategy: all\n", - " Default Datasets (7, max 4 per dataset):\n", - " airt_hate, airt_fairness, airt_violence, airt_sexual, airt_harassment,\n", - " airt_misinformation, airt_leakage\n", - "\u001b[1m\u001b[36m\n", " airt.cyber\u001b[0m\n", " Class: Cyber\n", " Description:\n", @@ -229,9 +215,9 @@ " Cyber class contains different variations of the malware generation\n", " techniques.\n", " Aggregate Strategies:\n", - " - all\n", + " - all, single_turn, multi_turn\n", " Available Strategies (2):\n", - " single_turn, multi_turn\n", + " prompt_sending, red_teaming\n", " Default Strategy: all\n", " Default Datasets (1, max 4 per dataset):\n", " airt_malware\n", @@ -256,14 +242,14 @@ " Description:\n", " Leakage scenario implementation for PyRIT. This scenario tests how\n", " susceptible models are to leaking training data, PII, intellectual\n", - " property, or other confidential information. The Leakage class\n", - " contains different attack variations designed to extract sensitive\n", - " information from models.\n", + " property, or other confidential information. 
Uses the registry/factory\n", + " pattern to construct attack techniques.\n", " Aggregate Strategies:\n", - " - all, single_turn, multi_turn, ip, sensitive_data\n", - " Available Strategies (4):\n", - " first_letter, image, role_play, crescendo\n", - " Default Strategy: all\n", + " - all, default, single_turn, multi_turn\n", + " Available Strategies (9):\n", + " prompt_sending, role_play, many_shot, tap, crescendo_simulated,\n", + " red_teaming, context_compliance, first_letter, image\n", + " Default Strategy: default\n", " Default Datasets (1, max 4 per dataset):\n", " airt_leakage\n", "\u001b[1m\u001b[36m\n", @@ -296,6 +282,21 @@ " Default Datasets (1, max 4 per dataset):\n", " airt_imminent_crisis\n", "\u001b[1m\u001b[36m\n", + " airt.rapid_response\u001b[0m\n", + " Class: RapidResponse\n", + " Description:\n", + " Rapid Response scenario for content-harms testing. Tests model behavior\n", + " across multiple harm categories using selectable attack techniques.\n", + " Aggregate Strategies:\n", + " - all, default, single_turn, multi_turn\n", + " Available Strategies (7):\n", + " prompt_sending, role_play, many_shot, tap, crescendo_simulated,\n", + " red_teaming, context_compliance\n", + " Default Strategy: default\n", + " Default Datasets (7, max 4 per dataset):\n", + " airt_hate, airt_fairness, airt_violence, airt_sexual, airt_harassment,\n", + " airt_misinformation, airt_leakage\n", + "\u001b[1m\u001b[36m\n", " airt.scam\u001b[0m\n", " Class: Scam\n", " Description:\n", @@ -309,6 +310,21 @@ " Default Strategy: all\n", " Default Datasets (1, max 4 per dataset):\n", " airt_scams\n", + " Supported Parameters:\n", + " - max_turns (int) [default: 5]: Maximum conversation turns for the persuasive_rta strategy.\n", + "\u001b[1m\u001b[36m\n", + " benchmark.adversarial\u001b[0m\n", + " Class: AdversarialBenchmark\n", + " Description:\n", + " Benchmarking scenario that compares the attack success rate (ASR) of\n", + " several different adversarial models.\n", + " 
Aggregate Strategies:\n", + " - all, default, single_turn, multi_turn, light\n", + " Available Strategies (4):\n", + " role_play, tap, red_teaming, context_compliance\n", + " Default Strategy: light\n", + " Default Datasets (1, max 8 per dataset):\n", + " harmbench\n", "\u001b[1m\u001b[36m\n", " foundry.red_team_agent\u001b[0m\n", " Class: RedTeamAgent\n", @@ -359,7 +375,7 @@ "\n", "================================================================================\n", "\n", - "Total scenarios: 8\n" + "Total scenarios: 9\n" ] }, { @@ -389,10 +405,13 @@ "\n", "Every scenario can optionally include a **baseline attack** — a `PromptSendingAttack` that sends\n", "each objective directly to the target without any converters or multi-turn techniques. This is\n", - "controlled by the `include_default_baseline` parameter (default: `True` for most scenarios).\n", + "controlled by the `include_baseline` parameter on `initialize_async`; when omitted, each\n", + "scenario falls back to its own `DEFAULT_INCLUDE_BASELINE` class attribute (most scenarios\n", + "default it on; `Jailbreak` defaults it off).\n", "\n", - "To run *only* the baseline (no attack strategies), create a `RedTeamAgent` with\n", - "`include_baseline=True` (the default) and pass `scenario_strategies=None`. See\n", + "To run *only* the baseline (no attack strategies), pass `include_baseline=True` and\n", + "`scenario_strategies=None` explicitly to `initialize_async` — that way the example works\n", + "regardless of the scenario's default. See\n", "[Common Scenario Parameters](./1_common_scenario_parameters.ipynb) for a working example." 
] }, @@ -436,7 +455,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.15" + "version": "3.12.13" } }, "nbformat": 4, diff --git a/doc/code/scenarios/0_scenarios.py b/doc/code/scenarios/0_scenarios.py index 6788630572..d2e03a4067 100644 --- a/doc/code/scenarios/0_scenarios.py +++ b/doc/code/scenarios/0_scenarios.py @@ -76,7 +76,6 @@ # - `version`: Integer version number # - `strategy_class`: The strategy enum class for this scenario # - `objective_scorer_identifier`: Identifier dict for the scoring mechanism (optional) -# - `include_default_baseline`: Whether to include a baseline attack (default: True) # - `scenario_result_id`: Optional ID to resume an existing scenario (optional) # # 5. **Initialization**: Call `await scenario.initialize_async()` to populate atomic attacks: @@ -85,6 +84,8 @@ # - `max_concurrency`: Number of concurrent operations (default: 1) # - `max_retries`: Number of retry attempts on failure (default: 0) # - `memory_labels`: Optional labels for tracking (optional) +# - `include_baseline`: Whether to prepend a baseline attack (defaults to the scenario type's +# `DEFAULT_INCLUDE_BASELINE`; most scenarios default it on, `Jailbreak` defaults it off) # # ### Example Structure # @@ -174,10 +175,13 @@ def _build_display_group(self, *, technique_name: str, seed_group_name: str) -> # # Every scenario can optionally include a **baseline attack** — a `PromptSendingAttack` that sends # each objective directly to the target without any converters or multi-turn techniques. This is -# controlled by the `include_default_baseline` parameter (default: `True` for most scenarios). +# controlled by the `include_baseline` parameter on `initialize_async`; when omitted, each +# scenario falls back to its own `DEFAULT_INCLUDE_BASELINE` class attribute (most scenarios +# default it on; `Jailbreak` defaults it off). 
# -# To run *only* the baseline (no attack strategies), create a `RedTeamAgent` with -# `include_baseline=True` (the default) and pass `scenario_strategies=None`. See +# To run *only* the baseline (no attack strategies), pass `include_baseline=True` and +# `scenario_strategies=None` explicitly to `initialize_async` — that way the example works +# regardless of the scenario's default. See # [Common Scenario Parameters](./1_common_scenario_parameters.ipynb) for a working example. # %% [markdown] diff --git a/doc/code/scenarios/1_common_scenario_parameters.ipynb b/doc/code/scenarios/1_common_scenario_parameters.ipynb index 4c9923e2a3..fc5d8f12a5 100644 --- a/doc/code/scenarios/1_common_scenario_parameters.ipynb +++ b/doc/code/scenarios/1_common_scenario_parameters.ipynb @@ -32,9 +32,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n", - "Loaded environment file: ./.pyrit/.env\n", - "Loaded environment file: ./.pyrit/.env.local\n" + "Found default environment files: ['./.pyrit/.env']\n", + "Loaded environment file: ./.pyrit/.env\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No new upgrade operations detected.\n" ] }, { @@ -210,8 +216,8 @@ "## Baseline Execution\n", "\n", "The baseline sends each objective directly to the target without any converters or multi-turn\n", - "strategies. It is included automatically when `include_baseline=True` (the default). This is\n", - "useful for:\n", + "strategies. It is included automatically when `initialize_async` is called with\n", + "`include_baseline=True` (the default for scenarios that support a baseline). 
This is useful for:\n", "\n", "- **Measuring default defenses** — how does the target respond to unmodified harmful prompts?\n", "- **Establishing comparison points** — compare baseline refusal rates against attack-enhanced runs\n", @@ -227,12 +233,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bd475b00d8c845048a8a85d817baee53", + "model_id": "83d7df3d03e644e786db59f46dba22ef", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Executing RedTeamAgent: 0%| | 0/1 [00:00 None: """ @@ -125,10 +137,6 @@ def __init__( version (int): Version number of the scenario. strategy_class (Type[ScenarioStrategy]): The strategy enum class for this scenario. objective_scorer (Scorer): The objective scorer used to evaluate attack results. - include_default_baseline (bool): Whether to include a baseline atomic attack that sends all objectives - without modifications. Most scenarios should have some kind of baseline so users can understand - the impact of strategies, but subclasses can optionally write their own custom baselines. - Defaults to True. scenario_result_id (Optional[Union[uuid.UUID, str]]): Optional ID of an existing scenario result to resume. Can be either a UUID object or a string representation of a UUID. If provided and found in memory, the scenario will resume from prior progress. @@ -168,8 +176,6 @@ def __init__( self._scenario_result_id: Optional[str] = str(scenario_result_id) if scenario_result_id else None self._result_lock = asyncio.Lock() - self._include_baseline = include_default_baseline - # Store prepared strategies for use in _get_atomic_attacks_async self._scenario_strategies: list[ScenarioStrategy] = [] @@ -486,6 +492,7 @@ async def initialize_async( max_concurrency: int = 10, max_retries: int = 0, memory_labels: Optional[dict[str, str]] = None, + include_baseline: bool | None = None, ) -> None: """ Initialize the scenario by populating self._atomic_attacks and creating the ScenarioResult. 
@@ -513,9 +520,15 @@
                 For example, max_retries=3 allows up to 4 total attempts (1 initial + 3 retries).
             memory_labels (Optional[Dict[str, str]]): Additional labels to apply to all attack runs
                 in the scenario. These help track and categorize the scenario.
+            include_baseline (bool | None): Whether to prepend a baseline atomic attack that sends
+                all objectives without modifications, allowing comparison between unmodified prompts
+                and the scenario's strategies. If None (the default) and the scenario type supports
+                a baseline, its ``DEFAULT_INCLUDE_BASELINE`` class attribute decides. Passing
+                ``True`` on a scenario whose ``SUPPORTS_DEFAULT_BASELINE`` is False raises ``ValueError``.
 
         Raises:
-            ValueError: If no objective_target is provided.
+            ValueError: If no objective_target is provided, or if ``include_baseline=True`` is passed
+                to a scenario that does not support a default baseline.
         """
         # Validate required parameters
         if objective_target is None:
@@ -534,6 +547,20 @@
         self._max_retries = max_retries
         self._memory_labels = memory_labels or {}
 
+        # Resolve the effective include_baseline. Capability is checked first so a forbidden
+        # scenario type never silently inherits a True default; explicit-True on a forbidden
+        # type is a hard error rather than a silent ignore. When the scenario type supports
+        # the baseline, None defers to DEFAULT_INCLUDE_BASELINE on the class.
+        if not self.SUPPORTS_DEFAULT_BASELINE:
+            if include_baseline is True:
+                raise ValueError(
+                    f"{type(self).__name__} does not support a default baseline; pass "
+                    f"include_baseline=False or omit the argument."
+ ) + include_baseline = False + elif include_baseline is None: + include_baseline = self.DEFAULT_INCLUDE_BASELINE + # Prepare scenario strategies using the stored configuration self._scenario_strategies = self._prepare_strategies(scenario_strategies) @@ -546,7 +573,7 @@ async def initialize_async( self._atomic_attacks = await self._get_atomic_attacks_async() - if self._include_baseline: + if include_baseline: baseline_attack = self._get_baseline() self._atomic_attacks.insert(0, baseline_attack) diff --git a/pyrit/scenario/scenarios/airt/cyber.py b/pyrit/scenario/scenarios/airt/cyber.py index 7ff874be1b..5b9fa2ba31 100644 --- a/pyrit/scenario/scenarios/airt/cyber.py +++ b/pyrit/scenario/scenarios/airt/cyber.py @@ -104,7 +104,6 @@ def __init__( self, *, objective_scorer: TrueFalseScorer | None = None, - include_baseline: bool = True, scenario_result_id: str | None = None, ) -> None: """ @@ -113,8 +112,6 @@ def __init__( Args: objective_scorer (TrueFalseScorer | None): Objective scorer for malware detection. If not provided, defaults to a composite scorer using malware detection + refusal backstop. - include_baseline (bool): Whether to include a baseline atomic attack that sends all objectives - without modifications. Defaults to True. scenario_result_id (str | None): Optional ID of an existing scenario result to resume. 
""" self._objective_scorer: TrueFalseScorer = ( @@ -125,7 +122,6 @@ def __init__( version=self.VERSION, objective_scorer=self._objective_scorer, strategy_class=self.get_strategy_class(), - include_default_baseline=include_baseline, scenario_result_id=scenario_result_id, ) diff --git a/pyrit/scenario/scenarios/airt/jailbreak.py b/pyrit/scenario/scenarios/airt/jailbreak.py index 1870fdc0cf..763ac426dd 100644 --- a/pyrit/scenario/scenarios/airt/jailbreak.py +++ b/pyrit/scenario/scenarios/airt/jailbreak.py @@ -3,7 +3,7 @@ import os from pathlib import Path -from typing import Any, Optional, Union +from typing import Any, ClassVar, Optional, Union from pyrit.auth import get_azure_openai_auth from pyrit.common import apply_defaults @@ -81,6 +81,11 @@ class Jailbreak(Scenario): VERSION: int = 1 + #: Jailbreak runs many templates per objective, so the baseline atomic attack is rarely + #: informative relative to the volume of jailbreak templates. Off by default; callers that + #: want a comparison can pass ``include_baseline=True`` to ``initialize_async``. + DEFAULT_INCLUDE_BASELINE: ClassVar[bool] = False + @classmethod def get_strategy_class(cls) -> type[ScenarioStrategy]: """ @@ -121,7 +126,6 @@ def __init__( self, *, objective_scorer: Optional[TrueFalseScorer] = None, - include_baseline: bool = False, scenario_result_id: Optional[str] = None, num_templates: Optional[int] = None, num_attempts: int = 1, @@ -133,8 +137,6 @@ def __init__( Args: objective_scorer (Optional[TrueFalseScorer]): Scorer for detecting successful jailbreaks (non-refusal). If not provided, defaults to an inverted refusal scorer. - include_baseline (bool): Whether to include a baseline atomic attack that sends all - objectives without modifications. Defaults to True. scenario_result_id (Optional[str]): Optional ID of an existing scenario result to resume. num_templates (Optional[int]): Choose num_templates random jailbreaks rather than using all of them. 
num_attempts (Optional[int]): Number of times to try each jailbreak. @@ -184,7 +186,6 @@ def __init__( version=self.VERSION, strategy_class=JailbreakStrategy, objective_scorer=self._objective_scorer, - include_default_baseline=include_baseline, scenario_result_id=scenario_result_id, ) diff --git a/pyrit/scenario/scenarios/airt/leakage.py b/pyrit/scenario/scenarios/airt/leakage.py index a3a1826aef..9fc9bdca92 100644 --- a/pyrit/scenario/scenarios/airt/leakage.py +++ b/pyrit/scenario/scenarios/airt/leakage.py @@ -155,7 +155,6 @@ def __init__( version=self.VERSION, strategy_class=self.get_strategy_class(), objective_scorer=objective_scorer, - include_default_baseline=True, scenario_result_id=scenario_result_id, ) diff --git a/pyrit/scenario/scenarios/airt/psychosocial.py b/pyrit/scenario/scenarios/airt/psychosocial.py index ff98853747..34f289fc19 100644 --- a/pyrit/scenario/scenarios/airt/psychosocial.py +++ b/pyrit/scenario/scenarios/airt/psychosocial.py @@ -5,7 +5,7 @@ import os import pathlib from dataclasses import dataclass -from typing import Any, Optional, TypeVar +from typing import Any, ClassVar, Optional, TypeVar import yaml @@ -148,6 +148,10 @@ class Psychosocial(Scenario): VERSION: int = 1 + #: Psychosocial measures multi-turn escalation behavior; a single-shot baseline send + #: isn't a meaningful comparator, so the default baseline is forbidden. + SUPPORTS_DEFAULT_BASELINE: ClassVar[bool] = False + #: Psychosocial runs CrescendoAttack, which requires the target to natively support #: editable conversation history (for backtracking). Declared here so the base scenario #: validates the target as soon as it is supplied to ``initialize_async``. 
@@ -265,7 +269,6 @@ def __init__( strategy_class=PsychosocialStrategy, objective_scorer=self._objective_scorer, scenario_result_id=scenario_result_id, - include_default_baseline=False, ) # Store deprecated objectives for later resolution in _resolve_seed_groups diff --git a/pyrit/scenario/scenarios/airt/scam.py b/pyrit/scenario/scenarios/airt/scam.py index 789a3eaf68..9623f037e6 100644 --- a/pyrit/scenario/scenarios/airt/scam.py +++ b/pyrit/scenario/scenarios/airt/scam.py @@ -150,7 +150,6 @@ def __init__( *, objective_scorer: Optional[TrueFalseScorer] = None, adversarial_chat: Optional[PromptTarget] = None, - include_baseline: bool = True, scenario_result_id: Optional[str] = None, ) -> None: """ @@ -161,10 +160,6 @@ def __init__( evaluation. adversarial_chat (Optional[PromptTarget]): Chat target used to rephrase the objective into the role-play context (in single-turn strategies). - include_baseline (bool): Whether to include a baseline atomic attack that sends all objectives - without modifications. Defaults to True. When True, a "baseline" attack is automatically - added as the first atomic attack, allowing comparison between unmodified prompts and - encoding-modified prompts. scenario_result_id (Optional[str]): Optional ID of an existing scenario result to resume. 
""" if not objective_scorer: @@ -179,7 +174,6 @@ def __init__( version=self.VERSION, strategy_class=ScamStrategy, objective_scorer=objective_scorer, - include_default_baseline=include_baseline, scenario_result_id=scenario_result_id, ) diff --git a/pyrit/scenario/scenarios/benchmark/adversarial.py b/pyrit/scenario/scenarios/benchmark/adversarial.py index bdcbd7e0d5..6d0d553b48 100644 --- a/pyrit/scenario/scenarios/benchmark/adversarial.py +++ b/pyrit/scenario/scenarios/benchmark/adversarial.py @@ -34,6 +34,10 @@ class AdversarialBenchmark(Scenario): VERSION: int = 1 _cached_strategy_class: ClassVar[type[ScenarioStrategy] | None] = None + #: AdversarialBenchmark compares attack-success rates across adversarial models; a baseline + #: attack would be model-independent and contribute no signal to the comparison. + SUPPORTS_DEFAULT_BASELINE: ClassVar[bool] = False + @classmethod def get_strategy_class(cls) -> type[ScenarioStrategy]: """ @@ -118,7 +122,6 @@ def __init__( version=self.VERSION, objective_scorer=self._objective_scorer, strategy_class=self.get_strategy_class(), - include_default_baseline=False, scenario_result_id=scenario_result_id, ) diff --git a/pyrit/scenario/scenarios/foundry/red_team_agent.py b/pyrit/scenario/scenarios/foundry/red_team_agent.py index 8b7da826e6..f2d944a18c 100644 --- a/pyrit/scenario/scenarios/foundry/red_team_agent.py +++ b/pyrit/scenario/scenarios/foundry/red_team_agent.py @@ -247,7 +247,6 @@ def __init__( *, adversarial_chat: Optional[PromptTarget] = None, attack_scoring_config: Optional[AttackScoringConfig] = None, - include_baseline: bool = True, scenario_result_id: Optional[str] = None, ) -> None: """ @@ -260,10 +259,6 @@ def __init__( attack_scoring_config (Optional[AttackScoringConfig]): Configuration for attack scoring, including the objective scorer and auxiliary scorers. If not provided, creates a default configuration with a composite scorer using Azure Content Filter and SelfAsk Refusal scorers. 
- include_baseline (bool): Whether to include a baseline atomic attack that sends all objectives - without modifications. Defaults to True. When True, a "baseline" attack is automatically - added as the first atomic attack, allowing comparison between unmodified prompts and - attack-modified prompts. scenario_result_id (Optional[str]): Optional ID of an existing scenario result to resume. Raises: @@ -286,7 +281,6 @@ def __init__( version=self.VERSION, strategy_class=FoundryStrategy, objective_scorer=objective_scorer, - include_default_baseline=include_baseline, scenario_result_id=scenario_result_id, ) self._scenario_composites: list[FoundryComposite] = [] @@ -303,6 +297,7 @@ async def initialize_async( max_concurrency: int = 10, max_retries: int = 0, memory_labels: Optional[dict[str, str]] = None, + include_baseline: bool | None = None, ) -> None: """ Initialize the scenario. @@ -318,6 +313,7 @@ async def initialize_async( max_concurrency (int): Maximum number of concurrent attack executions. Defaults to 10. max_retries (int): Maximum number of retries on failure. Defaults to 0. memory_labels (Optional[dict[str, str]]): Labels to attach to all memory entries. + include_baseline (bool | None): See ``Scenario.initialize_async``. """ # This override exists purely for type-widening: FoundryComposite is a dataclass, # not a ScenarioStrategy enum member, so the base class signature would reject it. 
@@ -329,6 +325,7 @@ async def initialize_async( max_concurrency=max_concurrency, max_retries=max_retries, memory_labels=memory_labels, + include_baseline=include_baseline, ) def _prepare_strategies( # type: ignore[ty:invalid-method-override] diff --git a/pyrit/scenario/scenarios/garak/encoding.py b/pyrit/scenario/scenarios/garak/encoding.py index 531b11062e..f239637132 100644 --- a/pyrit/scenario/scenarios/garak/encoding.py +++ b/pyrit/scenario/scenarios/garak/encoding.py @@ -173,7 +173,6 @@ def __init__( *, objective_scorer: Optional[TrueFalseScorer] = None, encoding_templates: Optional[Sequence[str]] = None, - include_baseline: bool = True, scenario_result_id: Optional[str] = None, ) -> None: """ @@ -185,10 +184,6 @@ def __init__( category. encoding_templates (Optional[Sequence[str]]): Templates used to construct the decoding prompts. Defaults to AskToDecodeConverter.garak_templates. - include_baseline (bool): Whether to include a baseline atomic attack that sends all objectives - without modifications. Defaults to True. When True, a "baseline" attack is automatically - added as the first atomic attack, allowing comparison between unmodified prompts and - encoding-modified prompts. scenario_result_id (Optional[str]): Optional ID of an existing scenario result to resume. 
""" objective_scorer = objective_scorer or DecodingScorer(categories=["encoding_scenario"]) @@ -200,7 +195,6 @@ def __init__( version=self.VERSION, strategy_class=EncodingStrategy, objective_scorer=objective_scorer, - include_default_baseline=include_baseline, scenario_result_id=scenario_result_id, ) diff --git a/tests/integration/datasets/test_seed_dataset_provider_integration.py b/tests/integration/datasets/test_seed_dataset_provider_integration.py index 457aadb7e6..85a8a80235 100644 --- a/tests/integration/datasets/test_seed_dataset_provider_integration.py +++ b/tests/integration/datasets/test_seed_dataset_provider_integration.py @@ -683,7 +683,6 @@ async def test_red_team_agent_initializes_with_harmbench(self, sqlite_instance): rta = RedTeamAgent( adversarial_chat=target, attack_scoring_config=AttackScoringConfig(objective_scorer=mock_scorer), - include_baseline=False, ) # This is the critical call — it loads seed groups from memory @@ -693,6 +692,7 @@ async def test_red_team_agent_initializes_with_harmbench(self, sqlite_instance): objective_target=target, max_concurrency=1, scenario_strategies=[FoundryStrategy.Base64], + include_baseline=False, ) # Verify the scenario got objectives from harmbench diff --git a/tests/unit/scenario/test_adversarial.py b/tests/unit/scenario/test_adversarial.py index e6b082cb0d..a6445393a7 100644 --- a/tests/unit/scenario/test_adversarial.py +++ b/tests/unit/scenario/test_adversarial.py @@ -432,14 +432,41 @@ async def test_attacks_carry_seed_groups(self, mock_objective_target, single_adv async def test_baseline_excluded(self, mock_objective_target, single_adversarial_model): """AdversarialBenchmark must opt out of the parent's default baseline. - Verifies both the configuration toggle (``_include_baseline is False``) and - the observable property (no atomic attack is named ``"baseline"``). + Verifies both the class-level capability flag and the observable property + (no atomic attack is named ``"baseline"``). 
""" scenario, attacks = await self._init_and_get_attacks( mock_objective_target=mock_objective_target, adversarial_models=single_adversarial_model, ) - assert scenario._include_baseline is False + assert type(scenario).SUPPORTS_DEFAULT_BASELINE is False + assert not any(a.atomic_attack_name == "baseline" for a in attacks) + + @pytest.mark.asyncio + async def test_baseline_explicit_true_raises(self, mock_objective_target, single_adversarial_model): + """Explicitly passing include_baseline=True to a forbidden scenario raises ValueError.""" + scenario = AdversarialBenchmark(adversarial_models=single_adversarial_model) + with pytest.raises(ValueError, match="does not support a default baseline"): + await scenario.initialize_async( + objective_target=mock_objective_target, + include_baseline=True, + ) + + @pytest.mark.asyncio + async def test_baseline_explicit_false_succeeds(self, mock_objective_target, single_adversarial_model): + """Explicit include_baseline=False on a forbidden scenario is accepted (matches the default).""" + groups = {"harmbench": _make_seed_groups("harmbench")} + with ( + patch.object(DatasetConfiguration, "get_seed_attack_groups", return_value=groups), + patch("pyrit.scenario.core.scenario.Scenario._get_default_objective_scorer") as mock_scorer, + ): + mock_scorer.return_value = MagicMock(spec=TrueFalseScorer, get_identifier=lambda: _mock_id("scorer")) + scenario = AdversarialBenchmark(adversarial_models=single_adversarial_model) + await scenario.initialize_async( + objective_target=mock_objective_target, + include_baseline=False, + ) + attacks = await scenario._get_atomic_attacks_async() assert not any(a.atomic_attack_name == "baseline" for a in attacks) diff --git a/tests/unit/scenario/test_foundry.py b/tests/unit/scenario/test_foundry.py index e07f51e842..b4f7eaa2e8 100644 --- a/tests/unit/scenario/test_foundry.py +++ b/tests/unit/scenario/test_foundry.py @@ -655,7 +655,6 @@ async def test_scenario_composites_set_after_initialize( with 
patch.object(RedTeamAgent, "_resolve_seed_groups", return_value=mock_memory_seed_groups): scenario = RedTeamAgent( attack_scoring_config=AttackScoringConfig(objective_scorer=mock_objective_scorer), - include_baseline=False, ) # Before initialize_async, composites should be empty @@ -665,6 +664,7 @@ async def test_scenario_composites_set_after_initialize( objective_target=mock_objective_target, scenario_strategies=strategies, dataset_config=mock_dataset_config, + include_baseline=False, ) # After initialize_async, composites should be set @@ -735,12 +735,12 @@ async def test_initialize_with_foundry_composite_directly( with patch.object(RedTeamAgent, "_resolve_seed_groups", return_value=mock_memory_seed_groups): scenario = RedTeamAgent( attack_scoring_config=AttackScoringConfig(objective_scorer=mock_objective_scorer), - include_baseline=False, ) await scenario.initialize_async( objective_target=mock_objective_target, scenario_strategies=[composite], dataset_config=mock_dataset_config, + include_baseline=False, ) assert len(scenario._scenario_composites) == 1 @@ -766,12 +766,12 @@ async def test_initialize_with_mixed_composites_and_strategies( with patch.object(RedTeamAgent, "_resolve_seed_groups", return_value=mock_memory_seed_groups): scenario = RedTeamAgent( attack_scoring_config=AttackScoringConfig(objective_scorer=mock_objective_scorer), - include_baseline=False, ) await scenario.initialize_async( objective_target=mock_objective_target, scenario_strategies=[composite, FoundryStrategy.ROT13], dataset_config=mock_dataset_config, + include_baseline=False, ) assert len(scenario._scenario_composites) == 2 @@ -797,12 +797,12 @@ async def test_initialize_converts_scenario_composite_strategy_to_foundry_compos with patch.object(RedTeamAgent, "_resolve_seed_groups", return_value=mock_memory_seed_groups): scenario = RedTeamAgent( attack_scoring_config=AttackScoringConfig(objective_scorer=mock_objective_scorer), - include_baseline=False, ) await scenario.initialize_async( 
objective_target=mock_objective_target, scenario_strategies=[legacy], # type: ignore[arg-type] dataset_config=mock_dataset_config, + include_baseline=False, ) assert len(scenario._scenario_composites) == 1 @@ -828,12 +828,12 @@ async def test_initialize_converts_converter_first_composite_strategy( with patch.object(RedTeamAgent, "_resolve_seed_groups", return_value=mock_memory_seed_groups): scenario = RedTeamAgent( attack_scoring_config=AttackScoringConfig(objective_scorer=mock_objective_scorer), - include_baseline=False, ) await scenario.initialize_async( objective_target=mock_objective_target, scenario_strategies=[legacy], # type: ignore[arg-type] dataset_config=mock_dataset_config, + include_baseline=False, ) result = scenario._scenario_composites[0] @@ -858,12 +858,12 @@ async def test_initialize_converts_converter_only_composite_strategy( with patch.object(RedTeamAgent, "_resolve_seed_groups", return_value=mock_memory_seed_groups): scenario = RedTeamAgent( attack_scoring_config=AttackScoringConfig(objective_scorer=mock_objective_scorer), - include_baseline=False, ) await scenario.initialize_async( objective_target=mock_objective_target, scenario_strategies=[legacy], # type: ignore[arg-type] dataset_config=mock_dataset_config, + include_baseline=False, ) result = scenario._scenario_composites[0] diff --git a/tests/unit/scenario/test_jailbreak.py b/tests/unit/scenario/test_jailbreak.py index c873465c6b..36713fafa4 100644 --- a/tests/unit/scenario/test_jailbreak.py +++ b/tests/unit/scenario/test_jailbreak.py @@ -202,6 +202,37 @@ async def test_init_raises_exception_when_no_datasets_available(self, mock_objec with pytest.raises(ValueError, match="DatasetConfiguration has no seed_groups"): await scenario.initialize_async(objective_target=mock_objective_target) + def test_class_supports_baseline_but_defaults_off(self): + """Jailbreak supports a baseline but does not include one by default.""" + assert Jailbreak.SUPPORTS_DEFAULT_BASELINE is True + assert 
Jailbreak.DEFAULT_INCLUDE_BASELINE is False + + async def test_default_initialize_omits_baseline( + self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups + ): + """initialize_async without include_baseline honors DEFAULT_INCLUDE_BASELINE=False.""" + with patch.object(Jailbreak, "_resolve_seed_groups", return_value=mock_memory_seed_groups): + scenario = Jailbreak(objective_scorer=mock_objective_scorer) + await scenario.initialize_async(objective_target=mock_objective_target) + assert not any(a.atomic_attack_name == "baseline" for a in scenario._atomic_attacks) + + async def test_explicit_include_baseline_true_prepends_baseline( + self, mock_objective_target, mock_objective_scorer, mock_memory_seed_groups + ): + """Caller can override DEFAULT_INCLUDE_BASELINE=False by passing include_baseline=True.""" + from pyrit.scenario import DatasetConfiguration + + with ( + patch.object(Jailbreak, "_resolve_seed_groups", return_value=mock_memory_seed_groups), + patch.object(DatasetConfiguration, "get_all_seed_attack_groups", return_value=mock_memory_seed_groups), + ): + scenario = Jailbreak(objective_scorer=mock_objective_scorer) + await scenario.initialize_async( + objective_target=mock_objective_target, + include_baseline=True, + ) + assert scenario._atomic_attacks[0].atomic_attack_name == "baseline" + @pytest.mark.usefixtures(*FIXTURES) class TestJailbreakAttackGeneration: diff --git a/tests/unit/scenario/test_leakage_scenario.py b/tests/unit/scenario/test_leakage_scenario.py index 5a83bb4565..b8a0cfb8cc 100644 --- a/tests/unit/scenario/test_leakage_scenario.py +++ b/tests/unit/scenario/test_leakage_scenario.py @@ -102,10 +102,9 @@ def test_default_scorer_uses_leakage_yaml(self): scorer_path = DATASETS_PATH / "score" / "true_false_question" / "leakage.yaml" assert scorer_path.exists(), f"Expected leakage.yaml scorer at {scorer_path}" - def test_init_include_baseline_true(self, mock_objective_scorer): - """Test that include_baseline is always True.""" - 
scenario = Leakage(objective_scorer=mock_objective_scorer) - assert scenario._include_baseline is True + def test_init_supports_default_baseline(self): + """Leakage opts into the parent's default baseline.""" + assert Leakage.SUPPORTS_DEFAULT_BASELINE is True @pytest.mark.usefixtures(*FIXTURES) diff --git a/tests/unit/scenario/test_scenario.py b/tests/unit/scenario/test_scenario.py index bbead38407..a77ade8dfd 100644 --- a/tests/unit/scenario/test_scenario.py +++ b/tests/unit/scenario/test_scenario.py @@ -3,6 +3,7 @@ """Tests for the scenarios.Scenario class.""" +from typing import ClassVar from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch import pytest @@ -97,10 +98,12 @@ def sample_attack_results(): class ConcreteScenario(Scenario): """Concrete implementation of Scenario for testing.""" - def __init__(self, atomic_attacks_to_return=None, **kwargs): - # Default include_default_baseline=False for tests unless explicitly specified - kwargs.setdefault("include_default_baseline", False) + # Tests using this fixture should default to no baseline; set the class flag to forbid + # the default baseline so we don't have to thread include_baseline=False through every + # initialize_async call. 
+ SUPPORTS_DEFAULT_BASELINE: ClassVar[bool] = False + def __init__(self, atomic_attacks_to_return=None, **kwargs): # Add required strategy_class if not provided class TestStrategy(ScenarioStrategy): @@ -711,11 +714,10 @@ async def test_initialize_async_with_empty_strategies_and_baseline(self, mock_ob """Test that baseline is included when include_baseline=True, regardless of strategies.""" from pyrit.models import SeedAttackGroup, SeedObjective - # Create a scenario with include_default_baseline=True and TrueFalseScorer + # Create a scenario with TrueFalseScorer; baseline is included by default scenario = ConcreteScenarioWithTrueFalseScorer( name="Baseline Only Test", version=1, - include_default_baseline=True, ) # Create a mock dataset config with seed groups @@ -740,11 +742,10 @@ async def test_baseline_only_execution_runs_successfully(self, mock_objective_ta """Test that baseline-only scenario can run successfully.""" from pyrit.models import SeedAttackGroup, SeedObjective - # Create a scenario with include_default_baseline=True and TrueFalseScorer + # Create a scenario with TrueFalseScorer; baseline is included by default scenario = ConcreteScenarioWithTrueFalseScorer( name="Baseline Only Test", version=1, - include_default_baseline=True, ) # Create a mock dataset config with seed groups @@ -776,7 +777,6 @@ async def test_empty_strategies_without_baseline_allows_initialization(self, moc scenario = ConcreteScenario( name="No Baseline Test", version=1, - include_default_baseline=False, # No baseline ) mock_dataset_config = MagicMock(spec=DatasetConfiguration) @@ -799,7 +799,6 @@ async def test_standalone_baseline_uses_dataset_config_seeds(self, mock_objectiv scenario = ConcreteScenarioWithTrueFalseScorer( name="Baseline Seeds Test", version=1, - include_default_baseline=True, ) # Create specific seed groups to verify they're used diff --git a/tests/unit/scenario/test_scenario_parameters.py b/tests/unit/scenario/test_scenario_parameters.py index 
ae6eaf0010..f749418007 100644 --- a/tests/unit/scenario/test_scenario_parameters.py +++ b/tests/unit/scenario/test_scenario_parameters.py @@ -3,6 +3,7 @@ """Tests for Scenario custom parameter declaration, coercion, and validation (Stage 1b).""" +from typing import ClassVar from unittest.mock import MagicMock import pytest @@ -33,6 +34,9 @@ def get_aggregate_tags(cls) -> set[str]: return {"all"} class _ParamTestScenario(Scenario): + # No baseline in tests so atomic_attacks observations stay deterministic. + SUPPORTS_DEFAULT_BASELINE: ClassVar[bool] = False + @classmethod def get_strategy_class(cls): return _ParamTestStrategy @@ -60,7 +64,6 @@ async def _get_atomic_attacks_async(self): version=1, strategy_class=_ParamTestStrategy, objective_scorer=mock_scorer, - include_default_baseline=False, ) diff --git a/tests/unit/scenario/test_scenario_partial_results.py b/tests/unit/scenario/test_scenario_partial_results.py index 2cc7df714a..ca984153db 100644 --- a/tests/unit/scenario/test_scenario_partial_results.py +++ b/tests/unit/scenario/test_scenario_partial_results.py @@ -3,6 +3,7 @@ """Additional tests for Scenario retry with AttackExecutorResult functionality.""" +from typing import ClassVar from unittest.mock import MagicMock, PropertyMock import pytest @@ -73,10 +74,9 @@ def filter_objectives(*, remaining_objectives): class ConcreteScenario(Scenario): """Concrete implementation of Scenario for testing.""" - def __init__(self, *, atomic_attacks_to_return=None, objective_scorer=None, **kwargs): - # Default include_default_baseline=False for tests unless explicitly specified - kwargs.setdefault("include_default_baseline", False) + SUPPORTS_DEFAULT_BASELINE: ClassVar[bool] = False + def __init__(self, *, atomic_attacks_to_return=None, objective_scorer=None, **kwargs): # Get strategy_class from kwargs or use default strategy_class = kwargs.pop("strategy_class", None) or self.get_strategy_class() diff --git a/tests/unit/scenario/test_scenario_retry.py 
b/tests/unit/scenario/test_scenario_retry.py index 2ff0555192..2b93405363 100644 --- a/tests/unit/scenario/test_scenario_retry.py +++ b/tests/unit/scenario/test_scenario_retry.py @@ -3,6 +3,7 @@ """Tests for Scenario retry functionality.""" +from typing import ClassVar from unittest.mock import AsyncMock, MagicMock, PropertyMock import pytest @@ -136,10 +137,9 @@ def create_mock_atomic_attack(name: str, objectives: list[str], run_async_mock: class ConcreteScenario(Scenario): """Concrete implementation of Scenario for testing.""" - def __init__(self, atomic_attacks_to_return=None, objective_scorer=None, **kwargs): - # Default include_default_baseline=False for tests unless explicitly specified - kwargs.setdefault("include_default_baseline", False) + SUPPORTS_DEFAULT_BASELINE: ClassVar[bool] = False + def __init__(self, atomic_attacks_to_return=None, objective_scorer=None, **kwargs): # Get strategy_class from kwargs or use default strategy_class = kwargs.pop("strategy_class", None) or self.get_strategy_class()