diff --git a/dotnet/test/AskUserTests.cs b/dotnet/test/AskUserTests.cs index c21df302..55a56367 100644 --- a/dotnet/test/AskUserTests.cs +++ b/dotnet/test/AskUserTests.cs @@ -8,7 +8,7 @@ namespace GitHub.Copilot.SDK.Test; -public class AskUserTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "ask-user", output) +public class AskUserTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "ask_user", output) { [Fact] public async Task Should_Invoke_User_Input_Handler_When_Model_Uses_Ask_User_Tool() diff --git a/dotnet/test/McpAndAgentsTests.cs b/dotnet/test/McpAndAgentsTests.cs index d216032a..f24b7c8b 100644 --- a/dotnet/test/McpAndAgentsTests.cs +++ b/dotnet/test/McpAndAgentsTests.cs @@ -8,7 +8,7 @@ namespace GitHub.Copilot.SDK.Test; -public class McpAndAgentsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "mcp-and-agents", output) +public class McpAndAgentsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "mcp_and_agents", output) { [Fact] public async Task Should_Accept_MCP_Server_Configuration_On_Session_Create() diff --git a/go/e2e/testharness/context.go b/go/e2e/testharness/context.go index 718b08db..5206ee06 100644 --- a/go/e2e/testharness/context.go +++ b/go/e2e/testharness/context.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" "regexp" + "runtime" "strings" "sync" "testing" @@ -95,10 +96,23 @@ func (c *TestContext) ConfigureForTest(t *testing.T) { // Format: test/snapshots//.yaml // e.g., test/snapshots/session/should_have_stateful_conversation.yaml + + // Get the test file name from the caller's file path + _, callerFile, _, ok := runtime.Caller(1) + if !ok { + t.Fatal("Failed to get caller information") + } + + // Extract test file name: ask_user_test.go -> ask_user + testFile := strings.TrimSuffix(filepath.Base(callerFile), "_test.go") + + // Extract and sanitize the subtest name from t.Name() + // t.Name() returns "TestAskUser/should_handle_freeform_user_input_response" testName := t.Name() parts := strings.SplitN(testName, "/", 2) - - testFile := strings.ToLower(strings.TrimPrefix(parts[0], "Test")) + if len(parts) < 2 { + t.Fatalf("Expected test name with subtest, got: %s", testName) + } sanitizedName := strings.ToLower(regexp.MustCompile(`[^a-zA-Z0-9]`).ReplaceAllString(parts[1], "_")) snapshotPath := filepath.Join("..", "..", "test", "snapshots", testFile, sanitizedName+".yaml") diff --git a/nodejs/test/e2e/ask-user.test.ts b/nodejs/test/e2e/ask_user.test.ts similarity index 100% rename from nodejs/test/e2e/ask-user.test.ts rename to nodejs/test/e2e/ask_user.test.ts diff --git a/nodejs/test/e2e/harness/sdkTestContext.ts b/nodejs/test/e2e/harness/sdkTestContext.ts index ba68bb24..094eaff9 100644 --- a/nodejs/test/e2e/harness/sdkTestContext.ts +++ b/nodejs/test/e2e/harness/sdkTestContext.ts @@ -45,6 +45,8 @@ export async function createSdkTestContext({ cwd: workDir, env, logLevel: logLevel || "error", + // Use fake token in CI to allow cached responses without real auth + githubToken: process.env.CI === "true" ? "fake-token-for-e2e-tests" : undefined, }); const harness = { homeDir, workDir, openAiEndpoint, copilotClient, env }; @@ -93,7 +95,8 @@ function getTrafficCapturePath(testContext: TestContext): string { ); } - const testFileName = basename(testFilePath, suffix); + // Convert to snake_case for cross-SDK snapshot compatibility + const testFileName = basename(testFilePath, suffix).replace(/-/g, "_"); const taskNameAsFilename = testContext.task.name.replace(/[^a-z0-9]/gi, "_").toLowerCase(); return join(SNAPSHOTS_DIR, testFileName, `${taskNameAsFilename}.yaml`); } diff --git a/nodejs/test/e2e/mcp-and-agents.test.ts b/nodejs/test/e2e/mcp_and_agents.test.ts similarity index 100% rename from nodejs/test/e2e/mcp-and-agents.test.ts rename to nodejs/test/e2e/mcp_and_agents.test.ts diff --git a/nodejs/test/e2e/session.test.ts b/nodejs/test/e2e/session.test.ts index 9d5c0ef1..8e086023 100644 --- a/nodejs/test/e2e/session.test.ts +++ b/nodejs/test/e2e/session.test.ts @@ -5,7 +5,7 @@ import { CLI_PATH, createSdkTestContext } from "./harness/sdkTestContext.js"; import { getFinalAssistantMessage, getNextEventOfType } from "./harness/sdkTestHelper.js"; describe("Sessions", async () => { - const { copilotClient: client, openAiEndpoint, homeDir } = await createSdkTestContext(); + const { copilotClient: client, openAiEndpoint, homeDir, env } = await createSdkTestContext(); it("should create and destroy sessions", async () => { const session = await client.createSession({ model: "fake-test-model" }); @@ -158,11 +158,8 @@ describe("Sessions", async () => { // Resume using a new client const newClient = new CopilotClient({ cliPath: CLI_PATH, - env: { - ...process.env, - XDG_CONFIG_HOME: homeDir, - XDG_STATE_HOME: homeDir, - }, + env, + githubToken: process.env.CI === "true" ? "fake-token-for-e2e-tests" : undefined, }); onTestFinished(() => newClient.forceStop()); @@ -387,7 +384,9 @@ describe("Send Blocking Behavior", async () => { expect(events).toContain("assistant.message"); }); - it("sendAndWait throws on timeout", async () => { + // Skip in CI - this test validates client-side timeout behavior, not LLM responses. + // The test intentionally times out before receiving a response, so there's no snapshot to replay. + it.skipIf(process.env.CI === "true")("sendAndWait throws on timeout", async () => { const session = await client.createSession(); // Use a slow command to ensure timeout triggers before completion diff --git a/python/e2e/test_ask_user.py b/python/e2e/test_ask_user.py index 282b529b..93036ea4 100644 --- a/python/e2e/test_ask_user.py +++ b/python/e2e/test_ask_user.py @@ -10,7 +10,7 @@ class TestAskUser: - async def test_invoke_user_input_handler_when_model_uses_ask_user_tool( + async def test_should_invoke_user_input_handler_when_model_uses_ask_user_tool( self, ctx: E2ETestContext ): """Test that user input handler is invoked when model uses ask_user tool""" @@ -48,7 +48,7 @@ async def on_user_input_request(request, invocation): await session.destroy() - async def test_receive_choices_in_user_input_request(self, ctx: E2ETestContext): + async def test_should_receive_choices_in_user_input_request(self, ctx: E2ETestContext): """Test that choices are received in user input request""" user_input_requests = [] @@ -84,7 +84,7 @@ async def on_user_input_request(request, invocation): await session.destroy() - async def test_handle_freeform_user_input_response(self, ctx: E2ETestContext): + async def test_should_handle_freeform_user_input_response(self, ctx: E2ETestContext): """Test that freeform user input responses work""" user_input_requests = [] freeform_answer = "This is my custom freeform answer that was not in the choices" diff --git a/python/e2e/test_hooks.py b/python/e2e/test_hooks.py index 57aaee18..b64628e0 100644 --- a/python/e2e/test_hooks.py +++ b/python/e2e/test_hooks.py @@ -11,7 +11,7 @@ class TestHooks: - async def test_invoke_pre_tool_use_hook_when_model_runs_a_tool(self, ctx: E2ETestContext): + async def test_should_invoke_pretooluse_hook_when_model_runs_a_tool(self, ctx: E2ETestContext): """Test that preToolUse hook is invoked when model runs a tool""" pre_tool_use_inputs = [] @@ -38,7 +38,9 @@ async def on_pre_tool_use(input_data, invocation): await session.destroy() - async def test_invoke_post_tool_use_hook_after_model_runs_a_tool(self, ctx: E2ETestContext): + async def test_should_invoke_posttooluse_hook_after_model_runs_a_tool( + self, ctx: E2ETestContext + ): """Test that postToolUse hook is invoked after model runs a tool""" post_tool_use_inputs = [] @@ -65,7 +67,9 @@ async def on_post_tool_use(input_data, invocation): await session.destroy() - async def test_invoke_both_hooks_for_single_tool_call(self, ctx: E2ETestContext): + async def test_should_invoke_both_pretooluse_and_posttooluse_hooks_for_a_single_tool_call( + self, ctx: E2ETestContext + ): """Test that both preToolUse and postToolUse hooks fire for the same tool call""" pre_tool_use_inputs = [] post_tool_use_inputs = [] @@ -103,7 +107,9 @@ async def on_post_tool_use(input_data, invocation): await session.destroy() - async def test_deny_tool_execution_when_pre_tool_use_returns_deny(self, ctx: E2ETestContext): + async def test_should_deny_tool_execution_when_pretooluse_returns_deny( + self, ctx: E2ETestContext + ): """Test that returning deny in preToolUse prevents tool execution""" pre_tool_use_inputs = [] diff --git a/python/e2e/test_mcp_and_agents.py b/python/e2e/test_mcp_and_agents.py index 95738d5e..bfff6c09 100644 --- a/python/e2e/test_mcp_and_agents.py +++ b/python/e2e/test_mcp_and_agents.py @@ -12,7 +12,9 @@ class TestMCPServers: - async def test_accept_mcp_server_config_on_create(self, ctx: E2ETestContext): + async def test_should_accept_mcp_server_configuration_on_session_create( + self, ctx: E2ETestContext + ): """Test that MCP server configuration is accepted on session create""" mcp_servers: dict[str, MCPServerConfig] = { "test-server": { @@ -34,7 +36,9 @@ async def test_accept_mcp_server_config_on_create(self, ctx: E2ETestContext): await session.destroy() - async def test_accept_mcp_server_config_on_resume(self, ctx: E2ETestContext): + async def test_should_accept_mcp_server_configuration_on_session_resume( + self, ctx: E2ETestContext + ): """Test that MCP server configuration is accepted on session resume""" # Create a session first session1 = await ctx.client.create_session() @@ -61,31 +65,11 @@ async def test_accept_mcp_server_config_on_resume(self, ctx: E2ETestContext): await session2.destroy() - async def test_handle_multiple_mcp_servers(self, ctx: E2ETestContext): - """Test that multiple MCP servers can be configured""" - mcp_servers: dict[str, MCPServerConfig] = { - "server1": { - "type": "local", - "command": "echo", - "args": ["server1"], - "tools": ["*"], - }, - "server2": { - "type": "local", - "command": "echo", - "args": ["server2"], - "tools": ["*"], - }, - } - - session = await ctx.client.create_session({"mcp_servers": mcp_servers}) - - assert session.session_id is not None - await session.destroy() - class TestCustomAgents: - async def test_accept_custom_agent_config_on_create(self, ctx: E2ETestContext): + async def test_should_accept_custom_agent_configuration_on_session_create( + self, ctx: E2ETestContext + ): """Test that custom agent configuration is accepted on session create""" custom_agents: list[CustomAgentConfig] = [ { @@ -108,7 +92,9 @@ async def test_accept_custom_agent_config_on_create(self, ctx: E2ETestContext): await session.destroy() - async def test_accept_custom_agent_config_on_resume(self, ctx: E2ETestContext): + async def test_should_accept_custom_agent_configuration_on_session_resume( + self, ctx: E2ETestContext + ): """Test that custom agent configuration is accepted on session resume""" # Create a session first session1 = await ctx.client.create_session() @@ -135,74 +121,9 @@ async def test_accept_custom_agent_config_on_resume(self, ctx: E2ETestContext): await session2.destroy() - async def test_handle_custom_agent_with_tools(self, ctx: E2ETestContext): - """Test that custom agent with tools configuration is accepted""" - custom_agents: list[CustomAgentConfig] = [ - { - "name": "tool-agent", - "display_name": "Tool Agent", - "description": "An agent with specific tools", - "prompt": "You are an agent with specific tools.", - "tools": ["bash", "edit"], - "infer": True, - } - ] - - session = await ctx.client.create_session({"custom_agents": custom_agents}) - - assert session.session_id is not None - await session.destroy() - - async def test_handle_custom_agent_with_mcp_servers(self, ctx: E2ETestContext): - """Test that custom agent with its own MCP servers is accepted""" - custom_agents: list[CustomAgentConfig] = [ - { - "name": "mcp-agent", - "display_name": "MCP Agent", - "description": "An agent with its own MCP servers", - "prompt": "You are an agent with MCP servers.", - "mcp_servers": { - "agent-server": { - "type": "local", - "command": "echo", - "args": ["agent-mcp"], - "tools": ["*"], - } - }, - } - ] - - session = await ctx.client.create_session({"custom_agents": custom_agents}) - - assert session.session_id is not None - await session.destroy() - - async def test_handle_multiple_custom_agents(self, ctx: E2ETestContext): - """Test that multiple custom agents can be configured""" - custom_agents: list[CustomAgentConfig] = [ - { - "name": "agent1", - "display_name": "Agent One", - "description": "First agent", - "prompt": "You are agent one.", - }, - { - "name": "agent2", - "display_name": "Agent Two", - "description": "Second agent", - "prompt": "You are agent two.", - "infer": False, - }, - ] - - session = await ctx.client.create_session({"custom_agents": custom_agents}) - - assert session.session_id is not None - await session.destroy() - class TestCombinedConfiguration: - async def test_accept_mcp_servers_and_custom_agents(self, ctx: E2ETestContext): + async def test_should_accept_both_mcp_servers_and_custom_agents(self, ctx: E2ETestContext): """Test that both MCP servers and custom agents can be configured together""" mcp_servers: dict[str, MCPServerConfig] = { "shared-server": { diff --git a/python/e2e/test_permissions.py b/python/e2e/test_permissions.py index eedfbe9a..7635219d 100644 --- a/python/e2e/test_permissions.py +++ b/python/e2e/test_permissions.py @@ -15,7 +15,7 @@ class TestPermissions: - async def test_permission_handler_for_write_operations(self, ctx: E2ETestContext): + async def test_should_invoke_permission_handler_for_write_operations(self, ctx: E2ETestContext): """Test that permission handler is invoked for write operations""" permission_requests = [] @@ -44,28 +44,7 @@ def on_permission_request( await session.destroy() - async def test_permission_handler_for_shell_commands(self, ctx: E2ETestContext): - """Test that permission handler is invoked for shell commands""" - permission_requests = [] - - def on_permission_request( - request: PermissionRequest, invocation: dict - ) -> PermissionRequestResult: - permission_requests.append(request) - # Approve the permission - return {"kind": "approved"} - - session = await ctx.client.create_session({"on_permission_request": on_permission_request}) - - await session.send_and_wait({"prompt": "Run 'echo hello' and tell me the output"}) - - # Should have received at least one shell permission request - shell_requests = [req for req in permission_requests if req.get("kind") == "shell"] - assert len(shell_requests) > 0 - - await session.destroy() - - async def test_deny_permission(self, ctx: E2ETestContext): + async def test_should_deny_permission_when_handler_returns_denied(self, ctx: E2ETestContext): """Test denying permissions""" def on_permission_request( @@ -89,7 +68,9 @@ def on_permission_request( await session.destroy() - async def test_without_permission_handler(self, ctx: E2ETestContext): + async def test_should_work_without_permission_handler__default_behavior_( + self, ctx: E2ETestContext + ): """Test that sessions work without permission handler (default behavior)""" # Create session without on_permission_request handler session = await ctx.client.create_session() @@ -101,7 +82,7 @@ async def test_without_permission_handler(self, ctx: E2ETestContext): await session.destroy() - async def test_async_permission_handler(self, ctx: E2ETestContext): + async def test_should_handle_async_permission_handler(self, ctx: E2ETestContext): """Test async permission handler""" permission_requests = [] @@ -121,7 +102,7 @@ async def on_permission_request( await session.destroy() - async def test_resume_session_with_permission_handler(self, ctx: E2ETestContext): + async def test_should_resume_session_with_permission_handler(self, ctx: E2ETestContext): """Test resuming session with permission handler""" permission_requests = [] @@ -148,7 +129,7 @@ def on_permission_request( await session2.destroy() - async def test_permission_handler_errors(self, ctx: E2ETestContext): + async def test_should_handle_permission_handler_errors_gracefully(self, ctx: E2ETestContext): """Test that permission handler errors are handled gracefully""" def on_permission_request( @@ -169,7 +150,7 @@ def on_permission_request( await session.destroy() - async def test_tool_call_id_in_permission_requests(self, ctx: E2ETestContext): + async def test_should_receive_toolcallid_in_permission_requests(self, ctx: E2ETestContext): """Test that toolCallId is included in permission requests""" received_tool_call_id = False diff --git a/python/e2e/test_session.py b/python/e2e/test_session.py index 3cd18852..f2e545ed 100644 --- a/python/e2e/test_session.py +++ b/python/e2e/test_session.py @@ -1,5 +1,7 @@ """E2E Session Tests""" +import os + import pytest from copilot import CopilotClient @@ -158,8 +160,14 @@ async def test_should_resume_a_session_using_a_new_client(self, ctx: E2ETestCont assert "2" in answer.data.content # Resume using a new client + github_token = "fake-token-for-e2e-tests" if os.environ.get("CI") == "true" else None new_client = CopilotClient( - {"cli_path": ctx.cli_path, "cwd": ctx.work_dir, "env": ctx.get_env()} + { + "cli_path": ctx.cli_path, + "cwd": ctx.work_dir, + "env": ctx.get_env(), + "github_token": github_token, + } ) try: diff --git a/python/e2e/testharness/context.py b/python/e2e/testharness/context.py index 359fc844..e0b8ea4e 100644 --- a/python/e2e/testharness/context.py +++ b/python/e2e/testharness/context.py @@ -65,11 +65,14 @@ async def setup(self): self.proxy_url = await self._proxy.start() # Create the shared client (like Node.js/Go do) + # Use fake token in CI to allow cached responses without real auth + github_token = "fake-token-for-e2e-tests" if os.environ.get("CI") == "true" else None self._client = CopilotClient( { "cli_path": self.cli_path, "cwd": self.work_dir, "env": self.get_env(), + "github_token": github_token, } ) diff --git a/test/harness/replayingCapiProxy.ts b/test/harness/replayingCapiProxy.ts index b48a5b50..a700596b 100644 --- a/test/harness/replayingCapiProxy.ts +++ b/test/harness/replayingCapiProxy.ts @@ -32,6 +32,12 @@ const normalizedToolNames = { [shellConfig.writeShellToolName]: "${write_shell}", }; +/** + * Default model to use when no stored data is available for a given test. + * This enables responding to /models without needing to have a capture file. + */ +const defaultModel = "claude-sonnet-4.5"; + /** * An HTTP proxy that not only captures HTTP exchanges, but also stores them in a file on disk and * replays the stored responses on subsequent runs. @@ -149,7 +155,9 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { options.requestOptions.path?.startsWith("/stop") && options.requestOptions.method === "POST" ) { - const skipWritingCache = options.requestOptions.path.includes("skipWritingCache=true"); + const skipWritingCache = options.requestOptions.path.includes( + "skipWritingCache=true", + ); options.onResponseStart(200, {}); options.onResponseEnd(); await this.stop(skipWritingCache); @@ -184,13 +192,13 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { } // Handle /models endpoint - if ( - options.requestOptions.path === "/models" && - state.storedData?.models.length - ) { - const modelsResponse = createGetModelsResponse( - state.storedData.models, - ); + // Use stored models if available, otherwise use default model + if (options.requestOptions.path === "/models") { + const models = + state.storedData?.models && state.storedData.models.length > 0 + ? state.storedData.models + : [defaultModel]; + const modelsResponse = createGetModelsResponse(models); const body = JSON.stringify(modelsResponse); const headers = { "content-type": "application/json", @@ -202,6 +210,27 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { return; } + // Handle memory endpoints - return stub responses in tests + // Matches: /agents/*/memory/*/enabled, /agents/*/memory/*/recent, etc. + if (options.requestOptions.path?.match(/\/agents\/.*\/memory\//)) { + let body: string; + if (options.requestOptions.path.includes("/enabled")) { + body = JSON.stringify({ enabled: false }); + } else if (options.requestOptions.path.includes("/recent")) { + body = JSON.stringify({ memories: [] }); + } else { + body = JSON.stringify({}); + } + const headers = { + "content-type": "application/json", + ...commonResponseHeaders, + }; + options.onResponseStart(200, headers); + options.onData(Buffer.from(body)); + options.onResponseEnd(); + return; + } + // Handle /chat/completions endpoint if ( state.storedData && @@ -257,7 +286,7 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { // Fallback to normal proxying if no cached response found // This implicitly captures the new exchange too if (process.env.CI === "true") { - await emitNoMatchingRequestWarning( + await exitWithNoMatchingRequestError( options, state.testInfo, state.workDir, @@ -295,7 +324,7 @@ async function writeCapturesToDisk( } } -async function emitNoMatchingRequestWarning( +async function exitWithNoMatchingRequestError( options: PerformRequestOptions, testInfo: { file: string; line?: number } | undefined, workDir: string, @@ -305,18 +334,27 @@ async function emitNoMatchingRequestWarning( if (testInfo?.file) parts.push(`file=${testInfo.file}`); if (typeof testInfo?.line === "number") parts.push(`line=${testInfo.line}`); const header = parts.length ? ` ${parts.join(",")}` : ""; - const normalized = await parseAndNormalizeRequest( - options.body, - workDir, - toolResultNormalizers, - ); - const normalizedMessages = normalized.conversations[0]?.messages ?? []; - const warningMessage = - `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}. ` + - `Final message: ${JSON.stringify( + + let finalMessageInfo: string; + try { + const normalized = await parseAndNormalizeRequest( + options.body, + workDir, + toolResultNormalizers, + ); + const normalizedMessages = normalized.conversations[0]?.messages ?? []; + finalMessageInfo = JSON.stringify( normalizedMessages[normalizedMessages.length - 1], - )}`; - process.stderr.write(`::warning${header}::${warningMessage}\n`); + ); + } catch { + finalMessageInfo = `(unable to parse request body: ${options.body?.slice(0, 200) ?? "empty"})`; + } + + const errorMessage = + `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}. ` + + `Final message: ${finalMessageInfo}`; + process.stderr.write(`::error${header}::${errorMessage}\n`); + options.onError(new Error(errorMessage)); } async function findSavedChatCompletionResponse( diff --git a/test/snapshots/ask_user/should_handle_freeform_user_input_response.yaml b/test/snapshots/ask_user/should_handle_freeform_user_input_response.yaml new file mode 100644 index 00000000..437cc2dd --- /dev/null +++ b/test/snapshots/ask_user/should_handle_freeform_user_input_response.yaml @@ -0,0 +1,21 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Ask me a question using ask_user and then include my answer in your response. The question should be 'What is + your favorite color?' + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: ask_user + arguments: '{"question":"What is your favorite color?"}' + - role: tool + tool_call_id: toolcall_0 + content: "User response (freeform): This is my custom freeform answer that was not in the choices" + - role: assistant + content: 'You answered: "This is my custom freeform answer that was not in the choices"' diff --git a/test/snapshots/ask_user/should_invoke_user_input_handler_when_model_uses_ask_user_tool.yaml b/test/snapshots/ask_user/should_invoke_user_input_handler_when_model_uses_ask_user_tool.yaml new file mode 100644 index 00000000..beb7a584 --- /dev/null +++ b/test/snapshots/ask_user/should_invoke_user_input_handler_when_model_uses_ask_user_tool.yaml @@ -0,0 +1,21 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Ask me to choose between 'Option A' and 'Option B' using the ask_user tool. Wait for my response before + continuing. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: ask_user + arguments: '{"question":"Please choose between the following options:","choices":["Option A","Option B"]}' + - role: tool + tool_call_id: toolcall_0 + content: "User selected: Option A" + - role: assistant + content: You selected **Option A**. How would you like to proceed? diff --git a/test/snapshots/ask_user/should_receive_choices_in_user_input_request.yaml b/test/snapshots/ask_user/should_receive_choices_in_user_input_request.yaml new file mode 100644 index 00000000..70537806 --- /dev/null +++ b/test/snapshots/ask_user/should_receive_choices_in_user_input_request.yaml @@ -0,0 +1,21 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: "Use the ask_user tool to ask me to pick between exactly two options: 'Red' and 'Blue'. These should be + provided as choices. Wait for my answer." + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: ask_user + arguments: '{"question":"Please pick one of the following options:","choices":["Red","Blue"],"allow_freeform":false}' + - role: tool + tool_call_id: toolcall_0 + content: "User selected: Red" + - role: assistant + content: You selected **Red**. diff --git a/test/snapshots/mcp_and_agents/should_accept_both_mcp_servers_and_custom_agents.yaml b/test/snapshots/mcp_and_agents/should_accept_both_mcp_servers_and_custom_agents.yaml new file mode 100644 index 00000000..60d1eade --- /dev/null +++ b/test/snapshots/mcp_and_agents/should_accept_both_mcp_servers_and_custom_agents.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 7+7? + - role: assistant + content: 7 + 7 = 14 diff --git a/test/snapshots/mcp_and_agents/should_accept_custom_agent_configuration_on_session_create.yaml b/test/snapshots/mcp_and_agents/should_accept_custom_agent_configuration_on_session_create.yaml new file mode 100644 index 00000000..56da15ba --- /dev/null +++ b/test/snapshots/mcp_and_agents/should_accept_custom_agent_configuration_on_session_create.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 5+5? + - role: assistant + content: 5 + 5 = 10 diff --git a/test/snapshots/mcp_and_agents/should_accept_custom_agent_configuration_on_session_resume.yaml b/test/snapshots/mcp_and_agents/should_accept_custom_agent_configuration_on_session_resume.yaml new file mode 100644 index 00000000..9703495c --- /dev/null +++ b/test/snapshots/mcp_and_agents/should_accept_custom_agent_configuration_on_session_resume.yaml @@ -0,0 +1,14 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 1+1? + - role: assistant + content: 1+1 equals 2. + - role: user + content: What is 6+6? + - role: assistant + content: 6+6 equals 12. diff --git a/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_create.yaml b/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_create.yaml new file mode 100644 index 00000000..9fe2fcd0 --- /dev/null +++ b/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_create.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 2+2? + - role: assistant + content: 2 + 2 = 4 diff --git a/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_resume.yaml b/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_resume.yaml new file mode 100644 index 00000000..82c9917c --- /dev/null +++ b/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_resume.yaml @@ -0,0 +1,14 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 1+1? + - role: assistant + content: 1 + 1 = 2 + - role: user + content: What is 3+3? + - role: assistant + content: 3 + 3 = 6 diff --git a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml index 63fde597..b529a01e 100644 --- a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml +++ b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml @@ -46,4 +46,4 @@ conversations: test - role: assistant - content: The command executed successfully and output "test". + content: Command executed successfully, output is "test". diff --git a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml index 8deef905..6da08758 100644 --- a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml +++ b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml @@ -12,14 +12,15 @@ conversations: type: function function: name: report_intent - arguments: '{"intent":"Running sleep command"}' + arguments: '{"intent":"Running command"}' - role: assistant tool_calls: - id: toolcall_1 type: function function: name: ${shell} - arguments: '{"command":"sleep 2 && echo done","description":"Run sleep 2 and echo done","initial_wait":5}' + arguments: '{"command":"sleep 2 && echo done","description":"Run sleep and echo + command","initial_wait":5,"mode":"sync"}' - messages: - role: system content: ${system} @@ -31,12 +32,13 @@ conversations: type: function function: name: report_intent - arguments: '{"intent":"Running sleep command"}' + arguments: '{"intent":"Running command"}' - id: toolcall_1 type: function function: name: ${shell} - arguments: '{"command":"sleep 2 && echo done","description":"Run sleep 2 and echo done","initial_wait":5}' + arguments: '{"command":"sleep 2 && echo done","description":"Run sleep and echo + command","initial_wait":5,"mode":"sync"}' - role: tool tool_call_id: toolcall_0 content: Intent logged @@ -46,4 +48,4 @@ conversations: done - role: assistant - content: The command completed successfully after a 2-second sleep and output "done". + content: Command completed successfully. The output is "done". diff --git a/test/snapshots/session/sendandwait_throws_on_timeout.yaml b/test/snapshots/session/sendandwait_throws_on_timeout.yaml new file mode 100644 index 00000000..0e019bda --- /dev/null +++ b/test/snapshots/session/sendandwait_throws_on_timeout.yaml @@ -0,0 +1,8 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Run 'sleep 2 && echo done'