diff --git a/apps/vscode-e2e/.env.local.sample b/apps/vscode-e2e/.env.local.sample index 40c9da1bb4..11e2615cb2 100644 --- a/apps/vscode-e2e/.env.local.sample +++ b/apps/vscode-e2e/.env.local.sample @@ -1 +1,2 @@ OPENROUTER_API_KEY=sk-or-v1-... +DEEPSEEK_API_KEY=sk-... diff --git a/apps/vscode-e2e/AGENTS.md b/apps/vscode-e2e/AGENTS.md index 56d53e1cec..2bdf4f5d9c 100644 --- a/apps/vscode-e2e/AGENTS.md +++ b/apps/vscode-e2e/AGENTS.md @@ -82,10 +82,42 @@ Record mode uses **record-on-miss**: if an existing fixture already matches a re If the LLM calls a tool first (e.g. `read_file`) and then calls `attempt_completion` after seeing the result, you need two fixtures: -- **Turn 1**: match on the task prompt → respond with the tool call -- **Turn 2**: match on a stable part of the tool _result_ → respond with `attempt_completion` +- **Turn 1**: match on the task prompt (with `sequenceIndex: 0` so it fires only once) → respond with the tool call, giving the tool call a unique `id` +- **Turn 2**: match on `toolCallId` → respond with `attempt_completion` + +Using `toolCallId` (the `id` of the tool call emitted in turn 1) is the recommended approach for turn-2 matching. 
It is: + +- **Precise**: fires only when that exact tool call's result is in the conversation +- **Cross-test safe**: each test's tool call ids are unique, so accumulated match counts from previous tests can't interfere +- **Stateless**: no `sequenceIndex` needed on turn-2 fixtures — if the task makes extra API calls they'll keep getting the same `attempt_completion` + +Example: + +```json +{ + "fixtures": [ + { + "match": { + "userMessage": "my-e2e-tag:my-test", + "sequenceIndex": 0 + }, + "response": { + "toolCalls": [{ "name": "read_file", "arguments": "{\"path\":\"marker.txt\"}", "id": "call_my_read" }] + } + }, + { + "match": { "toolCallId": "call_my_read" }, + "response": { + "toolCalls": [ + { "name": "attempt_completion", "arguments": "{\"result\":\"MY_MARKER\"}", "id": "call_my_done" } + ] + } + } + ] +} +``` -The tool result is provided by the extension (not the mock), so its content is deterministic if test files have stable names. Use a stable substring from the tool result as the turn-2 match string. +The `model` field can be added to either match when a test targets a specific model. ## 404 errors in logs are expected @@ -118,6 +150,23 @@ ZAI_API_KEY= TEST_FILE=zai.test pnpm --filter @roo-code/vscode-e2e test:ci ``` When adding a new test to this suite, add a matching fixture to the `installZAiFetchInterceptor` call in `suiteSetup`. Use a short unique prefix (e.g. `"zai-glm-e2e-mytest:"`) that won't appear in ``. + +### DeepSeek V4 (`suite/providers/deepseek-v4.test.ts`) + +DeepSeek exposes `deepSeekBaseUrl`, so the suite redirects the OpenAI-compatible DeepSeek client through aimock with `deepSeekBaseUrl: ${AIMOCK_URL}/v1`. The test still installs a lightweight fetch capture for request-shape assertions, but responses should come from aimock fixtures or aimock record mode. 
+ +Record DeepSeek fixtures with the targeted file filter so aimock proxies OpenAI-compatible traffic to `https://api.deepseek.com`: + +```sh +DEEPSEEK_API_KEY=sk-... TEST_FILE=deepseek-v4.test pnpm --filter @roo-code/vscode-e2e test:record +``` + +After converting the generated `openai-*.json` files into stable named fixtures, verify in mock mode: + +```sh +USE_MOCK=true TEST_FILE=deepseek-v4.test pnpm --filter @roo-code/vscode-e2e test:run +``` + ## Tests that use a non-default provider If your test calls `api.setConfiguration({ apiProvider: "anthropic", ... })`, point aimock at the diff --git a/apps/vscode-e2e/fixtures/deepseek-v4.json b/apps/vscode-e2e/fixtures/deepseek-v4.json new file mode 100644 index 0000000000..995dd0951b --- /dev/null +++ b/apps/vscode-e2e/fixtures/deepseek-v4.json @@ -0,0 +1,128 @@ +{ + "fixtures": [ + { + "match": { + "model": "deepseek-v4-flash", + "userMessage": "deepseek-v4-e2e:deepseek-v4-flash:reasoning-on", + "sequenceIndex": 0 + }, + "response": { + "toolCalls": [ + { + "name": "read_file", + "arguments": "{\"path\":\"deepseek-v4-e2e-deepseek-v4-flash-reasoning-on.txt\"}", + "id": "call_dsv4_flash_on_read" + } + ] + } + }, + { + "match": { + "model": "deepseek-v4-flash", + "toolCallId": "call_dsv4_flash_on_read" + }, + "response": { + "toolCalls": [ + { + "name": "attempt_completion", + "arguments": "{\"result\":\"DEEPSEEK_V4_MARKER_deepseek_v4_flash_reasoning_on\"}", + "id": "call_dsv4_flash_on_done" + } + ] + } + }, + { + "match": { + "model": "deepseek-v4-flash", + "userMessage": "deepseek-v4-e2e:deepseek-v4-flash:reasoning-off", + "sequenceIndex": 0 + }, + "response": { + "toolCalls": [ + { + "name": "read_file", + "arguments": "{\"path\":\"deepseek-v4-e2e-deepseek-v4-flash-reasoning-off.txt\"}", + "id": "call_dsv4_flash_off_read" + } + ] + } + }, + { + "match": { + "model": "deepseek-v4-flash", + "toolCallId": "call_dsv4_flash_off_read" + }, + "response": { + "toolCalls": [ + { + "name": "attempt_completion", + "arguments": 
"{\"result\":\"DEEPSEEK_V4_MARKER_deepseek_v4_flash_reasoning_off\"}", + "id": "call_dsv4_flash_off_done" + } + ] + } + }, + { + "match": { + "model": "deepseek-v4-pro", + "userMessage": "deepseek-v4-e2e:deepseek-v4-pro:reasoning-on", + "sequenceIndex": 0 + }, + "response": { + "toolCalls": [ + { + "name": "read_file", + "arguments": "{\"path\":\"deepseek-v4-e2e-deepseek-v4-pro-reasoning-on.txt\"}", + "id": "call_dsv4_pro_on_read" + } + ] + } + }, + { + "match": { + "model": "deepseek-v4-pro", + "toolCallId": "call_dsv4_pro_on_read" + }, + "response": { + "toolCalls": [ + { + "name": "attempt_completion", + "arguments": "{\"result\":\"DEEPSEEK_V4_MARKER_deepseek_v4_pro_reasoning_on\"}", + "id": "call_dsv4_pro_on_done" + } + ] + } + }, + { + "match": { + "model": "deepseek-v4-pro", + "userMessage": "deepseek-v4-e2e:deepseek-v4-pro:reasoning-off", + "sequenceIndex": 0 + }, + "response": { + "toolCalls": [ + { + "name": "read_file", + "arguments": "{\"path\":\"deepseek-v4-e2e-deepseek-v4-pro-reasoning-off.txt\"}", + "id": "call_dsv4_pro_off_read" + } + ] + } + }, + { + "match": { + "model": "deepseek-v4-pro", + "toolCallId": "call_dsv4_pro_off_read" + }, + "response": { + "toolCalls": [ + { + "name": "attempt_completion", + "arguments": "{\"result\":\"DEEPSEEK_V4_MARKER_deepseek_v4_pro_reasoning_off\"}", + "id": "call_dsv4_pro_off_done" + } + ] + } + } + ] +} diff --git a/apps/vscode-e2e/package.json b/apps/vscode-e2e/package.json index 4a1af856d3..7a96f7a864 100644 --- a/apps/vscode-e2e/package.json +++ b/apps/vscode-e2e/package.json @@ -5,6 +5,7 @@ "lint": "eslint src --ext=ts --max-warnings=0", "check-types": "tsc -p tsconfig.esm.json --noEmit", "format": "prettier --write src", + "test:deepseek-v4": "TEST_FILE=deepseek-v4.test pnpm test:ci", "test:ci": "pnpm -w bundle && pnpm --filter @roo-code/vscode-webview build && pnpm test:run", "test:ci:mock": "pnpm -w bundle && pnpm --filter @roo-code/vscode-webview build && USE_MOCK=true pnpm test:run", "test:record": 
"AIMOCK_RECORD=true pnpm test:ci", diff --git a/apps/vscode-e2e/src/runTest.ts b/apps/vscode-e2e/src/runTest.ts index 5efa409a9a..cdd11282e9 100644 --- a/apps/vscode-e2e/src/runTest.ts +++ b/apps/vscode-e2e/src/runTest.ts @@ -7,8 +7,15 @@ import { LLMock } from "@copilotkit/aimock" async function main() { const isRecord = process.env.AIMOCK_RECORD === "true" + const testGrep = process.argv.find((arg, i) => process.argv[i - 1] === "--grep") || process.env.TEST_GREP + const testFile = process.argv.find((arg, i) => process.argv[i - 1] === "--file") || process.env.TEST_FILE + const isDeepSeekTest = testFile?.includes("deepseek-v4") === true - if (isRecord && !process.env.OPENROUTER_API_KEY) { + if (isRecord && isDeepSeekTest && !process.env.DEEPSEEK_API_KEY) { + throw new Error("AIMOCK_RECORD=true requires DEEPSEEK_API_KEY to record DeepSeek fixtures") + } + + if (isRecord && !isDeepSeekTest && !process.env.OPENROUTER_API_KEY) { throw new Error("AIMOCK_RECORD=true requires OPENROUTER_API_KEY to record fixtures") } @@ -43,7 +50,7 @@ async function main() { // Use /api (not /api/v1) — aimock appends the request path (/v1/chat/completions) // so including /v1 here would produce a doubled /v1/v1 upstream URL. providers: { - openai: "https://openrouter.ai/api", + openai: isDeepSeekTest ? "https://api.deepseek.com" : "https://openrouter.ai/api", // aimock forwards the x-api-key header from the Anthropic SDK to the real API. 
anthropic: "https://api.anthropic.com", }, @@ -84,8 +91,6 @@ async function main() { // - npm run test:e2e -- --grep "write-to-file" // - TEST_GREP="apply-diff" npm run test:e2e // - TEST_FILE="task.test.js" npm run test:e2e - const testGrep = process.argv.find((arg, i) => process.argv[i - 1] === "--grep") || process.env.TEST_GREP - const testFile = process.argv.find((arg, i) => process.argv[i - 1] === "--file") || process.env.TEST_FILE // Pass test filters and mock URL as environment variables to the test runner const extensionTestsEnv = { diff --git a/apps/vscode-e2e/src/suite/index.ts b/apps/vscode-e2e/src/suite/index.ts index 065872f888..ca9aa8ef7a 100644 --- a/apps/vscode-e2e/src/suite/index.ts +++ b/apps/vscode-e2e/src/suite/index.ts @@ -23,7 +23,11 @@ export async function run() { apiProvider: "openrouter" as const, // In record mode, forward the real key so aimock can proxy it to OpenRouter. // In replay mode, "mock-key" is sufficient — aimock never contacts the real API. - openRouterApiKey: aimockUrl && !isRecord ? "mock-key" : process.env.OPENROUTER_API_KEY!, + openRouterApiKey: aimockUrl + ? isRecord + ? (process.env.OPENROUTER_API_KEY ?? 
"mock-key") + : "mock-key" + : process.env.OPENROUTER_API_KEY!, openRouterModelId: "openai/gpt-4.1", ...(aimockUrl && { openRouterBaseUrl: `${aimockUrl}/v1` }), }) diff --git a/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts new file mode 100644 index 0000000000..98da014dcd --- /dev/null +++ b/apps/vscode-e2e/src/suite/providers/deepseek-v4.test.ts @@ -0,0 +1,402 @@ +import * as assert from "assert" +import * as fs from "fs/promises" +import * as path from "path" +import * as vscode from "vscode" + +import { RooCodeEventName, type ClineMessage } from "@roo-code/types" + +import { setDefaultSuiteTimeout } from "../test-utils" +import { sleep, waitFor, waitUntilAborted } from "../utils" + +const DEEPSEEK_API_KEY = process.env.DEEPSEEK_API_KEY + +type DeepSeekModelId = "deepseek-v4-flash" | "deepseek-v4-pro" + +type CapturedDeepSeekRequest = { + model?: string + thinkingType?: "enabled" | "disabled" + reasoningEffort?: string + maxCompletionTokens?: number + probeTag?: string + lastUserMessage: string +} + +type DeepSeekProbeResult = { + completed: boolean + aborted: boolean + noToolErrors: number + mistakeLimitReached: boolean + completionText?: string + requests: CapturedDeepSeekRequest[] + transcript: string[] +} + +function getRequestUrl(input: RequestInfo | URL): string { + return typeof input === "string" ? input : input instanceof URL ? 
/** Model ids exercised by the DeepSeek V4 probes. */
type DeepSeekModelId = "deepseek-v4-flash" | "deepseek-v4-pro"

/** The fields of one intercepted chat-completions request that the assertions inspect. */
type CapturedDeepSeekRequest = {
	model?: string
	thinkingType?: "enabled" | "disabled"
	reasoningEffort?: string
	maxCompletionTokens?: number
	probeTag?: string
	lastUserMessage: string
}

/** Everything a single probe run reports back to the test for assertions and diagnostics. */
type DeepSeekProbeResult = {
	completed: boolean
	aborted: boolean
	noToolErrors: number
	mistakeLimitReached: boolean
	completionText?: string
	requests: CapturedDeepSeekRequest[]
	transcript: string[]
}

/** Subset of a chat-completions request body that the capture reads. */
type DeepSeekRequestBody = {
	model?: string
	thinking?: { type?: "enabled" | "disabled" }
	reasoning_effort?: string
	max_completion_tokens?: number
	messages?: Array<{ role?: string; content?: unknown }>
}

/** Returns the URL string of a `fetch` input, whichever of the three accepted forms it takes. */
function getRequestUrl(input: RequestInfo | URL): string {
	if (typeof input === "string") {
		return input
	}

	return input instanceof URL ? input.href : input.url
}

/** True when `rawUrl` parses as a URL whose origin equals `expectedOrigin`; false for unparsable input. */
function isUrlWithOrigin(rawUrl: string, expectedOrigin: string): boolean {
	try {
		return new URL(rawUrl).origin === expectedOrigin
	} catch {
		return false
	}
}

/** True when the path of `rawUrl` ends in `/chat/completions`; false for unparsable input. */
function isChatCompletionsUrl(rawUrl: string): boolean {
	try {
		return new URL(rawUrl).pathname.endsWith("/chat/completions")
	} catch {
		return false
	}
}

/**
 * Parses the JSON body of a `fetch` call.
 *
 * @returns the parsed object, or `undefined` when there is no string body, the body is not valid
 * JSON, or the JSON is not an object. It never throws: the capture only observes traffic and must
 * not turn a malformed body into a failed request.
 */
function getRequestBody(init?: RequestInit): DeepSeekRequestBody | undefined {
	if (typeof init?.body !== "string" || init.body.length === 0) {
		return undefined
	}

	try {
		const parsed: unknown = JSON.parse(init.body)
		const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)

		return isPlainObject ? (parsed as DeepSeekRequestBody) : undefined
	} catch {
		return undefined
	}
}

/** Reduces a request body to the fields recorded for assertions. */
function toCapturedRequest(body: DeepSeekRequestBody): CapturedDeepSeekRequest {
	// Tolerate a malformed `messages` value rather than throwing on spread.
	const messages = Array.isArray(body.messages) ? body.messages : []
	const lastUser = [...messages].reverse().find((message) => message?.role === "user")
	const lastUserMessage =
		typeof lastUser?.content === "string" ? lastUser.content : JSON.stringify(lastUser?.content ?? "")

	// The probe tag is searched across every message because the task prompt is not
	// necessarily the last user message once tool results have been appended.
	const probeTag = JSON.stringify(messages).match(/deepseek-v4-e2e:[^"\s]+/)?.[0]

	const request: CapturedDeepSeekRequest = {
		model: body.model,
		thinkingType: body.thinking?.type,
		reasoningEffort: body.reasoning_effort,
		maxCompletionTokens: body.max_completion_tokens,
		probeTag,
		lastUserMessage,
	}

	return request
}

/**
 * Wraps `globalThis.fetch` so that every chat-completions request sent to the origin of `baseUrl`
 * is summarised into `capture`. Every call, matching or not, is forwarded unchanged to the
 * previously installed `fetch`.
 *
 * @param capture array that receives one entry per matching request (mutated in place)
 * @param baseUrl provider base URL; only its origin is compared
 * @returns a function that puts the previously installed `fetch` back
 * @throws TypeError if `baseUrl` is not a valid absolute URL
 */
function installDeepSeekRequestCapture(capture: CapturedDeepSeekRequest[], baseUrl: string): () => void {
	const originalFetch = globalThis.fetch
	const targetOrigin = new URL(baseUrl).origin

	globalThis.fetch = async function (input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
		try {
			const url = getRequestUrl(input)

			if (isUrlWithOrigin(url, targetOrigin) && isChatCompletionsUrl(url)) {
				capture.push(toCapturedRequest(getRequestBody(init) ?? {}))
			}
		} catch {
			// Capturing is diagnostic only; a failure here must not block the real request.
		}

		return originalFetch.call(globalThis, input, init as RequestInit)
	} as typeof globalThis.fetch

	return () => {
		globalThis.fetch = originalFetch
	}
}

/** Name of the workspace file the probe asks the model to read. */
function deepSeekFileName(modelId: DeepSeekModelId, reasoningEnabled: boolean): string {
	return `deepseek-v4-e2e-${modelId}-${reasoningEnabled ? "reasoning-on" : "reasoning-off"}.txt`
}

/** Unique prompt prefix used to match a probe's requests against its fixtures. */
function deepSeekProbeTag(modelId: DeepSeekModelId, reasoningEnabled: boolean): string {
	return `deepseek-v4-e2e:${modelId}:${reasoningEnabled ? "reasoning-on" : "reasoning-off"}`
}

/** Marker written into the probe file, which the task is expected to return verbatim. */
function deepSeekMarker(modelId: DeepSeekModelId, reasoningEnabled: boolean): string {
	// A global regex replace is used instead of `replaceAll` so the helper does not need the ES2021 lib.
	const modelSlug = modelId.replace(/-/g, "_")

	return `DEEPSEEK_V4_MARKER_${modelSlug}_${reasoningEnabled ? "reasoning_on" : "reasoning_off"}`
}

/**
 * Renders a probe result as multi-line text for appending to assertion messages.
 * Each captured request becomes one `request[i]=…` line with the user message cut to 160 characters.
 */
function formatDiagnostics(result: DeepSeekProbeResult): string {
	const requestLines = result.requests.map((request, index) => {
		const summary = {
			model: request.model,
			thinkingType: request.thinkingType,
			reasoningEffort: request.reasoningEffort,
			maxCompletionTokens: request.maxCompletionTokens,
			probeTag: request.probeTag,
			lastUserMessage: request.lastUserMessage.slice(0, 160),
		}

		return `request[${index}]=${JSON.stringify(summary)}`
	})

	return [
		`completed=${result.completed}`,
		`aborted=${result.aborted}`,
		`noToolErrors=${result.noToolErrors}`,
		`mistakeLimitReached=${result.mistakeLimitReached}`,
		`completionText=${JSON.stringify(result.completionText)}`,
		// Say so explicitly when nothing was captured, rather than printing an empty label.
		requestLines.length > 0 ? requestLines.join("\n") : "requestSummary=(none)",
		"transcript:",
		...result.transcript.map((line) => `  ${line}`),
	].join("\n")
}
/**
 * Runs one end-to-end probe against the DeepSeek provider.
 *
 * The probe writes a marker file into the first workspace folder, starts a task that asks the
 * model to read it with `read_file` and reply with the marker, then waits until the task
 * completes, aborts, or hits the mistake limit. The marker file is removed afterwards.
 *
 * @param modelId DeepSeek model under test
 * @param reasoningEnabled whether the provider is configured with reasoning effort "high" or "disable"
 * @param requests shared capture array; it is emptied at the start of the probe
 * @returns the expected marker and the collected probe result
 * @throws Error when no workspace folder is open, or when waiting for the task fails
 */
async function runDeepSeekToolProbe(
	modelId: DeepSeekModelId,
	reasoningEnabled: boolean,
	requests: CapturedDeepSeekRequest[],
): Promise<{ result: DeepSeekProbeResult; marker: string }> {
	const api = globalThis.api
	const workspaceDir = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath

	if (!workspaceDir) {
		throw new Error("No workspace folder found for DeepSeek E2E probe")
	}

	// Start from an empty capture so requests from an earlier probe cannot leak into this result.
	requests.length = 0

	const marker = deepSeekMarker(modelId, reasoningEnabled)
	const fileName = deepSeekFileName(modelId, reasoningEnabled)
	const probeTag = deepSeekProbeTag(modelId, reasoningEnabled)
	const filePath = path.join(workspaceDir, fileName)
	const aimockUrl = process.env.AIMOCK_URL
	const isRecord = process.env.AIMOCK_RECORD === "true"

	await fs.writeFile(filePath, `${marker}\n`, "utf8")

	const transcript: string[] = []
	let noToolErrors = 0
	let mistakeLimitReached = false
	let completionText: string | undefined
	let sawCompletionResult = false
	let taskId: string | undefined

	// Finished task ids are buffered rather than compared on arrival: the listeners are attached
	// before the task id is known, so an event that fires early is not lost.
	const completedTaskIds = new Set<string>()
	const abortedTaskIds = new Set<string>()
	const isCompleted = (): boolean => taskId !== undefined && completedTaskIds.has(taskId)
	const isAborted = (): boolean => taskId !== undefined && abortedTaskIds.has(taskId)

	const messageHandler = ({ message }: { message: ClineMessage }) => {
		if (message.type === "say" && message.partial === false) {
			transcript.push(`${message.say}: ${message.text?.slice(0, 220) ?? ""}`)

			if (message.say === "error" && message.text === "MODEL_NO_TOOLS_USED") {
				noToolErrors++
			}

			const text = message.text?.trim()

			if (text && message.say === "completion_result") {
				completionText = text
				sawCompletionResult = true
			} else if (text && message.say === "text" && !sawCompletionResult) {
				// Plain text is only a fallback; it must not overwrite an explicit completion result.
				completionText = text
			}
		}

		if (message.type === "ask") {
			transcript.push(`${message.ask}: ${message.text?.slice(0, 220) ?? ""}`)

			if (message.ask === "mistake_limit_reached") {
				mistakeLimitReached = true
			}
		}
	}

	const taskCompletedHandler = (completedTaskId: string) => {
		completedTaskIds.add(completedTaskId)
	}

	const taskAbortedHandler = (abortedTaskId: string) => {
		abortedTaskIds.add(abortedTaskId)
	}

	api.on(RooCodeEventName.Message, messageHandler)
	api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
	api.on(RooCodeEventName.TaskAborted, taskAbortedHandler)

	try {
		await api.setConfiguration({
			apiProvider: "deepseek" as const,
			// Same rule as the OpenRouter defaults: behind aimock a placeholder key is enough in
			// replay mode, and in record mode it is the fallback when no real key is set.
			deepSeekApiKey: aimockUrl ? (isRecord ? (DEEPSEEK_API_KEY ?? "mock-key") : "mock-key") : DEEPSEEK_API_KEY!,
			...(aimockUrl && { deepSeekBaseUrl: `${aimockUrl}/v1` }),
			apiModelId: modelId,
			enableReasoningEffort: reasoningEnabled,
			reasoningEffort: reasoningEnabled ? ("high" as const) : ("disable" as const),
		})

		taskId = await api.startNewTask({
			configuration: {
				mode: "code",
				autoApprovalEnabled: true,
				alwaysAllowReadOnly: true,
				alwaysAllowReadOnlyOutsideWorkspace: true,
				alwaysAllowExecute: false,
				disabledTools: ["execute_command", "read_command_output"],
			},
			text:
				`${probeTag} ` +
				`Use only the read_file tool to read "${fileName}" from the current workspace. ` +
				`Do not run shell commands, search commands, or terminal commands. ` +
				`Then reply with only the exact marker from that file. Do not guess, and do not add any extra text.`,
		})

		try {
			await waitFor(() => isCompleted() || isAborted() || mistakeLimitReached, {
				timeout: 180_000,
				interval: 500,
			})

			if (mistakeLimitReached && !isCompleted() && !isAborted()) {
				// The task is parked on the mistake-limit prompt; cancel it so it does not linger.
				await api.cancelCurrentTask()
				await waitUntilAborted({ api, taskId, timeout: 15_000 })
				abortedTaskIds.add(taskId)
			}
		} catch (error) {
			if (taskId && !isCompleted() && !isAborted() && !mistakeLimitReached) {
				try {
					await api.cancelCurrentTask()
					await waitUntilAborted({ api, taskId, timeout: 15_000 })
					abortedTaskIds.add(taskId)
				} catch {
					// Best effort only; keep the original timeout failure.
				}
			}

			throw error
		}

		return {
			marker,
			result: {
				completed: isCompleted(),
				aborted: isAborted(),
				noToolErrors,
				mistakeLimitReached,
				completionText,
				// Keep this model's requests; untagged ones are kept too, since not every request carries the tag.
				requests: requests.filter(
					(request) => request.model === modelId && (!request.probeTag || request.probeTag === probeTag),
				),
				transcript,
			},
		}
	} finally {
		api.off(RooCodeEventName.Message, messageHandler)
		api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
		api.off(RooCodeEventName.TaskAborted, taskAbortedHandler)

		if (taskId && !isCompleted() && !isAborted()) {
			try {
				await api.cancelCurrentTask()
				await waitUntilAborted({ api, taskId, timeout: 15_000 })
			} catch {
				// Task may already be finished or absent.
			}
		}

		await sleep(1_500)
		await fs.rm(filePath, { force: true })
	}
}

suite("DeepSeek V4 provider", function () {
	setDefaultSuiteTimeout(this)
	this.timeout(8 * 60_000)

	let restoreFetch: (() => void) | undefined
	const requests: CapturedDeepSeekRequest[] = []

	// Without aimock and without a real key there is nothing to talk to, so skip each test.
	setup(function () {
		if (!process.env.AIMOCK_URL && !DEEPSEEK_API_KEY) {
			this.skip()
		}
	})

	suiteSetup(() => {
		restoreFetch = installDeepSeekRequestCapture(
			requests,
			process.env.AIMOCK_URL ? `${process.env.AIMOCK_URL}/v1` : "https://api.deepseek.com",
		)
	})

	// Restore the global fetch and switch back to the OpenRouter configuration.
	suiteTeardown(async () => {
		restoreFetch?.()
		restoreFetch = undefined

		const aimockUrl = process.env.AIMOCK_URL
		const isRecord = process.env.AIMOCK_RECORD === "true"
		await globalThis.api.setConfiguration({
			apiProvider: "openrouter" as const,
			openRouterApiKey: aimockUrl
				? isRecord
					? (process.env.OPENROUTER_API_KEY ?? "mock-key")
					: "mock-key"
				: process.env.OPENROUTER_API_KEY!,
			openRouterModelId: "openai/gpt-4.1",
			...(aimockUrl && { openRouterBaseUrl: `${aimockUrl}/v1` }),
		})
	})

	for (const [modelId, reasoningEnabled] of [
		["deepseek-v4-flash", true],
		["deepseek-v4-flash", false],
		["deepseek-v4-pro", true],
		["deepseek-v4-pro", false],
	] as const) {
		test(`${modelId} should complete a tool-using task with reasoning ${reasoningEnabled ? "enabled" : "disabled"}`, async () => {
			const { result, marker } = await runDeepSeekToolProbe(modelId, reasoningEnabled, requests)
			const diagnostics = formatDiagnostics(result)
			const firstRequest = result.requests[0]

			// Request shape: model, token limit and reasoning flags on the first captured request.
			assert.ok(firstRequest, `DeepSeek should have issued at least one API request.\n${diagnostics}`)
			assert.strictEqual(
				firstRequest.model,
				modelId,
				`DeepSeek should request the expected model.\n${diagnostics}`,
			)
			assert.ok(
				typeof firstRequest.maxCompletionTokens === "number" && firstRequest.maxCompletionTokens > 0,
				`DeepSeek request should include max_completion_tokens.\n${diagnostics}`,
			)

			if (reasoningEnabled) {
				assert.strictEqual(
					firstRequest.thinkingType,
					"enabled",
					`Reasoning-enabled probe should send thinking=enabled.\n${diagnostics}`,
				)
				assert.ok(
					firstRequest.reasoningEffort === "high" || firstRequest.reasoningEffort === "max",
					`Reasoning-enabled probe should send a DeepSeek reasoning_effort.\n${diagnostics}`,
				)
			} else {
				assert.strictEqual(
					firstRequest.thinkingType,
					"disabled",
					`Reasoning-disabled probe should send thinking=disabled.\n${diagnostics}`,
				)
				assert.strictEqual(
					firstRequest.reasoningEffort,
					undefined,
					`Reasoning-disabled probe should omit reasoning_effort.\n${diagnostics}`,
				)
			}

			// Task outcome: clean completion, no tool-use errors, and the exact marker returned.
			assert.ok(result.completed, `Task should complete cleanly.\n${diagnostics}`)
			assert.strictEqual(
				result.mistakeLimitReached,
				false,
				`Task should not hit the consecutive mistake limit.\n${diagnostics}`,
			)
			assert.strictEqual(
				result.noToolErrors,
				0,
				`Task should not emit MODEL_NO_TOOLS_USED while handling a required tool loop.\n${diagnostics}`,
			)
			assert.strictEqual(
				result.completionText,
				marker,
				`Task should return the exact marker from the workspace file.\n${diagnostics}`,
			)
		})
	}
})
a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -606,6 +606,96 @@ describe("DeepSeekHandler", () => { expect(callArgs.thinking).toBeUndefined() }) + it("should force tool_choice to required for thinking models when auto and tools present", async () => { + const v4Handler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-v4-pro", + }) + + const tools: any[] = [ + { + type: "function", + function: { + name: "attempt_completion", + description: "Complete the task", + parameters: { type: "object", properties: {} }, + }, + }, + ] + + const stream = v4Handler.createMessage(systemPrompt, messages, { + taskId: "test", + tools, + tool_choice: "auto", + }) + for await (const _chunk of stream) { + // Consume the stream + } + + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.tool_choice).toBe("required") + }) + + it("should not override tool_choice for non-thinking models", async () => { + const chatHandler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-chat", + }) + + const tools: any[] = [ + { + type: "function", + function: { + name: "attempt_completion", + description: "Complete the task", + parameters: { type: "object", properties: {} }, + }, + }, + ] + + const stream = chatHandler.createMessage(systemPrompt, messages, { + taskId: "test", + tools, + tool_choice: "auto", + }) + for await (const _chunk of stream) { + // Consume the stream + } + + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.tool_choice).toBe("auto") + }) + + it("should not override explicit non-auto tool_choice for thinking models", async () => { + const v4Handler = new DeepSeekHandler({ + ...mockOptions, + apiModelId: "deepseek-v4-flash", + }) + + const tools: any[] = [ + { + type: "function", + function: { + name: "attempt_completion", + description: "Complete the task", + parameters: { type: "object", properties: {} }, + }, + }, + ] + + const stream = 
v4Handler.createMessage(systemPrompt, messages, { + taskId: "test", + tools, + tool_choice: "none", + }) + for await (const _chunk of stream) { + // Consume the stream + } + + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.tool_choice).toBe("none") + }) + it("should handle tool calls with reasoning_content", async () => { const reasonerHandler = new DeepSeekHandler({ ...mockOptions, diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index e2ffd29169..97a60e85a3 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -111,6 +111,14 @@ export class DeepSeekHandler extends OpenAiHandler { mergeToolResultText: isThinkingModel, }) + // DeepSeek thinking models sometimes skip tool use when tool_choice is "auto", + // causing consecutive MODEL_NO_TOOLS_USED errors. Force "required" for thinking turns + // so the model must call a tool each turn (attempt_completion is always included). + const toolChoice = + isThinkingModel && metadata?.tool_choice === "auto" && metadata?.tools?.length + ? "required" + : metadata?.tool_choice + const requestOptions: DeepSeekChatCompletionParams = { model: modelId, ...(!isThinkingModel && { temperature: temperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE }), @@ -120,7 +128,7 @@ export class DeepSeekHandler extends OpenAiHandler { ...(thinking && { thinking }), ...(deepSeekReasoningEffort && { reasoning_effort: deepSeekReasoningEffort }), tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, + tool_choice: toolChoice, parallel_tool_calls: metadata?.parallelToolCalls ?? true, }