diff --git a/apps/vscode-e2e/src/suite/providers/zai.test.ts b/apps/vscode-e2e/src/suite/providers/zai.test.ts
index 03576bb167..e0c11b02a0 100644
--- a/apps/vscode-e2e/src/suite/providers/zai.test.ts
+++ b/apps/vscode-e2e/src/suite/providers/zai.test.ts
@@ -153,7 +153,10 @@ suite("Z.ai GLM provider", function () {
 	suiteSetup(async () => {
 		restoreFetch = installZAiFetchInterceptor(
-			[{ match: "zai-glm-e2e:", result: "4" }],
+			[
+				{ match: "zai-glm-e2e:", result: "4" },
+				{ match: "zai-glm-5-turbo-e2e:", result: "4" },
+			],
 			requestCapture,
 			!!ZAI_API_KEY,
 		)
 
@@ -211,4 +214,43 @@ suite("Z.ai GLM provider", function () {
 			`max_tokens should be the documented glm-5.1 limit (131_072) but was ${requestCapture.maxTokens}`,
 		)
 	})
+
+	test("Should complete a task end-to-end using glm-5-turbo via Z.ai provider", async () => {
+		await globalThis.api.setConfiguration({
+			apiProvider: "zai" as const,
+			zaiApiKey: ZAI_API_KEY ?? "mock-key",
+			zaiApiLine: "international_api" as const,
+			apiModelId: "glm-5-turbo",
+		})
+
+		const api = globalThis.api
+		const messages: ClineMessage[] = []
+
+		api.on(RooCodeEventName.Message, ({ message }) => {
+			if (message.type === "say" && message.partial === false) {
+				messages.push(message)
+			}
+		})
+
+		const taskId = await api.startNewTask({
+			configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true },
+			text: "zai-glm-5-turbo-e2e: what is 2+2? Reply with only the number.",
+		})
+
+		await waitUntilCompleted({ api, taskId })
+
+		const completionMessage = messages.find(
+			({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4",
+		)
+
+		assert.ok(completionMessage, "Task should complete with the expected Z.ai GLM-5-Turbo response")
+
+		// Verify max_tokens is the model's documented limit (131_072), not the 20%-of-context
+		// heuristic cap (40_000) that guards against inaccurate OpenRouter dynamic metadata.
+		assert.strictEqual(
+			requestCapture.maxTokens,
+			131_072,
+			`max_tokens should be the documented glm-5-turbo limit (131_072) but was ${requestCapture.maxTokens}`,
+		)
+	})
 })
diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts
index d054903e34..fc4051dc81 100644
--- a/packages/types/src/providers/zai.ts
+++ b/packages/types/src/providers/zai.ts
@@ -6,6 +6,7 @@ import { ZaiApiLine } from "../provider-settings.js"
 // https://docs.z.ai/guides/llm/glm-4.5
 // https://docs.z.ai/guides/llm/glm-4.6
 // https://docs.z.ai/guides/llm/glm-5.1
+// https://docs.z.ai/guides/llm/glm-5-turbo
 // https://docs.z.ai/guides/overview/pricing
 // https://bigmodel.cn/pricing
 
@@ -151,6 +152,21 @@ export const internationalZAiModels = {
 		description:
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
+	"glm-5-turbo": {
+		maxTokens: 131_072,
+		contextWindow: 202_752,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 1.2,
+		outputPrice: 4.0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.24,
+		description:
+			"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 200_000,
@@ -342,6 +358,21 @@ export const mainlandZAiModels = {
 		description:
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
+	"glm-5-turbo": {
+		maxTokens: 131_072,
+		contextWindow: 202_752,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 0.35,
+		outputPrice: 1.43,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.07,
+		description:
+			"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 204_800,
diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts
index b748be2070..06b807ce70 100644
--- a/src/api/providers/__tests__/zai.spec.ts
+++ b/src/api/providers/__tests__/zai.spec.ts
@@ -116,6 +116,23 @@ describe("ZAiHandler", () => {
 			expect(model.info.supportsImages).toBe(false)
 		})
 
+		it("should return GLM-5-Turbo international model with thinking support", () => {
+			const testModelId: InternationalZAiModelId = "glm-5-turbo"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(internationalZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(202_752)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(model.info.reasoningEffort).toBe("medium")
+			expect(model.info.preserveReasoning).toBe(true)
+		})
+
 		it("should return GLM-4.5v international model with vision support", () => {
 			const testModelId: InternationalZAiModelId = "glm-4.5v"
 			const handlerWithModel = new ZAiHandler({
@@ -229,6 +246,23 @@ describe("ZAiHandler", () => {
 			expect(model.info.reasoningEffort).toBe("medium")
 			expect(model.info.preserveReasoning).toBe(true)
 		})
+
+		it("should return GLM-5-Turbo China model with thinking support", () => {
+			const testModelId: MainlandZAiModelId = "glm-5-turbo"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "china_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(mainlandZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(202_752)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(model.info.reasoningEffort).toBe("medium")
+			expect(model.info.preserveReasoning).toBe(true)
+		})
 	})
 
 	describe("International API", () => {
@@ -557,5 +591,63 @@ describe("ZAiHandler", () => {
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.thinking).toBeUndefined()
 		})
+
+		it("should enable thinking by default for GLM-5-Turbo", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5-turbo",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5-turbo",
+					thinking: { type: "enabled" },
+				}),
+			)
+		})
+
+		it("should disable thinking for GLM-5-Turbo when reasoningEffort is set to disable", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5-turbo",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+				enableReasoningEffort: true,
+				reasoningEffort: "disable",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5-turbo",
+					thinking: { type: "disabled" },
+				}),
+			)
+		})
 	})
 })
diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts
index cf1227e8e0..84d28fd83e 100644
--- a/src/api/providers/zai.ts
+++ b/src/api/providers/zai.ts
@@ -40,8 +40,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider {
 	}
 
 	/**
-	 * Override createStream to handle GLM-4.7's thinking mode.
-	 * GLM-4.7 has thinking enabled by default in the API, so we need to
+	 * Override createStream to handle GLM thinking-capable models.
+	 * These models have thinking enabled by default in the API, so we need to
 	 * explicitly send { type: "disabled" } when the user turns off reasoning.
 	 */
 	protected override createStream(
@@ -69,7 +69,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider {
 	}
 
 	/**
-	 * Creates a stream with explicit thinking control for GLM-4.7
+	 * Creates a stream with explicit thinking control for GLM thinking-capable models.
 	 */
 	private createStreamWithThinking(
 		systemPrompt: string,
@@ -97,7 +97,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider {
 			messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
 			stream: true,
 			stream_options: { include_usage: true },
-			// For GLM-4.7: thinking is ON by default, so we explicitly disable when needed
+			// Thinking is ON by default for these models, so explicitly disable it when needed.
 			thinking: useReasoning ? { type: "enabled" } : { type: "disabled" },
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,