From f814306c19e680a11a8d53dc4d5c53bd2fa38ef3 Mon Sep 17 00:00:00 2001 From: Toray Altas <6816042+taltas@users.noreply.github.com> Date: Sun, 10 May 2026 12:54:00 +0000 Subject: [PATCH 1/2] fix: add glm-5-turbo to zai provider --- packages/types/src/providers/zai.ts | 30 +++++++++ src/api/providers/__tests__/zai.spec.ts | 90 +++++++++++++++++++++++++ src/api/providers/zai.ts | 8 +-- 3 files changed, 124 insertions(+), 4 deletions(-) diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts index d054903e34f..04a2a8e26ef 100644 --- a/packages/types/src/providers/zai.ts +++ b/packages/types/src/providers/zai.ts @@ -151,6 +151,21 @@ export const internationalZAiModels = { description: "GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.", }, + "glm-5-turbo": { + maxTokens: 16_384, + contextWindow: 202_752, + supportsImages: false, + supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 1.2, + outputPrice: 4.0, + cacheWritesPrice: 0, + cacheReadsPrice: 0.24, + description: + "GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.", + }, "glm-4.7-flash": { maxTokens: 16_384, contextWindow: 200_000, @@ -342,6 +357,21 @@ export const mainlandZAiModels = { description: "GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. 
It delivers top-tier reasoning, coding, and agentic performance.", }, + "glm-5-turbo": { + maxTokens: 16_384, + contextWindow: 202_752, + supportsImages: false, + supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 0.35, + outputPrice: 1.43, + cacheWritesPrice: 0, + cacheReadsPrice: 0.07, + description: + "GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.", + }, "glm-4.7-flash": { maxTokens: 16_384, contextWindow: 204_800, diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts index b748be20701..f22e04e6fb7 100644 --- a/src/api/providers/__tests__/zai.spec.ts +++ b/src/api/providers/__tests__/zai.spec.ts @@ -116,6 +116,22 @@ describe("ZAiHandler", () => { expect(model.info.supportsImages).toBe(false) }) + it("should return GLM-5-Turbo international model with thinking support", () => { + const testModelId: InternationalZAiModelId = "glm-5-turbo" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(internationalZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(202_752) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"]) + expect(model.info.reasoningEffort).toBe("medium") + expect(model.info.preserveReasoning).toBe(true) + }) + it("should return GLM-4.5v international model with vision support", () => { const testModelId: InternationalZAiModelId = "glm-4.5v" const handlerWithModel = new ZAiHandler({ @@ -229,6 +245,22 @@ describe("ZAiHandler", () => { expect(model.info.reasoningEffort).toBe("medium") expect(model.info.preserveReasoning).toBe(true) }) + 
+ it("should return GLM-5-Turbo China model with thinking support", () => { + const testModelId: MainlandZAiModelId = "glm-5-turbo" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "china_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(mainlandZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(202_752) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"]) + expect(model.info.reasoningEffort).toBe("medium") + expect(model.info.preserveReasoning).toBe(true) + }) }) describe("International API", () => { @@ -557,5 +589,63 @@ describe("ZAiHandler", () => { const callArgs = mockCreate.mock.calls[0][0] expect(callArgs.thinking).toBeUndefined() }) + + it("should enable thinking by default for GLM-5-Turbo", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5-turbo", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5-turbo", + thinking: { type: "enabled" }, + }), + ) + }) + + it("should disable thinking for GLM-5-Turbo when reasoningEffort is set to disable", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5-turbo", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + enableReasoningEffort: true, + reasoningEffort: "disable", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system 
prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5-turbo", + thinking: { type: "disabled" }, + }), + ) + }) }) }) diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index cf1227e8e00..84d28fd83ee 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -40,8 +40,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { } /** - * Override createStream to handle GLM-4.7's thinking mode. - * GLM-4.7 has thinking enabled by default in the API, so we need to + * Override createStream to handle GLM thinking-capable models. + * These models have thinking enabled by default in the API, so we need to * explicitly send { type: "disabled" } when the user turns off reasoning. */ protected override createStream( @@ -69,7 +69,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { } /** - * Creates a stream with explicit thinking control for GLM-4.7 + * Creates a stream with explicit thinking control for GLM thinking-capable models. */ private createStreamWithThinking( systemPrompt: string, @@ -97,7 +97,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { messages: [{ role: "system", content: systemPrompt }, ...convertedMessages], stream: true, stream_options: { include_usage: true }, - // For GLM-4.7: thinking is ON by default, so we explicitly disable when needed + // Thinking is ON by default for these models, so explicitly disable it when needed. thinking: useReasoning ? 
{ type: "enabled" } : { type: "disabled" }, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, From 6bd8e76a9ea8b33e2cbb5e2a16cf8487c5c30d4a Mon Sep 17 00:00:00 2001 From: Elliott de Launay Date: Mon, 11 May 2026 13:12:20 +0000 Subject: [PATCH 2/2] refactor(zai): bump maxTokens for glm-5-turbo and add e2e test --- .../src/suite/providers/zai.test.ts | 44 ++++++++++++++++++- packages/types/src/providers/zai.ts | 5 ++- src/api/providers/__tests__/zai.spec.ts | 2 + 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/apps/vscode-e2e/src/suite/providers/zai.test.ts b/apps/vscode-e2e/src/suite/providers/zai.test.ts index 03576bb1672..e0c11b02a0a 100644 --- a/apps/vscode-e2e/src/suite/providers/zai.test.ts +++ b/apps/vscode-e2e/src/suite/providers/zai.test.ts @@ -153,7 +153,10 @@ suite("Z.ai GLM provider", function () { suiteSetup(async () => { restoreFetch = installZAiFetchInterceptor( - [{ match: "zai-glm-e2e:", result: "4" }], + [ + { match: "zai-glm-e2e:", result: "4" }, + { match: "zai-glm-5-turbo-e2e:", result: "4" }, + ], requestCapture, !!ZAI_API_KEY, ) @@ -211,4 +214,43 @@ suite("Z.ai GLM provider", function () { `max_tokens should be the documented glm-5.1 limit (131_072) but was ${requestCapture.maxTokens}`, ) }) + + test("Should complete a task end-to-end using glm-5-turbo via Z.ai provider", async () => { + await globalThis.api.setConfiguration({ + apiProvider: "zai" as const, + zaiApiKey: ZAI_API_KEY ?? "mock-key", + zaiApiLine: "international_api" as const, + apiModelId: "glm-5-turbo", + }) + + const api = globalThis.api + const messages: ClineMessage[] = [] + + api.on(RooCodeEventName.Message, ({ message }) => { + if (message.type === "say" && message.partial === false) { + messages.push(message) + } + }) + + const taskId = await api.startNewTask({ + configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true }, + text: "zai-glm-5-turbo-e2e: what is 2+2? 
Reply with only the number.", + }) + + await waitUntilCompleted({ api, taskId }) + + const completionMessage = messages.find( + ({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4", + ) + + assert.ok(completionMessage, "Task should complete with the expected Z.ai GLM-5-Turbo response") + + // Verify max_tokens is the model's documented limit (131_072), not the 20%-of-context + // heuristic cap (40_000) that guards against inaccurate OpenRouter dynamic metadata. + assert.strictEqual( + requestCapture.maxTokens, + 131_072, + `max_tokens should be the documented glm-5-turbo limit (131_072) but was ${requestCapture.maxTokens}`, + ) + }) }) diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts index 04a2a8e26ef..fc4051dc81c 100644 --- a/packages/types/src/providers/zai.ts +++ b/packages/types/src/providers/zai.ts @@ -6,6 +6,7 @@ import { ZaiApiLine } from "../provider-settings.js" // https://docs.z.ai/guides/llm/glm-4.5 // https://docs.z.ai/guides/llm/glm-4.6 // https://docs.z.ai/guides/llm/glm-5.1 +// https://docs.z.ai/guides/llm/glm-5-turbo // https://docs.z.ai/guides/overview/pricing // https://bigmodel.cn/pricing @@ -152,7 +153,7 @@ export const internationalZAiModels = { "GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.", }, "glm-5-turbo": { - maxTokens: 16_384, + maxTokens: 131_072, contextWindow: 202_752, supportsImages: false, supportsPromptCache: true, @@ -358,7 +359,7 @@ export const mainlandZAiModels = { "GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. 
It delivers top-tier reasoning, coding, and agentic performance.", }, "glm-5-turbo": { - maxTokens: 16_384, + maxTokens: 131_072, contextWindow: 202_752, supportsImages: false, supportsPromptCache: true, diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts index f22e04e6fb7..06b807ce700 100644 --- a/src/api/providers/__tests__/zai.spec.ts +++ b/src/api/providers/__tests__/zai.spec.ts @@ -127,6 +127,7 @@ describe("ZAiHandler", () => { expect(model.id).toBe(testModelId) expect(model.info).toEqual(internationalZAiModels[testModelId]) expect(model.info.contextWindow).toBe(202_752) + expect(model.info.maxTokens).toBe(131_072) expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"]) expect(model.info.reasoningEffort).toBe("medium") expect(model.info.preserveReasoning).toBe(true) @@ -257,6 +258,7 @@ describe("ZAiHandler", () => { expect(model.id).toBe(testModelId) expect(model.info).toEqual(mainlandZAiModels[testModelId]) expect(model.info.contextWindow).toBe(202_752) + expect(model.info.maxTokens).toBe(131_072) expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"]) expect(model.info.reasoningEffort).toBe("medium") expect(model.info.preserveReasoning).toBe(true)