From f814306c19e680a11a8d53dc4d5c53bd2fa38ef3 Mon Sep 17 00:00:00 2001
From: Toray Altas <6816042+taltas@users.noreply.github.com>
Date: Sun, 10 May 2026 12:54:00 +0000
Subject: [PATCH 1/2] fix: add glm-5-turbo to zai provider

---
 packages/types/src/providers/zai.ts     | 30 +++++++++
 src/api/providers/__tests__/zai.spec.ts | 90 +++++++++++++++++++++++++
 src/api/providers/zai.ts                |  8 +--
 3 files changed, 124 insertions(+), 4 deletions(-)

diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts
index d054903e34f..04a2a8e26ef 100644
--- a/packages/types/src/providers/zai.ts
+++ b/packages/types/src/providers/zai.ts
@@ -151,6 +151,21 @@ export const internationalZAiModels = {
 		description:
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
+	"glm-5-turbo": {
+		maxTokens: 16_384,
+		contextWindow: 202_752,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 1.2,
+		outputPrice: 4.0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.24,
+		description:
+			"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 200_000,
@@ -342,6 +357,21 @@ export const mainlandZAiModels = {
 		description:
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
+	"glm-5-turbo": {
+		maxTokens: 16_384,
+		contextWindow: 202_752,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 0.35,
+		outputPrice: 1.43,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.07,
+		description:
+			"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 204_800,
diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts
index b748be20701..f22e04e6fb7 100644
--- a/src/api/providers/__tests__/zai.spec.ts
+++ b/src/api/providers/__tests__/zai.spec.ts
@@ -116,6 +116,22 @@ describe("ZAiHandler", () => {
 			expect(model.info.supportsImages).toBe(false)
 		})
 
+		it("should return GLM-5-Turbo international model with thinking support", () => {
+			const testModelId: InternationalZAiModelId = "glm-5-turbo"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(internationalZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(202_752)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(model.info.reasoningEffort).toBe("medium")
+			expect(model.info.preserveReasoning).toBe(true)
+		})
+
 		it("should return GLM-4.5v international model with vision support", () => {
 			const testModelId: InternationalZAiModelId = "glm-4.5v"
 			const handlerWithModel = new ZAiHandler({
@@ -229,6 +245,22 @@ describe("ZAiHandler", () => {
 			expect(model.info.reasoningEffort).toBe("medium")
 			expect(model.info.preserveReasoning).toBe(true)
 		})
+
+		it("should return GLM-5-Turbo China model with thinking support", () => {
+			const testModelId: MainlandZAiModelId = "glm-5-turbo"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "china_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(mainlandZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(202_752)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(model.info.reasoningEffort).toBe("medium")
+			expect(model.info.preserveReasoning).toBe(true)
+		})
 	})
 
 	describe("International API", () => {
@@ -557,5 +589,63 @@ describe("ZAiHandler", () => {
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.thinking).toBeUndefined()
 		})
+
+		it("should enable thinking by default for GLM-5-Turbo", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5-turbo",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5-turbo",
+					thinking: { type: "enabled" },
+				}),
+			)
+		})
+
+		it("should disable thinking for GLM-5-Turbo when reasoningEffort is set to disable", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5-turbo",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+				enableReasoningEffort: true,
+				reasoningEffort: "disable",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5-turbo",
+					thinking: { type: "disabled" },
+				}),
+			)
+		})
 	})
 })
diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts
index cf1227e8e00..84d28fd83ee 100644
--- a/src/api/providers/zai.ts
+++ b/src/api/providers/zai.ts
@@ -40,8 +40,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 	}
 
 	/**
-	 * Override createStream to handle GLM-4.7's thinking mode.
-	 * GLM-4.7 has thinking enabled by default in the API, so we need to
+	 * Override createStream to handle GLM thinking-capable models.
+	 * These models have thinking enabled by default in the API, so we need to
 	 * explicitly send { type: "disabled" } when the user turns off reasoning.
 	 */
 	protected override createStream(
@@ -69,7 +69,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 	}
 
 	/**
-	 * Creates a stream with explicit thinking control for GLM-4.7
+	 * Creates a stream with explicit thinking control for GLM thinking-capable models.
 	 */
 	private createStreamWithThinking(
 		systemPrompt: string,
@@ -97,7 +97,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 			messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
 			stream: true,
 			stream_options: { include_usage: true },
-			// For GLM-4.7: thinking is ON by default, so we explicitly disable when needed
+			// Thinking is ON by default for these models, so explicitly disable it when needed.
 			thinking: useReasoning ? { type: "enabled" } : { type: "disabled" },
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,

From 6bd8e76a9ea8b33e2cbb5e2a16cf8487c5c30d4a Mon Sep 17 00:00:00 2001
From: Elliott de Launay <edelauna@gmail.com>
Date: Mon, 11 May 2026 13:12:20 +0000
Subject: [PATCH 2/2] refactor(zai): bumping up maxTokens for glm-5-turbo -
 adding e2e test

---
 .../src/suite/providers/zai.test.ts           | 44 ++++++++++++++++++-
 packages/types/src/providers/zai.ts           |  5 ++-
 src/api/providers/__tests__/zai.spec.ts       |  2 +
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/apps/vscode-e2e/src/suite/providers/zai.test.ts b/apps/vscode-e2e/src/suite/providers/zai.test.ts
index 03576bb1672..e0c11b02a0a 100644
--- a/apps/vscode-e2e/src/suite/providers/zai.test.ts
+++ b/apps/vscode-e2e/src/suite/providers/zai.test.ts
@@ -153,7 +153,10 @@ suite("Z.ai GLM provider", function () {
 
 	suiteSetup(async () => {
 		restoreFetch = installZAiFetchInterceptor(
-			[{ match: "zai-glm-e2e:", result: "4" }],
+			[
+				{ match: "zai-glm-e2e:", result: "4" },
+				{ match: "zai-glm-5-turbo-e2e:", result: "4" },
+			],
 			requestCapture,
 			!!ZAI_API_KEY,
 		)
@@ -211,4 +214,43 @@ suite("Z.ai GLM provider", function () {
 			`max_tokens should be the documented glm-5.1 limit (131_072) but was ${requestCapture.maxTokens}`,
 		)
 	})
+
+	test("Should complete a task end-to-end using glm-5-turbo via Z.ai provider", async () => {
+		await globalThis.api.setConfiguration({
+			apiProvider: "zai" as const,
+			zaiApiKey: ZAI_API_KEY ?? "mock-key",
+			zaiApiLine: "international_api" as const,
+			apiModelId: "glm-5-turbo",
+		})
+
+		const api = globalThis.api
+		const messages: ClineMessage[] = []
+
+		api.on(RooCodeEventName.Message, ({ message }) => {
+			if (message.type === "say" && message.partial === false) {
+				messages.push(message)
+			}
+		})
+
+		const taskId = await api.startNewTask({
+			configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true },
+			text: "zai-glm-5-turbo-e2e: what is 2+2? Reply with only the number.",
+		})
+
+		await waitUntilCompleted({ api, taskId })
+
+		const completionMessage = messages.find(
+			({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4",
+		)
+
+		assert.ok(completionMessage, "Task should complete with the expected Z.ai GLM-5-Turbo response")
+
+		// Verify max_tokens is the model's documented limit (131_072), not the 20%-of-context heuristic
+		// cap (~40_550 for the 202_752 context window) that guards against inaccurate OpenRouter dynamic metadata.
+		assert.strictEqual(
+			requestCapture.maxTokens,
+			131_072,
+			`max_tokens should be the documented glm-5-turbo limit (131_072) but was ${requestCapture.maxTokens}`,
+		)
+	})
 })
diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts
index 04a2a8e26ef..fc4051dc81c 100644
--- a/packages/types/src/providers/zai.ts
+++ b/packages/types/src/providers/zai.ts
@@ -6,6 +6,7 @@ import { ZaiApiLine } from "../provider-settings.js"
 // https://docs.z.ai/guides/llm/glm-4.5
 // https://docs.z.ai/guides/llm/glm-4.6
 // https://docs.z.ai/guides/llm/glm-5.1
+// https://docs.z.ai/guides/llm/glm-5-turbo
 // https://docs.z.ai/guides/overview/pricing
 // https://bigmodel.cn/pricing
 
@@ -152,7 +153,7 @@ export const internationalZAiModels = {
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
 	"glm-5-turbo": {
-		maxTokens: 16_384,
+		maxTokens: 131_072,
 		contextWindow: 202_752,
 		supportsImages: false,
 		supportsPromptCache: true,
@@ -358,7 +359,7 @@ export const mainlandZAiModels = {
 			"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
 	},
 	"glm-5-turbo": {
-		maxTokens: 16_384,
+		maxTokens: 131_072,
 		contextWindow: 202_752,
 		supportsImages: false,
 		supportsPromptCache: true,
diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts
index f22e04e6fb7..06b807ce700 100644
--- a/src/api/providers/__tests__/zai.spec.ts
+++ b/src/api/providers/__tests__/zai.spec.ts
@@ -127,6 +127,7 @@ describe("ZAiHandler", () => {
 			expect(model.id).toBe(testModelId)
 			expect(model.info).toEqual(internationalZAiModels[testModelId])
 			expect(model.info.contextWindow).toBe(202_752)
+			expect(model.info.maxTokens).toBe(131_072)
 			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
 			expect(model.info.reasoningEffort).toBe("medium")
 			expect(model.info.preserveReasoning).toBe(true)
@@ -257,6 +258,7 @@ describe("ZAiHandler", () => {
 			expect(model.id).toBe(testModelId)
 			expect(model.info).toEqual(mainlandZAiModels[testModelId])
 			expect(model.info.contextWindow).toBe(202_752)
+			expect(model.info.maxTokens).toBe(131_072)
 			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
 			expect(model.info.reasoningEffort).toBe("medium")
 			expect(model.info.preserveReasoning).toBe(true)