Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 43 additions & 1 deletion apps/vscode-e2e/src/suite/providers/zai.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,10 @@ suite("Z.ai GLM provider", function () {

suiteSetup(async () => {
restoreFetch = installZAiFetchInterceptor(
[{ match: "zai-glm-e2e:", result: "4" }],
[
{ match: "zai-glm-e2e:", result: "4" },
{ match: "zai-glm-5-turbo-e2e:", result: "4" },
],
requestCapture,
!!ZAI_API_KEY,
)
Expand Down Expand Up @@ -211,4 +214,43 @@ suite("Z.ai GLM provider", function () {
`max_tokens should be the documented glm-5.1 limit (131_072) but was ${requestCapture.maxTokens}`,
)
})

test("Should complete a task end-to-end using glm-5-turbo via Z.ai provider", async () => {
	// Point the extension at the Z.ai international line with the new glm-5-turbo model.
	// Falls back to "mock-key" when no real ZAI_API_KEY is set (interceptor mode).
	await globalThis.api.setConfiguration({
		apiProvider: "zai" as const,
		zaiApiKey: ZAI_API_KEY ?? "mock-key",
		zaiApiLine: "international_api" as const,
		apiModelId: "glm-5-turbo",
	})

	const api = globalThis.api
	const messages: ClineMessage[] = []

	// Collect only finalized "say" messages; partial streaming chunks are skipped.
	// NOTE: the listener must be registered before startNewTask so no message is missed.
	api.on(RooCodeEventName.Message, ({ message }) => {
		if (message.type === "say" && message.partial === false) {
			messages.push(message)
		}
	})

	// The "zai-glm-5-turbo-e2e:" prefix is matched by the fetch interceptor
	// installed in suiteSetup, which replies with "4".
	const taskId = await api.startNewTask({
		configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true },
		text: "zai-glm-5-turbo-e2e: what is 2+2? Reply with only the number.",
	})

	await waitUntilCompleted({ api, taskId })

	// Accept the answer either as a completion_result or a plain text message.
	const completionMessage = messages.find(
		({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4",
	)

	assert.ok(completionMessage, "Task should complete with the expected Z.ai GLM-5-Turbo response")

	// Verify max_tokens is the model's documented limit (131_072), not the 20%-of-context
	// heuristic cap (40_000) that guards against inaccurate OpenRouter dynamic metadata.
	assert.strictEqual(
		requestCapture.maxTokens,
		131_072,
		`max_tokens should be the documented glm-5-turbo limit (131_072) but was ${requestCapture.maxTokens}`,
	)
})
})
31 changes: 31 additions & 0 deletions packages/types/src/providers/zai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { ZaiApiLine } from "../provider-settings.js"
// https://docs.z.ai/guides/llm/glm-4.5
// https://docs.z.ai/guides/llm/glm-4.6
// https://docs.z.ai/guides/llm/glm-5.1
// https://docs.z.ai/guides/llm/glm-5-turbo
// https://docs.z.ai/guides/overview/pricing
// https://bigmodel.cn/pricing

Expand Down Expand Up @@ -151,6 +152,21 @@ export const internationalZAiModels = {
description:
"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
},
// GLM-5-Turbo, international line. See https://docs.z.ai/guides/llm/glm-5-turbo
// NOTE(review): prices are presumably USD per million tokens, matching the
// sibling entries in this table — confirm against the Z.ai pricing page.
"glm-5-turbo": {
	maxTokens: 131_072, // documented max output limit
	contextWindow: 202_752,
	supportsImages: false,
	supportsPromptCache: true,
	// Thinking is on by default ("medium") and can only be turned off, not tuned further.
	supportsReasoningEffort: ["disable", "medium"],
	reasoningEffort: "medium",
	preserveReasoning: true,
	inputPrice: 1.2,
	outputPrice: 4.0,
	cacheWritesPrice: 0,
	cacheReadsPrice: 0.24,
	description:
		"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
},
"glm-4.7-flash": {
maxTokens: 16_384,
contextWindow: 200_000,
Expand Down Expand Up @@ -342,6 +358,21 @@ export const mainlandZAiModels = {
description:
"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
},
// GLM-5-Turbo, mainland-China line. Same model limits as the international
// entry; only the pricing differs.
// NOTE(review): mainland prices presumably quoted per million tokens like the
// rest of this table — confirm against https://bigmodel.cn/pricing.
"glm-5-turbo": {
	maxTokens: 131_072, // documented max output limit
	contextWindow: 202_752,
	supportsImages: false,
	supportsPromptCache: true,
	// Thinking is on by default ("medium") and can only be turned off, not tuned further.
	supportsReasoningEffort: ["disable", "medium"],
	reasoningEffort: "medium",
	preserveReasoning: true,
	inputPrice: 0.35,
	outputPrice: 1.43,
	cacheWritesPrice: 0,
	cacheReadsPrice: 0.07,
	description:
		"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
},
"glm-4.7-flash": {
maxTokens: 16_384,
contextWindow: 204_800,
Expand Down
92 changes: 92 additions & 0 deletions src/api/providers/__tests__/zai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,23 @@ describe("ZAiHandler", () => {
expect(model.info.supportsImages).toBe(false)
})

it("should return GLM-5-Turbo international model with thinking support", () => {
const testModelId: InternationalZAiModelId = "glm-5-turbo"
const handlerWithModel = new ZAiHandler({
apiModelId: testModelId,
zaiApiKey: "test-zai-api-key",
zaiApiLine: "international_coding",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(internationalZAiModels[testModelId])
expect(model.info.contextWindow).toBe(202_752)
expect(model.info.maxTokens).toBe(131_072)
expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
expect(model.info.reasoningEffort).toBe("medium")
expect(model.info.preserveReasoning).toBe(true)
})

it("should return GLM-4.5v international model with vision support", () => {
const testModelId: InternationalZAiModelId = "glm-4.5v"
const handlerWithModel = new ZAiHandler({
Expand Down Expand Up @@ -229,6 +246,23 @@ describe("ZAiHandler", () => {
expect(model.info.reasoningEffort).toBe("medium")
expect(model.info.preserveReasoning).toBe(true)
})

it("should return GLM-5-Turbo China model with thinking support", () => {
const testModelId: MainlandZAiModelId = "glm-5-turbo"
const handlerWithModel = new ZAiHandler({
apiModelId: testModelId,
zaiApiKey: "test-zai-api-key",
zaiApiLine: "china_coding",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(mainlandZAiModels[testModelId])
expect(model.info.contextWindow).toBe(202_752)
expect(model.info.maxTokens).toBe(131_072)
expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
expect(model.info.reasoningEffort).toBe("medium")
expect(model.info.preserveReasoning).toBe(true)
})
})
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit / consistency, not a blocker: the new behavior tests below verify the thinking: { type: "enabled" | "disabled" } payload only against international_coding. This matches the pre-existing pattern (the GLM-4.7 behavior tests at lines 440/470/501 are also international-only), so this PR isn't introducing a gap — just continuing it.

createStreamWithThinking doesn't branch on zaiApiLine and the OpenAI client is mocked at the SDK level, so adding china_coding versions would exercise effectively the same code modulo baseURL. Worth doing only if you want to lock in symmetry against a future change that adds api-line-specific branching.


describe("International API", () => {
Expand Down Expand Up @@ -557,5 +591,63 @@ describe("ZAiHandler", () => {
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toBeUndefined()
})

it("should enable thinking by default for GLM-5-Turbo", async () => {
const handlerWithModel = new ZAiHandler({
apiModelId: "glm-5-turbo",
zaiApiKey: "test-zai-api-key",
zaiApiLine: "international_coding",
})

mockCreate.mockImplementationOnce(() => {
return {
[Symbol.asyncIterator]: () => ({
async next() {
return { done: true }
},
}),
}
})

const messageGenerator = handlerWithModel.createMessage("system prompt", [])
await messageGenerator.next()

expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "glm-5-turbo",
thinking: { type: "enabled" },
}),
)
})

it("should disable thinking for GLM-5-Turbo when reasoningEffort is set to disable", async () => {
const handlerWithModel = new ZAiHandler({
apiModelId: "glm-5-turbo",
zaiApiKey: "test-zai-api-key",
zaiApiLine: "international_coding",
enableReasoningEffort: true,
reasoningEffort: "disable",
})

mockCreate.mockImplementationOnce(() => {
return {
[Symbol.asyncIterator]: () => ({
async next() {
return { done: true }
},
}),
}
})

const messageGenerator = handlerWithModel.createMessage("system prompt", [])
await messageGenerator.next()

expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "glm-5-turbo",
thinking: { type: "disabled" },
}),
)
})
})
})
8 changes: 4 additions & 4 deletions src/api/providers/zai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
}

/**
* Override createStream to handle GLM-4.7's thinking mode.
* GLM-4.7 has thinking enabled by default in the API, so we need to
* Override createStream to handle GLM thinking-capable models.
* These models have thinking enabled by default in the API, so we need to
* explicitly send { type: "disabled" } when the user turns off reasoning.
*/
protected override createStream(
Expand Down Expand Up @@ -69,7 +69,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
}

/**
* Creates a stream with explicit thinking control for GLM-4.7
* Creates a stream with explicit thinking control for GLM thinking-capable models.
*/
private createStreamWithThinking(
systemPrompt: string,
Expand Down Expand Up @@ -97,7 +97,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
stream: true,
stream_options: { include_usage: true },
// For GLM-4.7: thinking is ON by default, so we explicitly disable when needed
// Thinking is ON by default for these models, so explicitly disable it when needed.
thinking: useReasoning ? { type: "enabled" } : { type: "disabled" },
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
Expand Down
Loading