Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 43 additions & 1 deletion apps/vscode-e2e/src/suite/providers/zai.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,10 @@ suite("Z.ai GLM provider", function () {

suiteSetup(async () => {
restoreFetch = installZAiFetchInterceptor(
[{ match: "zai-glm-e2e:", result: "4" }],
[
{ match: "zai-glm-e2e:", result: "4" },
{ match: "zai-glm-5-turbo-e2e:", result: "4" },
],
requestCapture,
!!ZAI_API_KEY,
)
Expand Down Expand Up @@ -211,4 +214,43 @@ suite("Z.ai GLM provider", function () {
`max_tokens should be the documented glm-5.1 limit (131_072) but was ${requestCapture.maxTokens}`,
)
})

test("Should complete a task end-to-end using glm-5-turbo via Z.ai provider", async () => {
	// Point the extension at the Z.ai international line with the new glm-5-turbo model.
	// Falls back to "mock-key" when no real ZAI_API_KEY is set (interceptor mode).
	await globalThis.api.setConfiguration({
		apiProvider: "zai" as const,
		zaiApiKey: ZAI_API_KEY ?? "mock-key",
		zaiApiLine: "international_api" as const,
		apiModelId: "glm-5-turbo",
	})

	const api = globalThis.api
	const messages: ClineMessage[] = []

	// Collect only finalized "say" messages; partial streaming chunks are skipped.
	// NOTE: the listener must be registered before startNewTask so no message is missed.
	api.on(RooCodeEventName.Message, ({ message }) => {
		if (message.type === "say" && message.partial === false) {
			messages.push(message)
		}
	})

	// The "zai-glm-5-turbo-e2e:" prefix is matched by the fetch interceptor
	// installed in suiteSetup, which replies with "4".
	const taskId = await api.startNewTask({
		configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true },
		text: "zai-glm-5-turbo-e2e: what is 2+2? Reply with only the number.",
	})

	await waitUntilCompleted({ api, taskId })

	// Accept the answer either as a completion_result or a plain text message.
	const completionMessage = messages.find(
		({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4",
	)

	assert.ok(completionMessage, "Task should complete with the expected Z.ai GLM-5-Turbo response")

	// Verify max_tokens is the model's documented limit (131_072), not the 20%-of-context
	// heuristic cap (40_000) that guards against inaccurate OpenRouter dynamic metadata.
	assert.strictEqual(
		requestCapture.maxTokens,
		131_072,
		`max_tokens should be the documented glm-5-turbo limit (131_072) but was ${requestCapture.maxTokens}`,
	)
})
})
31 changes: 31 additions & 0 deletions packages/types/src/providers/zai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { ZaiApiLine } from "../provider-settings.js"
// https://docs.z.ai/guides/llm/glm-4.5
// https://docs.z.ai/guides/llm/glm-4.6
// https://docs.z.ai/guides/llm/glm-5.1
// https://docs.z.ai/guides/llm/glm-5-turbo
// https://docs.z.ai/guides/overview/pricing
// https://bigmodel.cn/pricing

Expand Down Expand Up @@ -151,6 +152,21 @@ export const internationalZAiModels = {
description:
"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
},
// GLM-5-Turbo, international line. See https://docs.z.ai/guides/llm/glm-5-turbo
// NOTE(review): prices are presumably USD per million tokens, matching the
// sibling entries in this table — confirm against the Z.ai pricing page.
"glm-5-turbo": {
	maxTokens: 131_072, // documented max output limit
	contextWindow: 202_752,
	supportsImages: false,
	supportsPromptCache: true,
	// Thinking is on by default ("medium") and can only be turned off, not tuned further.
	supportsReasoningEffort: ["disable", "medium"],
	reasoningEffort: "medium",
	preserveReasoning: true,
	inputPrice: 1.2,
	outputPrice: 4.0,
	cacheWritesPrice: 0,
	cacheReadsPrice: 0.24,
	description:
		"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
},
"glm-4.7-flash": {
maxTokens: 16_384,
contextWindow: 200_000,
Expand Down Expand Up @@ -342,6 +358,21 @@ export const mainlandZAiModels = {
description:
"GLM-5.1 is Zhipu's most capable model with a 200k context window, 128k max output, and built-in thinking capabilities. It delivers top-tier reasoning, coding, and agentic performance.",
},
// GLM-5-Turbo, mainland-China line. Same model limits as the international
// entry; only the pricing differs.
// NOTE(review): mainland prices presumably quoted per million tokens like the
// rest of this table — confirm against https://bigmodel.cn/pricing.
"glm-5-turbo": {
	maxTokens: 131_072, // documented max output limit
	contextWindow: 202_752,
	supportsImages: false,
	supportsPromptCache: true,
	// Thinking is on by default ("medium") and can only be turned off, not tuned further.
	supportsReasoningEffort: ["disable", "medium"],
	reasoningEffort: "medium",
	preserveReasoning: true,
	inputPrice: 0.35,
	outputPrice: 1.43,
	cacheWritesPrice: 0,
	cacheReadsPrice: 0.07,
	description:
		"GLM-5-Turbo is Zhipu's OpenClaw-optimized model with built-in thinking capabilities. It improves tool use, instruction following, and long-running agent workflows while keeping fast response times.",
},
"glm-4.7-flash": {
maxTokens: 16_384,
contextWindow: 204_800,
Expand Down
92 changes: 92 additions & 0 deletions src/api/providers/__tests__/zai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,23 @@ describe("ZAiHandler", () => {
expect(model.info.supportsImages).toBe(false)
})

it("should return GLM-5-Turbo international model with thinking support", () => {
const testModelId: InternationalZAiModelId = "glm-5-turbo"
const handlerWithModel = new ZAiHandler({
apiModelId: testModelId,
zaiApiKey: "test-zai-api-key",
zaiApiLine: "international_coding",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(internationalZAiModels[testModelId])
expect(model.info.contextWindow).toBe(202_752)
expect(model.info.maxTokens).toBe(131_072)
expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
expect(model.info.reasoningEffort).toBe("medium")
expect(model.info.preserveReasoning).toBe(true)
})

it("should return GLM-4.5v international model with vision support", () => {
const testModelId: InternationalZAiModelId = "glm-4.5v"
const handlerWithModel = new ZAiHandler({
Expand Down Expand Up @@ -229,6 +246,23 @@ describe("ZAiHandler", () => {
expect(model.info.reasoningEffort).toBe("medium")
expect(model.info.preserveReasoning).toBe(true)
})

it("should return GLM-5-Turbo China model with thinking support", () => {
const testModelId: MainlandZAiModelId = "glm-5-turbo"
const handlerWithModel = new ZAiHandler({
apiModelId: testModelId,
zaiApiKey: "test-zai-api-key",
zaiApiLine: "china_coding",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(mainlandZAiModels[testModelId])
expect(model.info.contextWindow).toBe(202_752)
expect(model.info.maxTokens).toBe(131_072)
expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
expect(model.info.reasoningEffort).toBe("medium")
expect(model.info.preserveReasoning).toBe(true)
})
})
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit / consistency, not a blocker: the new behavior tests below verify the thinking: { type: "enabled" | "disabled" } payload only against international_coding. This matches the pre-existing pattern (the GLM-4.7 behavior tests at lines 440/470/501 are also international-only), so this PR isn't introducing a gap — just continuing it.

createStreamWithThinking doesn't branch on zaiApiLine and the OpenAI client is mocked at the SDK level, so adding china_coding versions would exercise effectively the same code modulo baseURL. Worth doing only if you want to lock in symmetry against a future change that adds api-line-specific branching.


describe("International API", () => {
Expand Down Expand Up @@ -557,5 +591,63 @@ describe("ZAiHandler", () => {
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toBeUndefined()
})

it("should enable thinking by default for GLM-5-Turbo", async () => {
const handlerWithModel = new ZAiHandler({
apiModelId: "glm-5-turbo",
zaiApiKey: "test-zai-api-key",
zaiApiLine: "international_coding",
})

mockCreate.mockImplementationOnce(() => {
return {
[Symbol.asyncIterator]: () => ({
async next() {
return { done: true }
},
}),
}
})

const messageGenerator = handlerWithModel.createMessage("system prompt", [])
await messageGenerator.next()

expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "glm-5-turbo",
thinking: { type: "enabled" },
}),
)
})

it("should disable thinking for GLM-5-Turbo when reasoningEffort is set to disable", async () => {
const handlerWithModel = new ZAiHandler({
apiModelId: "glm-5-turbo",
zaiApiKey: "test-zai-api-key",
zaiApiLine: "international_coding",
enableReasoningEffort: true,
reasoningEffort: "disable",
})

mockCreate.mockImplementationOnce(() => {
return {
[Symbol.asyncIterator]: () => ({
async next() {
return { done: true }
},
}),
}
})

const messageGenerator = handlerWithModel.createMessage("system prompt", [])
await messageGenerator.next()

expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "glm-5-turbo",
thinking: { type: "disabled" },
}),
)
})
})
})
8 changes: 4 additions & 4 deletions src/api/providers/zai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
}

/**
* Override createStream to handle GLM-4.7's thinking mode.
* GLM-4.7 has thinking enabled by default in the API, so we need to
* Override createStream to handle GLM thinking-capable models.
* These models have thinking enabled by default in the API, so we need to
* explicitly send { type: "disabled" } when the user turns off reasoning.
*/
protected override createStream(
Expand Down Expand Up @@ -69,7 +69,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
}

/**
* Creates a stream with explicit thinking control for GLM-4.7
* Creates a stream with explicit thinking control for GLM thinking-capable models.
*/
private createStreamWithThinking(
systemPrompt: string,
Expand Down Expand Up @@ -97,7 +97,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
stream: true,
stream_options: { include_usage: true },
// For GLM-4.7: thinking is ON by default, so we explicitly disable when needed
// Thinking is ON by default for these models, so explicitly disable it when needed.
thinking: useReasoning ? { type: "enabled" } : { type: "disabled" },
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
Expand Down
Loading