52 changes: 52 additions & 0 deletions examples/mistral/chat/reasoning.py
@@ -0,0 +1,52 @@
#!/usr/bin/env python
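
# Minimal reasoning example: ask Mistral Medium 3.5 a short word problem
# with reasoning_effort="high", then print the thinking trace and the
# final answer.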

import os

from mistralai import Mistral
from mistralai.models import TextChunk, ThinkChunk, UserMessage


def main():
api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-medium-3-5"

# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=api_key, timeout_ms=300_000)

chat_response = client.chat.complete(
model=model,
messages=[
UserMessage(
content=(
"John is one of 4 children. The first sister is 4 years old. "
"Next year, the second sister will be twice as old as the first sister. "
"The third sister is two years older than the second sister. "
"The third sister is half the age of her older brother. "
"How old is John?"
)
)
],
reasoning_effort="high",
temperature=0.7,
)

# With reasoning_effort="high", message.content is a list of chunks.
# With reasoning_effort="none", message.content is a plain string.
content = chat_response.choices[0].message.content
if isinstance(content, str):
print(content)
return

for chunk in content or []:
if isinstance(chunk, ThinkChunk):
print("--- thinking ---")
for inner in chunk.thinking:
if isinstance(inner, TextChunk):
print(inner.text)
print("--- /thinking ---")
elif isinstance(chunk, TextChunk):
print(chunk.text)


if __name__ == "__main__":
main()
68 changes: 68 additions & 0 deletions examples/mistral/chat/reasoning_multi_turn.py
@@ -0,0 +1,68 @@
#!/usr/bin/env python

# Multi-turn conversation with a reasoning model.
#
# IMPORTANT: for Mistral Medium 3.5, always replay the assistant turn
# back into `messages` with its ThinkChunks intact. Dropping the
# reasoning trace across turns DEGRADES the model's performance.
#
# This example runs a 3-turn math chain and prints per-turn token
# usage. The prompt grows as the reasoning trace accumulates; that
# growth is expected.

import os

from mistralai import Mistral
from mistralai.models import TextChunk, UserMessage

MODEL = "mistral-medium-3-5"
TURNS = [
"What is 17 * 23?",
"Now multiply that by 3.",
"And subtract 100 from the result.",
]
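
# For reference (nothing in the script asserts these), the expected chain is:
#   17 * 23 = 391, then 391 * 3 = 1173, then 1173 - 100 = 1073.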


def final_text(content):
if isinstance(content, str):
return content
return "".join(c.text for c in (content or []) if isinstance(c, TextChunk))


def main():
# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000)

messages = []
total_prompt = 0
total_completion = 0

for i, user_text in enumerate(TURNS, start=1):
messages.append(UserMessage(content=user_text))
response = client.chat.complete(
model=MODEL,
messages=messages,
reasoning_effort="high",
temperature=0.7,
)
message = response.choices[0].message
usage = response.usage
total_prompt += usage.prompt_tokens
total_completion += usage.completion_tokens

print(
f"turn {i}: prompt={usage.prompt_tokens:>4} "
f"completion={usage.completion_tokens:>4} -> {final_text(message.content)}"
)
# Append the full assistant message back into history so the
# ThinkChunks are preserved across turns.
messages.append(message)

print(
f"TOTAL: prompt={total_prompt} completion={total_completion} "
f"(sum {total_prompt + total_completion})"
)


if __name__ == "__main__":
main()
43 changes: 43 additions & 0 deletions examples/mistral/chat/reasoning_response_shape.py
@@ -0,0 +1,43 @@
#!/usr/bin/env python

# Print the raw shape of a chat response when using `reasoning_effort`.
# Run this first to see what ThinkChunk / TextChunk look like in the wire
# format, then move on to the other reasoning_*.py examples.
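#
# Illustrative shape only (field names may differ across SDK versions;
# run the script to see the real payload):
#
#   reasoning_effort="high" -> message.content is a list of chunks, e.g.
#     [{"type": "thinking", "thinking": [{"type": "text", "text": "..."}]},
#      {"type": "text", "text": "12 * 14 = 168."}]
#   reasoning_effort="none" -> message.content is a plain string, e.g.
#     "12 * 14 = 168."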

import json
import os

from mistralai import Mistral
from mistralai.models import UserMessage


def main():
# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000)

prompt = "What is 12 * 14? Answer in one short sentence."

for effort in ["high", "none"]:
print(f"\n========== reasoning_effort={effort!r} ==========")
response = client.chat.complete(
model="mistral-medium-3-5",
messages=[UserMessage(content=prompt)],
reasoning_effort=effort,
temperature=0.7,
)
message = response.choices[0].message
print(f"type(message.content) = {type(message.content).__name__}")
print("message.content =")
if isinstance(message.content, str):
print(json.dumps(message.content, indent=2))
else:
print(
json.dumps(
[chunk.model_dump() for chunk in message.content],
indent=2,
)
)


if __name__ == "__main__":
main()
64 changes: 64 additions & 0 deletions examples/mistral/chat/reasoning_with_streaming.py
@@ -0,0 +1,64 @@
#!/usr/bin/env python
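
# Streaming variant: print the thinking trace and the final answer as
# they arrive. See the comment in main() for how delta.content changes
# shape during and after the thinking phase.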

import os

from mistralai import Mistral
from mistralai.models import TextChunk, ThinkChunk, UserMessage


def main():
api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-medium-3-5"

# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=api_key, timeout_ms=300_000)

# While the model is thinking, delta.content is a list containing a
# ThinkChunk. After the thinking phase ends, delta.content arrives as
# plain string fragments. The transition event may contain both a closing
# ThinkChunk and the first TextChunk in a single list.
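    #
    # Illustrative delta sequence (shapes only, not real model output):
    #   [ThinkChunk(thinking=[TextChunk(text="Let t be the time ...")])]
    #   [ThinkChunk(...), TextChunk(text="They meet ")]   <- transition event
    #   "at 11:30."                                       <- plain string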
in_thinking = False
for event in client.chat.stream(
model=model,
messages=[
UserMessage(
content=(
"If a train leaves Paris at 9am going 120 km/h and another "
"leaves Lyon at 10am going 150 km/h on the same track, "
"when do they meet? Paris-Lyon is 465 km."
)
)
],
reasoning_effort="high",
temperature=0.7,
):
delta = event.data.choices[0].delta.content
if not delta:
continue

if isinstance(delta, str):
if in_thinking:
print("\n--- /thinking ---")
in_thinking = False
print(delta, end="", flush=True)
continue

for chunk in delta:
if isinstance(chunk, ThinkChunk):
if not in_thinking:
print("--- thinking ---")
in_thinking = True
for inner in chunk.thinking:
if isinstance(inner, TextChunk):
print(inner.text, end="", flush=True)
elif isinstance(chunk, TextChunk):
if in_thinking:
print("\n--- /thinking ---")
in_thinking = False
print(chunk.text, end="", flush=True)

print()


if __name__ == "__main__":
main()