52 changes: 52 additions & 0 deletions examples/mistral/chat/reasoning.py
@@ -0,0 +1,52 @@
#!/usr/bin/env python
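
# Minimal reasoning example: ask Mistral Medium 3.5 a short word problem
# with reasoning_effort="high", then print the thinking trace and the
# final answer.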

import os

from mistralai import Mistral
from mistralai.models import TextChunk, ThinkChunk, UserMessage


def main():
api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-medium-3-5"

# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=api_key, timeout_ms=300_000)

chat_response = client.chat.complete(
model=model,
messages=[
UserMessage(
content=(
"John is one of 4 children. The first sister is 4 years old. "
"Next year, the second sister will be twice as old as the first sister. "
"The third sister is two years older than the second sister. "
"The third sister is half the age of her older brother. "
"How old is John?"
)
)
],
reasoning_effort="high",
temperature=0.7,
)

# With reasoning_effort="high", message.content is a list of chunks.
# With reasoning_effort="none", message.content is a plain string.
content = chat_response.choices[0].message.content
if isinstance(content, str):
print(content)
return

for chunk in content or []:
if isinstance(chunk, ThinkChunk):
print("--- thinking ---")
for inner in chunk.thinking:
if isinstance(inner, TextChunk):
print(inner.text)
print("--- /thinking ---")
elif isinstance(chunk, TextChunk):
print(chunk.text)


if __name__ == "__main__":
main()
68 changes: 68 additions & 0 deletions examples/mistral/chat/reasoning_multi_turn.py
@@ -0,0 +1,68 @@
#!/usr/bin/env python

# Multi-turn conversation with a reasoning model.
#
# IMPORTANT: for Mistral Medium 3.5, always replay the assistant turn
# back into `messages` with its ThinkChunks intact. Dropping the
# reasoning trace across turns DEGRADES the model's performance.
#
# This example runs a 3-turn math chain and prints per-turn token
# usage. The prompt grows as the reasoning trace accumulates; that
# growth is expected.

import os

from mistralai import Mistral
from mistralai.models import TextChunk, UserMessage

MODEL = "mistral-medium-3-5"
TURNS = [
"What is 17 * 23?",
"Now multiply that by 3.",
"And subtract 100 from the result.",
]
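
# For reference (nothing in the script asserts these), the expected chain is:
#   17 * 23 = 391, then 391 * 3 = 1173, then 1173 - 100 = 1073.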


def final_text(content):
if isinstance(content, str):
return content
return "".join(c.text for c in (content or []) if isinstance(c, TextChunk))


def main():
# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000)

messages = []
total_prompt = 0
total_completion = 0

for i, user_text in enumerate(TURNS, start=1):
messages.append(UserMessage(content=user_text))
response = client.chat.complete(
model=MODEL,
messages=messages,
reasoning_effort="high",
temperature=0.7,
)
message = response.choices[0].message
usage = response.usage
total_prompt += usage.prompt_tokens
total_completion += usage.completion_tokens

print(
f"turn {i}: prompt={usage.prompt_tokens:>4} "
f"completion={usage.completion_tokens:>4} -> {final_text(message.content)}"
)
# Append the full assistant message back into history so the
# ThinkChunks are preserved across turns.
messages.append(message)

print(
f"TOTAL: prompt={total_prompt} completion={total_completion} "
f"(sum {total_prompt + total_completion})"
)


if __name__ == "__main__":
main()
43 changes: 43 additions & 0 deletions examples/mistral/chat/reasoning_response_shape.py
@@ -0,0 +1,43 @@
#!/usr/bin/env python

# Print the raw shape of a chat response when using `reasoning_effort`.
# Run this first to see what ThinkChunk / TextChunk look like in the wire
# format, then move on to the other reasoning_*.py examples.
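#
# Illustrative shape only (field names may differ across SDK versions;
# run the script to see the real payload):
#
#   reasoning_effort="high" -> message.content is a list of chunks, e.g.
#     [{"type": "thinking", "thinking": [{"type": "text", "text": "..."}]},
#      {"type": "text", "text": "12 * 14 = 168."}]
#   reasoning_effort="none" -> message.content is a plain string, e.g.
#     "12 * 14 = 168."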

import json
import os

from mistralai import Mistral
from mistralai.models import UserMessage


def main():
# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000)

prompt = "What is 12 * 14? Answer in one short sentence."

for effort in ["high", "none"]:
print(f"\n========== reasoning_effort={effort!r} ==========")
response = client.chat.complete(
model="mistral-medium-3-5",
messages=[UserMessage(content=prompt)],
reasoning_effort=effort,
temperature=0.7,
)
message = response.choices[0].message
print(f"type(message.content) = {type(message.content).__name__}")
print("message.content =")
if isinstance(message.content, str):
print(json.dumps(message.content, indent=2))
else:
print(
json.dumps(
[chunk.model_dump() for chunk in message.content],
indent=2,
)
)


if __name__ == "__main__":
main()
64 changes: 64 additions & 0 deletions examples/mistral/chat/reasoning_with_streaming.py
@@ -0,0 +1,64 @@
#!/usr/bin/env python
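
# Streaming variant: print the thinking trace and the final answer as
# they arrive. See the comment in main() for how delta.content changes
# shape during and after the thinking phase.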

import os

from mistralai import Mistral
from mistralai.models import TextChunk, ThinkChunk, UserMessage


def main():
api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-medium-3-5"

# Bump request timeout because reasoning runs can be long.
client = Mistral(api_key=api_key, timeout_ms=300_000)

# While the model is thinking, delta.content is a list containing a
# ThinkChunk. After the thinking phase ends, delta.content arrives as
# plain string fragments. The transition event may contain both a closing
# ThinkChunk and the first TextChunk in a single list.
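    #
    # Illustrative delta sequence (shapes only, not real model output):
    #   [ThinkChunk(thinking=[TextChunk(text="Let t be the time ...")])]
    #   [ThinkChunk(...), TextChunk(text="They meet ")]   <- transition event
    #   "at 11:30."                                       <- plain string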
in_thinking = False
for event in client.chat.stream(
model=model,
messages=[
UserMessage(
content=(
"If a train leaves Paris at 9am going 120 km/h and another "
"leaves Lyon at 10am going 150 km/h on the same track, "
"when do they meet? Paris-Lyon is 465 km."
)
)
],
reasoning_effort="high",
temperature=0.7,
):
delta = event.data.choices[0].delta.content
if not delta:
continue

if isinstance(delta, str):
if in_thinking:
print("\n--- /thinking ---")
in_thinking = False
print(delta, end="", flush=True)
continue

for chunk in delta:
if isinstance(chunk, ThinkChunk):
if not in_thinking:
print("--- thinking ---")
in_thinking = True
for inner in chunk.thinking:
if isinstance(inner, TextChunk):
print(inner.text, end="", flush=True)
elif isinstance(chunk, TextChunk):
if in_thinking:
print("\n--- /thinking ---")
in_thinking = False
print(chunk.text, end="", flush=True)

print()


if __name__ == "__main__":
main()