From d9e5354dc5fe40d6beb766753090c789a3a8d53f Mon Sep 17 00:00:00 2001
From: andreaonofrei01
Date: Mon, 4 May 2026 14:03:46 +0200
Subject: [PATCH 1/4] docs(examples): add chat reasoning_effort examples

Add four examples in examples/mistral/chat/ demonstrating
reasoning_effort on mistral-medium-3-5:

- reasoning_response_shape.py: dump the raw response shape for
  reasoning_effort="high" vs "none" so users see the ThinkChunk /
  TextChunk JSON before consuming it.
- reasoning.py: single-turn call, iterate ThinkChunk and TextChunk in
  message.content.
- reasoning_with_streaming.py: handle streaming deltas where chunks
  arrive as ThinkChunk lists during thinking and as plain string
  fragments after thinking ends.
- reasoning_multi_turn.py: run the same 3-turn math chain with two
  replay strategies (keep vs drop ThinkChunks) and print token usage
  so the cost difference is visible.
---
 examples/mistral/chat/reasoning.py            | 56 +++++++++++
 examples/mistral/chat/reasoning_multi_turn.py | 96 +++++++++++++++++++
 .../mistral/chat/reasoning_response_shape.py  | 47 +++++++++
 .../mistral/chat/reasoning_with_streaming.py  | 68 +++++++++++++
 4 files changed, 267 insertions(+)
 create mode 100644 examples/mistral/chat/reasoning.py
 create mode 100644 examples/mistral/chat/reasoning_multi_turn.py
 create mode 100644 examples/mistral/chat/reasoning_response_shape.py
 create mode 100644 examples/mistral/chat/reasoning_with_streaming.py

diff --git a/examples/mistral/chat/reasoning.py b/examples/mistral/chat/reasoning.py
new file mode 100644
index 00000000..dbdb14cf
--- /dev/null
+++ b/examples/mistral/chat/reasoning.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+import os
+
+import httpx
+
+from mistralai.client import Mistral
+from mistralai.client.models import TextChunk, ThinkChunk, UserMessage
+
+
+def main():
+    api_key = os.environ["MISTRAL_API_KEY"]
+    model = "mistral-medium-3-5"
+
+    client = Mistral(
+        api_key=api_key,
+        client=httpx.Client(timeout=httpx.Timeout(300.0)),
+    )
+
+    chat_response = client.chat.complete(
+        model=model,
+        messages=[
+            UserMessage(
+                content=(
+                    "John is one of 4 children. The first sister is 4 years old. "
+                    "Next year, the second sister will be twice as old as the first sister. "
+                    "The third sister is two years older than the second sister. "
+                    "The third sister is half the age of her older brother. "
+                    "How old is John?"
+                )
+            )
+        ],
+        reasoning_effort="high",
+        temperature=0.7,
+    )
+
+    # With reasoning_effort="high", message.content is a list of chunks.
+    # With reasoning_effort="none", message.content is a plain string.
+    content = chat_response.choices[0].message.content
+    if isinstance(content, str):
+        print(content)
+        return
+
+    for chunk in content or []:
+        if isinstance(chunk, ThinkChunk):
+            print("--- thinking ---")
+            for inner in chunk.thinking:
+                if isinstance(inner, TextChunk):
+                    print(inner.text)
+            print("--- /thinking ---")
+        elif isinstance(chunk, TextChunk):
+            print(chunk.text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/mistral/chat/reasoning_multi_turn.py b/examples/mistral/chat/reasoning_multi_turn.py
new file mode 100644
index 00000000..fb0a756d
--- /dev/null
+++ b/examples/mistral/chat/reasoning_multi_turn.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+# Multi-turn conversation with a reasoning model.
+#
+# When the assistant returns a list of chunks (ThinkChunk + TextChunk),
+# you must choose what to put back into `messages` for the next turn.
+# This example runs the same 3-turn math chain with two strategies and +# prints the resulting token usage so you can see the tradeoff: +# +# A) keep ThinkChunks -> the prompt grows fast as reasoning accumulates +# B) drop ThinkChunks -> only the final answer is replayed +# +# Both produce the correct answer here. Pick based on whether your task +# benefits from the model seeing its prior reasoning. + +import os + +import httpx + +from mistralai.client import Mistral +from mistralai.client.models import ( + AssistantMessage, + TextChunk, + ThinkChunk, + UserMessage, +) + +MODEL = "mistral-medium-3-5" +TURNS = [ + "What is 17 * 23?", + "Now multiply that by 3.", + "And subtract 100 from the result.", +] + + +def final_text(content): + if isinstance(content, str): + return content + return "".join(c.text for c in (content or []) if isinstance(c, TextChunk)) + + +def keep_thinking(content): + return content + + +def drop_thinking(content): + if isinstance(content, str): + return content + return [c for c in (content or []) if not isinstance(c, ThinkChunk)] + + +def run_chain(client, label, build_history): + print(f"\n========== {label} ==========") + messages = [] + total_prompt = 0 + total_completion = 0 + last_answer = "" + + for i, user_text in enumerate(TURNS, start=1): + messages.append(UserMessage(content=user_text)) + response = client.chat.complete( + model=MODEL, + messages=messages, + reasoning_effort="high", + temperature=0.7, + ) + content = response.choices[0].message.content + usage = response.usage + total_prompt += usage.prompt_tokens + total_completion += usage.completion_tokens + last_answer = final_text(content) + + print( + f"turn {i}: prompt={usage.prompt_tokens:>4} " + f"completion={usage.completion_tokens:>4} -> {last_answer}" + ) + messages.append(AssistantMessage(content=build_history(content))) + + print( + f"TOTAL: prompt={total_prompt} completion={total_completion} " + f"(sum {total_prompt + total_completion})" + ) + + +def main(): + client = Mistral( + api_key=os.environ["MISTRAL_API_KEY"], + client=httpx.Client(timeout=httpx.Timeout(300.0)), + ) + + run_chain(client, "A) keep ThinkChunks across turns", keep_thinking) + run_chain(client, "B) drop ThinkChunks across turns", drop_thinking) + + +if __name__ == "__main__": + main() diff --git a/examples/mistral/chat/reasoning_response_shape.py b/examples/mistral/chat/reasoning_response_shape.py new file mode 100644 index 00000000..ac93078f --- /dev/null +++ b/examples/mistral/chat/reasoning_response_shape.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +# Print the raw shape of a chat response when using `reasoning_effort`. +# Run this first to see what ThinkChunk / TextChunk look like in the wire +# format, then move on to the other reasoning_*.py examples. + +import json +import os + +import httpx + +from mistralai.client import Mistral +from mistralai.client.models import UserMessage + + +def main(): + client = Mistral( + api_key=os.environ["MISTRAL_API_KEY"], + client=httpx.Client(timeout=httpx.Timeout(300.0)), + ) + + prompt = "What is 12 * 14? Answer in one short sentence." 
+ + for effort in ["high", "none"]: + print(f"\n========== reasoning_effort={effort!r} ==========") + response = client.chat.complete( + model="mistral-medium-3-5", + messages=[UserMessage(content=prompt)], + reasoning_effort=effort, + temperature=0.7, + ) + message = response.choices[0].message + print(f"type(message.content) = {type(message.content).__name__}") + print("message.content =") + if isinstance(message.content, str): + print(json.dumps(message.content, indent=2)) + else: + print( + json.dumps( + [chunk.model_dump() for chunk in message.content], + indent=2, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/examples/mistral/chat/reasoning_with_streaming.py b/examples/mistral/chat/reasoning_with_streaming.py new file mode 100644 index 00000000..7b761519 --- /dev/null +++ b/examples/mistral/chat/reasoning_with_streaming.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +import os + +import httpx + +from mistralai.client import Mistral +from mistralai.client.models import TextChunk, ThinkChunk, UserMessage + + +def main(): + api_key = os.environ["MISTRAL_API_KEY"] + model = "mistral-medium-3-5" + + client = Mistral( + api_key=api_key, + client=httpx.Client(timeout=httpx.Timeout(300.0)), + ) + + # While the model is thinking, delta.content is a list containing a + # ThinkChunk. After the thinking phase ends, delta.content arrives as + # plain string fragments. The transition event may contain both a closing + # ThinkChunk and the first TextChunk in a single list. + in_thinking = False + for event in client.chat.stream( + model=model, + messages=[ + UserMessage( + content=( + "If a train leaves Paris at 9am going 120 km/h and another " + "leaves Lyon at 10am going 150 km/h on the same track, " + "when do they meet? Paris-Lyon is 465 km." + ) + ) + ], + reasoning_effort="high", + temperature=0.7, + ): + delta = event.data.choices[0].delta.content + if not delta: + continue + + if isinstance(delta, str): + if in_thinking: + print("\n--- /thinking ---") + in_thinking = False + print(delta, end="", flush=True) + continue + + for chunk in delta: + if isinstance(chunk, ThinkChunk): + if not in_thinking: + print("--- thinking ---") + in_thinking = True + for inner in chunk.thinking: + if isinstance(inner, TextChunk): + print(inner.text, end="", flush=True) + elif isinstance(chunk, TextChunk): + if in_thinking: + print("\n--- /thinking ---") + in_thinking = False + print(chunk.text, end="", flush=True) + + print() + + +if __name__ == "__main__": + main() From e9793b674d565278fdf10b5fe0db98e2d5698a3d Mon Sep 17 00:00:00 2001 From: andreaonofrei01 Date: Mon, 4 May 2026 14:38:33 +0200 Subject: [PATCH 2/4] address review: keep ThinkChunks for MM3.5; use timeout_ms - reasoning_multi_turn.py: drop the keep-vs-drop comparison and recommend keeping ThinkChunks across turns. Per reviewer feedback, dropping the reasoning trace degrades MM3.5 performance. - All four files: replace httpx.Client(timeout=...) with the SDK's timeout_ms parameter; remove the httpx import. 
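
Before/after for the client construction (illustrative snippet; the
300-second timeout value is the one the examples already used):

    # before: custom httpx client, only to raise the request timeout
    client = Mistral(
        api_key=api_key,
        client=httpx.Client(timeout=httpx.Timeout(300.0)),
    )

    # after: the SDK's own timeout parameter, in milliseconds
    client = Mistral(api_key=api_key, timeout_ms=300_000)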
--- examples/mistral/chat/reasoning.py | 8 +--- examples/mistral/chat/reasoning_multi_turn.py | 48 +++++-------------- .../mistral/chat/reasoning_response_shape.py | 8 +--- .../mistral/chat/reasoning_with_streaming.py | 8 +--- 4 files changed, 18 insertions(+), 54 deletions(-) diff --git a/examples/mistral/chat/reasoning.py b/examples/mistral/chat/reasoning.py index dbdb14cf..1e2b1348 100644 --- a/examples/mistral/chat/reasoning.py +++ b/examples/mistral/chat/reasoning.py @@ -2,8 +2,6 @@ import os -import httpx - from mistralai.client import Mistral from mistralai.client.models import TextChunk, ThinkChunk, UserMessage @@ -12,10 +10,8 @@ def main(): api_key = os.environ["MISTRAL_API_KEY"] model = "mistral-medium-3-5" - client = Mistral( - api_key=api_key, - client=httpx.Client(timeout=httpx.Timeout(300.0)), - ) + # Bump request timeout because reasoning runs can be long. + client = Mistral(api_key=api_key, timeout_ms=300_000) chat_response = client.chat.complete( model=model, diff --git a/examples/mistral/chat/reasoning_multi_turn.py b/examples/mistral/chat/reasoning_multi_turn.py index fb0a756d..4c970ded 100644 --- a/examples/mistral/chat/reasoning_multi_turn.py +++ b/examples/mistral/chat/reasoning_multi_turn.py @@ -2,21 +2,16 @@ # Multi-turn conversation with a reasoning model. # -# When the assistant returns a list of chunks (ThinkChunk + TextChunk), -# you must choose what to put back into `messages` for the next turn. -# This example runs the same 3-turn math chain with two strategies and -# prints the resulting token usage so you can see the tradeoff: +# IMPORTANT: for Mistral Medium 3.5, always replay the assistant turn +# back into `messages` with its ThinkChunks intact. Dropping the +# reasoning trace across turns DEGRADES the model's performance. # -# A) keep ThinkChunks -> the prompt grows fast as reasoning accumulates -# B) drop ThinkChunks -> only the final answer is replayed -# -# Both produce the correct answer here. Pick based on whether your task -# benefits from the model seeing its prior reasoning. +# This example runs a 3-turn math chain and prints per-turn token +# usage. The prompt grows as the reasoning trace accumulates; that +# growth is expected. import os -import httpx - from mistralai.client import Mistral from mistralai.client.models import ( AssistantMessage, @@ -39,22 +34,13 @@ def final_text(content): return "".join(c.text for c in (content or []) if isinstance(c, TextChunk)) -def keep_thinking(content): - return content - - -def drop_thinking(content): - if isinstance(content, str): - return content - return [c for c in (content or []) if not isinstance(c, ThinkChunk)] - +def main(): + # Bump request timeout because reasoning runs can be long. 
+ client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000) -def run_chain(client, label, build_history): - print(f"\n========== {label} ==========") messages = [] total_prompt = 0 total_completion = 0 - last_answer = "" for i, user_text in enumerate(TURNS, start=1): messages.append(UserMessage(content=user_text)) @@ -68,13 +54,13 @@ def run_chain(client, label, build_history): usage = response.usage total_prompt += usage.prompt_tokens total_completion += usage.completion_tokens - last_answer = final_text(content) print( f"turn {i}: prompt={usage.prompt_tokens:>4} " - f"completion={usage.completion_tokens:>4} -> {last_answer}" + f"completion={usage.completion_tokens:>4} -> {final_text(content)}" ) - messages.append(AssistantMessage(content=build_history(content))) + # Replay the full assistant content (ThinkChunks included). + messages.append(AssistantMessage(content=content)) print( f"TOTAL: prompt={total_prompt} completion={total_completion} " @@ -82,15 +68,5 @@ def run_chain(client, label, build_history): ) -def main(): - client = Mistral( - api_key=os.environ["MISTRAL_API_KEY"], - client=httpx.Client(timeout=httpx.Timeout(300.0)), - ) - - run_chain(client, "A) keep ThinkChunks across turns", keep_thinking) - run_chain(client, "B) drop ThinkChunks across turns", drop_thinking) - - if __name__ == "__main__": main() diff --git a/examples/mistral/chat/reasoning_response_shape.py b/examples/mistral/chat/reasoning_response_shape.py index ac93078f..6cb820ab 100644 --- a/examples/mistral/chat/reasoning_response_shape.py +++ b/examples/mistral/chat/reasoning_response_shape.py @@ -7,17 +7,13 @@ import json import os -import httpx - from mistralai.client import Mistral from mistralai.client.models import UserMessage def main(): - client = Mistral( - api_key=os.environ["MISTRAL_API_KEY"], - client=httpx.Client(timeout=httpx.Timeout(300.0)), - ) + # Bump request timeout because reasoning runs can be long. + client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000) prompt = "What is 12 * 14? Answer in one short sentence." diff --git a/examples/mistral/chat/reasoning_with_streaming.py b/examples/mistral/chat/reasoning_with_streaming.py index 7b761519..377844a2 100644 --- a/examples/mistral/chat/reasoning_with_streaming.py +++ b/examples/mistral/chat/reasoning_with_streaming.py @@ -2,8 +2,6 @@ import os -import httpx - from mistralai.client import Mistral from mistralai.client.models import TextChunk, ThinkChunk, UserMessage @@ -12,10 +10,8 @@ def main(): api_key = os.environ["MISTRAL_API_KEY"] model = "mistral-medium-3-5" - client = Mistral( - api_key=api_key, - client=httpx.Client(timeout=httpx.Timeout(300.0)), - ) + # Bump request timeout because reasoning runs can be long. + client = Mistral(api_key=api_key, timeout_ms=300_000) # While the model is thinking, delta.content is a list containing a # ThinkChunk. 
After the thinking phase ends, delta.content arrives as From 7dd374d7b30b970f45529c38abddf7db74eccb99 Mon Sep 17 00:00:00 2001 From: andreaonofrei01 Date: Mon, 4 May 2026 14:50:05 +0200 Subject: [PATCH 3/4] fix lint: remove unused ThinkChunk import in reasoning_multi_turn --- examples/mistral/chat/reasoning_multi_turn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/mistral/chat/reasoning_multi_turn.py b/examples/mistral/chat/reasoning_multi_turn.py index 4c970ded..3ba40e4e 100644 --- a/examples/mistral/chat/reasoning_multi_turn.py +++ b/examples/mistral/chat/reasoning_multi_turn.py @@ -16,7 +16,6 @@ from mistralai.client.models import ( AssistantMessage, TextChunk, - ThinkChunk, UserMessage, ) From 1af9da5fbd996a7e0892b3112b07692a653dd7bd Mon Sep 17 00:00:00 2001 From: andreaonofrei01 Date: Mon, 4 May 2026 15:07:47 +0200 Subject: [PATCH 4/4] simplify multi-turn: append response.choices[0].message directly Avoids the unnecessary AssistantMessage(content=content) re-wrap and forwards any future fields on AssistantMessage automatically. Verified end-to-end: 3/3 runs of the math chain produce 391 -> 1173 -> 1073, and history inspection confirms each AssistantMessage slot preserves [ThinkChunk, TextChunk]. --- examples/mistral/chat/reasoning_multi_turn.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/examples/mistral/chat/reasoning_multi_turn.py b/examples/mistral/chat/reasoning_multi_turn.py index 3ba40e4e..84b880de 100644 --- a/examples/mistral/chat/reasoning_multi_turn.py +++ b/examples/mistral/chat/reasoning_multi_turn.py @@ -13,11 +13,7 @@ import os from mistralai.client import Mistral -from mistralai.client.models import ( - AssistantMessage, - TextChunk, - UserMessage, -) +from mistralai.client.models import TextChunk, UserMessage MODEL = "mistral-medium-3-5" TURNS = [ @@ -49,17 +45,18 @@ def main(): reasoning_effort="high", temperature=0.7, ) - content = response.choices[0].message.content + message = response.choices[0].message usage = response.usage total_prompt += usage.prompt_tokens total_completion += usage.completion_tokens print( f"turn {i}: prompt={usage.prompt_tokens:>4} " - f"completion={usage.completion_tokens:>4} -> {final_text(content)}" + f"completion={usage.completion_tokens:>4} -> {final_text(message.content)}" ) - # Replay the full assistant content (ThinkChunks included). - messages.append(AssistantMessage(content=content)) + # Append the full assistant message back into history so the + # ThinkChunks are preserved across turns. + messages.append(message) print( f"TOTAL: prompt={total_prompt} completion={total_completion} "
         f"(sum {total_prompt + total_completion})"
     )