diff --git a/examples/mistral/chat/reasoning.py b/examples/mistral/chat/reasoning.py
new file mode 100644
index 00000000..1e2b1348
--- /dev/null
+++ b/examples/mistral/chat/reasoning.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+import os
+
+from mistralai.client import Mistral
+from mistralai.client.models import TextChunk, ThinkChunk, UserMessage
+
+
+def main():
+    api_key = os.environ["MISTRAL_API_KEY"]
+    model = "mistral-medium-3-5"
+
+    # Bump request timeout because reasoning runs can be long.
+    client = Mistral(api_key=api_key, timeout_ms=300_000)
+
+    chat_response = client.chat.complete(
+        model=model,
+        messages=[
+            UserMessage(
+                content=(
+                    "John is one of 4 children. The first sister is 4 years old. "
+                    "Next year, the second sister will be twice as old as the first sister. "
+                    "The third sister is two years older than the second sister. "
+                    "The third sister is half the age of her older brother. "
+                    "How old is John?"
+                )
+            )
+        ],
+        reasoning_effort="high",
+        temperature=0.7,
+    )
+
+    # With reasoning_effort="high", message.content is a list of chunks.
+    # With reasoning_effort="none", message.content is a plain string.
+    content = chat_response.choices[0].message.content
+    if isinstance(content, str):
+        print(content)
+        return
+
+    for chunk in content or []:
+        if isinstance(chunk, ThinkChunk):
+            print("--- thinking ---")
+            for inner in chunk.thinking:
+                if isinstance(inner, TextChunk):
+                    print(inner.text)
+            print("--- /thinking ---")
+        elif isinstance(chunk, TextChunk):
+            print(chunk.text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/mistral/chat/reasoning_multi_turn.py b/examples/mistral/chat/reasoning_multi_turn.py
new file mode 100644
index 00000000..84b880de
--- /dev/null
+++ b/examples/mistral/chat/reasoning_multi_turn.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+# Multi-turn conversation with a reasoning model.
+#
+# IMPORTANT: for Mistral Medium 3.5, always replay the assistant turn
+# back into `messages` with its ThinkChunks intact. Dropping the
+# reasoning trace across turns DEGRADES the model's performance.
+#
+# This example runs a 3-turn math chain and prints per-turn token
+# usage. The prompt grows as the reasoning trace accumulates; that
+# growth is expected.
+
+import os
+
+from mistralai.client import Mistral
+from mistralai.client.models import TextChunk, UserMessage
+
+MODEL = "mistral-medium-3-5"
+TURNS = [
+    "What is 17 * 23?",
+    "Now multiply that by 3.",
+    "And subtract 100 from the result.",
+]
+
+
+def final_text(content):
+    if isinstance(content, str):
+        return content
+    return "".join(c.text for c in (content or []) if isinstance(c, TextChunk))
+
+
+def main():
+    # Bump request timeout because reasoning runs can be long.
+    client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000)
+
+    messages = []
+    total_prompt = 0
+    total_completion = 0
+
+    for i, user_text in enumerate(TURNS, start=1):
+        messages.append(UserMessage(content=user_text))
+        response = client.chat.complete(
+            model=MODEL,
+            messages=messages,
+            reasoning_effort="high",
+            temperature=0.7,
+        )
+        message = response.choices[0].message
+        usage = response.usage
+        total_prompt += usage.prompt_tokens
+        total_completion += usage.completion_tokens
+
+        print(
+            f"turn {i}: prompt={usage.prompt_tokens:>4} "
+            f"completion={usage.completion_tokens:>4} -> {final_text(message.content)}"
+        )
+        # Append the full assistant message back into history so the
+        # ThinkChunks are preserved across turns.
+        messages.append(message)
+
+    print(
+        f"TOTAL: prompt={total_prompt} completion={total_completion} "
+        f"(sum {total_prompt + total_completion})"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/mistral/chat/reasoning_response_shape.py b/examples/mistral/chat/reasoning_response_shape.py
new file mode 100644
index 00000000..6cb820ab
--- /dev/null
+++ b/examples/mistral/chat/reasoning_response_shape.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+# Print the raw shape of a chat response when using `reasoning_effort`.
+# Run this first to see what ThinkChunk / TextChunk look like in the wire
+# format, then move on to the other reasoning_*.py examples.
+
+import json
+import os
+
+from mistralai.client import Mistral
+from mistralai.client.models import UserMessage
+
+
+def main():
+    # Bump request timeout because reasoning runs can be long.
+    client = Mistral(api_key=os.environ["MISTRAL_API_KEY"], timeout_ms=300_000)
+
+    prompt = "What is 12 * 14? Answer in one short sentence."
+
+    for effort in ["high", "none"]:
+        print(f"\n========== reasoning_effort={effort!r} ==========")
+        response = client.chat.complete(
+            model="mistral-medium-3-5",
+            messages=[UserMessage(content=prompt)],
+            reasoning_effort=effort,
+            temperature=0.7,
+        )
+        message = response.choices[0].message
+        print(f"type(message.content) = {type(message.content).__name__}")
+        print("message.content =")
+        if isinstance(message.content, str):
+            print(json.dumps(message.content, indent=2))
+        else:
+            print(
+                json.dumps(
+                    [chunk.model_dump() for chunk in message.content],
+                    indent=2,
+                )
+            )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/mistral/chat/reasoning_with_streaming.py b/examples/mistral/chat/reasoning_with_streaming.py
new file mode 100644
index 00000000..377844a2
--- /dev/null
+++ b/examples/mistral/chat/reasoning_with_streaming.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+import os
+
+from mistralai.client import Mistral
+from mistralai.client.models import TextChunk, ThinkChunk, UserMessage
+
+
+def main():
+    api_key = os.environ["MISTRAL_API_KEY"]
+    model = "mistral-medium-3-5"
+
+    # Bump request timeout because reasoning runs can be long.
+    client = Mistral(api_key=api_key, timeout_ms=300_000)
+
+    # While the model is thinking, delta.content is a list containing a
+    # ThinkChunk. After the thinking phase ends, delta.content arrives as
+    # plain string fragments. The transition event may contain both a closing
+    # ThinkChunk and the first TextChunk in a single list.
+    in_thinking = False
+    for event in client.chat.stream(
+        model=model,
+        messages=[
+            UserMessage(
+                content=(
+                    "If a train leaves Paris at 9am going 120 km/h and another "
+                    "leaves Lyon at 10am going 150 km/h on the same track, "
+                    "when do they meet? Paris-Lyon is 465 km."
+                )
+            )
+        ],
+        reasoning_effort="high",
+        temperature=0.7,
+    ):
+        delta = event.data.choices[0].delta.content
+        if not delta:
+            continue
+
+        if isinstance(delta, str):
+            if in_thinking:
+                print("\n--- /thinking ---")
+                in_thinking = False
+            print(delta, end="", flush=True)
+            continue
+
+        for chunk in delta:
+            if isinstance(chunk, ThinkChunk):
+                if not in_thinking:
+                    print("--- thinking ---")
+                    in_thinking = True
+                for inner in chunk.thinking:
+                    if isinstance(inner, TextChunk):
+                        print(inner.text, end="", flush=True)
+            elif isinstance(chunk, TextChunk):
+                if in_thinking:
+                    print("\n--- /thinking ---")
+                    in_thinking = False
+                print(chunk.text, end="", flush=True)
+
+    print()
+
+
+if __name__ == "__main__":
+    main()