From b6841a877ad7b9d65584e37596ec75c7a98691fe Mon Sep 17 00:00:00 2001
From: Amit Saroj <sarojamit4956@gmail.com>
Date: Wed, 3 Jun 2026 23:06:30 +0530
Subject: [PATCH 1/3] fix(ai-openai): migrate WebRTC realtime adapter to OpenAI
 GA API

---
 packages/ai-openai/src/realtime/adapter.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/ai-openai/src/realtime/adapter.ts b/packages/ai-openai/src/realtime/adapter.ts
index 5f8a5c09c..5830ce37c 100644
--- a/packages/ai-openai/src/realtime/adapter.ts
+++ b/packages/ai-openai/src/realtime/adapter.ts
@@ -190,7 +190,7 @@ async function createWebRTCConnection(
   // Send SDP to OpenAI and get answer. `offer.sdp` is `string | undefined` per
   // the WebRTC type definitions; coerce to `null` (which `RequestInit.body`
   // accepts) under exactOptionalPropertyTypes.
-  const sdpResponse = await fetch(`${OPENAI_REALTIME_URL}?model=${model}`, {
+  const sdpResponse = await fetch(`${OPENAI_REALTIME_URL}/calls`, {
     method: 'POST',
     headers: {
       Authorization: `Bearer ${token.token}`,
@@ -586,7 +586,7 @@ async function createWebRTCConnection(
     },
 
     updateSession(config: Partial<RealtimeSessionConfig>) {
-      const sessionUpdate: Record<string, unknown> = {}
+      const sessionUpdate: Record<string, unknown> = { type: 'realtime' }
 
       if (config.instructions) {
         sessionUpdate.instructions = config.instructions

From 676dd84a067a1d820d800224494f56117b36185e Mon Sep 17 00:00:00 2001
From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com>
Date: Wed, 10 Jun 2026 14:06:32 +1000
Subject: [PATCH 2/3] fix(ai-openai): complete realtime Beta-to-GA migration

Completes the GA migration started in this PR so the whole realtime flow
works against OpenAI's GA API (the Beta shape was shut down 2026-05-12):

- openaiRealtimeToken() mints ephemeral keys via POST
  /v1/realtime/client_secrets (the Beta /v1/realtime/sessions endpoint is
  retired) and parses the GA top-level value/expires_at response shape
- session.update payloads use the GA shape via a new pure
  buildSessionUpdate() helper: required session.type, audio.input.*,
  audio.output.voice, output_modalities, max_output_tokens; temperature
  (removed in GA) is dropped with a debug log instead of getting the whole
  update rejected with unknown_parameter
- server events handled under GA names (response.output_audio_transcript.*,
  response.output_audio.*, output_text/output_audio content parts)
- removed the now-unused model local in createWebRTCConnection (the GA
  /calls endpoint rejects ?model=; the model is bound to the ephemeral key)
- default model gpt-realtime; dead gpt-4o-(mini-)realtime-preview ids
  (shut down 2026-05-07) removed from OpenAIRealtimeModel, docs, and
  examples
- unit tests for the session.update payload and client-secret
  request/response shapes; changeset added

Live-verified against the OpenAI API: client_secrets 200 (ek_ token),
/v1/realtime/calls 201 with SDP answer, and session.updated echoing voice,
semantic VAD, tools, output_modalities, and max_output_tokens.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .changeset/openai-realtime-ga-migration.md    |  12 ++
 docs/media/realtime-chat.md                   |  10 +-
 docs/reference/functions/realtimeToken.md     |   4 +-
 .../_execute-prompt/api.realtime-token.ts     |   2 +-
 .../ts-react-chat/src/lib/use-realtime.ts     |   2 +-
 packages/ai-openai/src/realtime/adapter.ts    |  92 ++++----------
 .../ai-openai/src/realtime/session-update.ts  |  76 ++++++++++++
 packages/ai-openai/src/realtime/token.ts      |  94 +++++++++-----
 packages/ai-openai/src/realtime/types.ts      |  57 +++------
 .../tests/realtime-session-update.test.ts     | 115 ++++++++++++++++++
 .../ai-openai/tests/realtime-token.test.ts    |  55 +++++++++
 packages/ai/src/realtime/index.ts             |   4 +-
 12 files changed, 372 insertions(+), 151 deletions(-)
 create mode 100644 .changeset/openai-realtime-ga-migration.md
 create mode 100644 packages/ai-openai/src/realtime/session-update.ts
 create mode 100644 packages/ai-openai/tests/realtime-session-update.test.ts
 create mode 100644 packages/ai-openai/tests/realtime-token.test.ts

diff --git a/.changeset/openai-realtime-ga-migration.md b/.changeset/openai-realtime-ga-migration.md
new file mode 100644
index 000000000..85e40347b
--- /dev/null
+++ b/.changeset/openai-realtime-ga-migration.md
@@ -0,0 +1,12 @@
+---
+'@tanstack/ai-openai': patch
+'@tanstack/ai': patch
+---
+
+Migrate the OpenAI realtime adapters from the retired Beta API (shut down 2026-05-12) to the GA API:
+
+- `openaiRealtime()` now exchanges WebRTC SDP via `POST /v1/realtime/calls` (the Beta `?model=` shape returned `beta_api_shape_disabled`).
+- `openaiRealtimeToken()` now mints ephemeral keys via `POST /v1/realtime/client_secrets` instead of the retired `/v1/realtime/sessions`, and parses the GA top-level `value`/`expires_at` response shape.
+- `session.update` payloads use the GA shape: required `session.type`, `audio.input.transcription`, `audio.input.turn_detection`, `audio.output.voice`, `output_modalities`, and `max_output_tokens`. `temperature` was removed from the GA session config and is no longer sent (a debug log notes when it is dropped).
+- Server events are handled under their GA names (`response.output_audio_transcript.*`, `response.output_audio.*`, `output_text`/`output_audio` content parts).
+- The default realtime model is now `gpt-realtime`; the `gpt-4o-(mini-)realtime-preview` ids (shut down by OpenAI on 2026-05-07) were removed from `OpenAIRealtimeModel`.
diff --git a/docs/media/realtime-chat.md b/docs/media/realtime-chat.md
index b625d5189..1b5fe9779 100644
--- a/docs/media/realtime-chat.md
+++ b/docs/media/realtime-chat.md
@@ -48,7 +48,7 @@ const getRealtimeToken = createServerFn({ method: 'POST' })
   .handler(async () => {
     return realtimeToken({
       adapter: openaiRealtimeToken({
-        model: 'gpt-4o-realtime-preview',
+        model: 'gpt-realtime',
       }),
     })
   })
@@ -119,7 +119,7 @@ import { openaiRealtimeToken } from '@tanstack/ai-openai'
 
 const token = await realtimeToken({
   adapter: openaiRealtimeToken({
-    model: 'gpt-4o-realtime-preview',
+    model: 'gpt-realtime',
   }),
 })
 ```
@@ -138,10 +138,8 @@ const adapter = openaiRealtime()
 
 | Model | Description |
 |-------|-------------|
-| `gpt-4o-realtime-preview` | Full realtime model |
-| `gpt-4o-mini-realtime-preview` | Smaller, faster realtime model |
-| `gpt-realtime` | Latest realtime model |
-| `gpt-realtime-mini` | Latest mini realtime model |
+| `gpt-realtime` | Full realtime model |
+| `gpt-realtime-mini` | Smaller, faster realtime model |
 
 **Available voices:** `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, `cedar`
 
diff --git a/docs/reference/functions/realtimeToken.md b/docs/reference/functions/realtimeToken.md
index dd4f6b574..e1548e82f 100644
--- a/docs/reference/functions/realtimeToken.md
+++ b/docs/reference/functions/realtimeToken.md
@@ -41,9 +41,7 @@ export const getRealtimeToken = createServerFn()
   .handler(async () => {
     return realtimeToken({
       adapter: openaiRealtimeToken({
-        model: 'gpt-4o-realtime-preview',
-        voice: 'alloy',
-        instructions: 'You are a helpful assistant...',
+        model: 'gpt-realtime',
       }),
     })
   })
diff --git a/examples/ts-code-mode-web/src/routes/_execute-prompt/api.realtime-token.ts b/examples/ts-code-mode-web/src/routes/_execute-prompt/api.realtime-token.ts
index d1cd1ceea..9b4b38caf 100644
--- a/examples/ts-code-mode-web/src/routes/_execute-prompt/api.realtime-token.ts
+++ b/examples/ts-code-mode-web/src/routes/_execute-prompt/api.realtime-token.ts
@@ -11,7 +11,7 @@ export const Route = createFileRoute(
         try {
           const token = await realtimeToken({
             adapter: openaiRealtimeToken({
-              model: 'gpt-4o-realtime-preview',
+              model: 'gpt-realtime',
             }),
           })
           return new Response(JSON.stringify(token), {
diff --git a/examples/ts-react-chat/src/lib/use-realtime.ts b/examples/ts-react-chat/src/lib/use-realtime.ts
index dbe60fc24..c32dd5031 100644
--- a/examples/ts-react-chat/src/lib/use-realtime.ts
+++ b/examples/ts-react-chat/src/lib/use-realtime.ts
@@ -20,7 +20,7 @@ const getRealtimeTokenFn = createServerFn({ method: 'POST' })
     if (data.provider === 'openai') {
       return realtimeToken({
         adapter: openaiRealtimeToken({
-          model: 'gpt-4o-realtime-preview',
+          model: 'gpt-realtime',
         }),
       })
     }
diff --git a/packages/ai-openai/src/realtime/adapter.ts b/packages/ai-openai/src/realtime/adapter.ts
index 5830ce37c..7d9965243 100644
--- a/packages/ai-openai/src/realtime/adapter.ts
+++ b/packages/ai-openai/src/realtime/adapter.ts
@@ -1,4 +1,5 @@
 import { resolveDebugOption } from '@tanstack/ai/adapter-internals'
+import { buildSessionUpdate } from './session-update'
 import type {
   AnyClientTool,
   AudioVisualization,
@@ -47,7 +48,7 @@ export function openaiRealtime(
       token: RealtimeToken,
       _clientTools?: ReadonlyArray<AnyClientTool>,
     ): Promise<RealtimeConnection> {
-      const model = token.config.model ?? 'gpt-4o-realtime-preview'
+      const model = token.config.model ?? 'gpt-realtime'
       logger.request(`activity=realtime provider=openai model=${model}`, {
         provider: 'openai',
         model,
@@ -73,7 +74,6 @@ async function createWebRTCConnection(
   token: RealtimeToken,
   logger: InternalLogger,
 ): Promise<RealtimeConnection> {
-  const model = token.config.model ?? 'gpt-4o-realtime-preview'
   const eventHandlers = new Map<RealtimeEvent, Set<RealtimeEventHandler<any>>>()
 
   // WebRTC peer connection
@@ -187,9 +187,11 @@ async function createWebRTCConnection(
   const offer = await pc.createOffer()
   await pc.setLocalDescription(offer)
 
-  // Send SDP to OpenAI and get answer. `offer.sdp` is `string | undefined` per
-  // the WebRTC type definitions; coerce to `null` (which `RequestInit.body`
-  // accepts) under exactOptionalPropertyTypes.
+  // Send SDP to OpenAI's GA `/calls` endpoint and get the answer. The model
+  // is bound to the ephemeral token (minted via `/v1/realtime/client_secrets`),
+  // so it must NOT be passed as a query param — GA rejects `?model=` with a
+  // 400. `offer.sdp` is `string | undefined` per the WebRTC type definitions;
+  // coerce to `null`, which `RequestInit.body` accepts.
   const sdpResponse = await fetch(`${OPENAI_REALTIME_URL}/calls`, {
     method: 'POST',
     headers: {
@@ -260,7 +262,7 @@ async function createWebRTCConnection(
         break
       }
 
-      case 'response.audio_transcript.delta': {
+      case 'response.output_audio_transcript.delta': {
         const delta = event.delta as string
         emit('transcript', {
           role: 'assistant',
@@ -270,7 +272,7 @@ async function createWebRTCConnection(
         break
       }
 
-      case 'response.audio_transcript.done': {
+      case 'response.output_audio_transcript.done': {
         const transcript = event.transcript as string
         emit('transcript', { role: 'assistant', transcript, isFinal: true })
         break
@@ -296,14 +298,14 @@ async function createWebRTCConnection(
         break
       }
 
-      case 'response.audio.delta':
+      case 'response.output_audio.delta':
         if (currentMode !== 'speaking') {
           currentMode = 'speaking'
           emit('mode_change', { mode: 'speaking' })
         }
         break
 
-      case 'response.audio.done':
+      case 'response.output_audio.done':
         break
 
       case 'response.function_call_arguments.done': {
@@ -359,12 +361,14 @@ async function createWebRTCConnection(
             if (item.type === 'message' && item.content) {
               const content = item.content as Array<Record<string, unknown>>
               for (const part of content) {
-                if (part.type === 'audio' && part.transcript) {
+                // GA renamed assistant content types: `audio` -> `output_audio`,
+                // `text` -> `output_text`
+                if (part.type === 'output_audio' && part.transcript) {
                   message.parts.push({
                     type: 'audio',
                     transcript: part.transcript as string,
                   })
-                } else if (part.type === 'text' && part.text) {
+                } else if (part.type === 'output_text' && part.text) {
                   message.parts.push({
                     type: 'text',
                     content: part.text as string,
@@ -586,65 +590,19 @@ async function createWebRTCConnection(
     },
 
     updateSession(config: Partial<RealtimeSessionConfig>) {
-      const sessionUpdate: Record<string, unknown> = { type: 'realtime' }
-
-      if (config.instructions) {
-        sessionUpdate.instructions = config.instructions
-      }
-
-      if (config.voice) {
-        sessionUpdate.voice = config.voice
-      }
-
-      if (config.vadMode) {
-        if (config.vadMode === 'semantic') {
-          sessionUpdate.turn_detection = {
-            type: 'semantic_vad',
-            eagerness: config.semanticEagerness ?? 'medium',
-          }
-        } else if (config.vadMode === 'server') {
-          sessionUpdate.turn_detection = {
-            type: 'server_vad',
-            threshold: config.vadConfig?.threshold ?? 0.5,
-            prefix_padding_ms: config.vadConfig?.prefixPaddingMs ?? 300,
-            silence_duration_ms: config.vadConfig?.silenceDurationMs ?? 500,
-          }
-        } else {
-          sessionUpdate.turn_detection = null
-        }
-      }
-
-      if (config.tools !== undefined) {
-        sessionUpdate.tools = config.tools.map((t) => ({
-          type: 'function',
-          name: t.name,
-          description: t.description,
-          parameters: t.inputSchema ?? { type: 'object', properties: {} },
-        }))
-        sessionUpdate.tool_choice = 'auto'
-      }
-
-      if (config.outputModalities) {
-        sessionUpdate.modalities = config.outputModalities
-      }
-
       if (config.temperature !== undefined) {
-        sessionUpdate.temperature = config.temperature
-      }
-
-      if (config.maxOutputTokens !== undefined) {
-        sessionUpdate.max_response_output_tokens = config.maxOutputTokens
+        // The GA API removed `temperature` from session config; sending it
+        // would get the whole update rejected with `unknown_parameter`.
+        logger.provider(
+          'provider=openai direction=out type=session.update dropped `temperature` (removed in the GA realtime API)',
+          { frame: { temperature: config.temperature } },
+        )
       }
 
-      // Always enable input audio transcription so user speech is transcribed
-      sessionUpdate.input_audio_transcription = { model: 'whisper-1' }
-
-      if (Object.keys(sessionUpdate).length > 0) {
-        sendEvent({
-          type: 'session.update',
-          session: sessionUpdate,
-        })
-      }
+      sendEvent({
+        type: 'session.update',
+        session: buildSessionUpdate(config),
+      })
     },
 
     interrupt() {
diff --git a/packages/ai-openai/src/realtime/session-update.ts b/packages/ai-openai/src/realtime/session-update.ts
new file mode 100644
index 000000000..5d0118f2d
--- /dev/null
+++ b/packages/ai-openai/src/realtime/session-update.ts
@@ -0,0 +1,76 @@
+import type { RealtimeSessionConfig } from '@tanstack/ai'
+
+/**
+ * Builds the GA-shaped `session.update` payload for OpenAI's realtime API.
+ *
+ * The GA API requires `session.type` on every update and nests audio
+ * settings under `audio.input` / `audio.output` (the flat Beta field names
+ * were retired when the Beta shape was shut down on 2026-05-12). A
+ * `session.update` containing unknown fields is rejected with
+ * `unknown_parameter` and none of the config is applied, so the exact field
+ * names here are load-bearing.
+ *
+ * `temperature` was removed from the GA session config and is intentionally
+ * never sent; the adapter logs when it drops the option.
+ */
+export function buildSessionUpdate(
+  config: Partial<RealtimeSessionConfig>,
+): Record<string, unknown> {
+  // Always enable input audio transcription so user speech is transcribed
+  const audioInput: Record<string, unknown> = {
+    transcription: { model: 'whisper-1' },
+  }
+
+  if (config.vadMode) {
+    if (config.vadMode === 'semantic') {
+      audioInput.turn_detection = {
+        type: 'semantic_vad',
+        eagerness: config.semanticEagerness ?? 'medium',
+      }
+    } else if (config.vadMode === 'server') {
+      audioInput.turn_detection = {
+        type: 'server_vad',
+        threshold: config.vadConfig?.threshold ?? 0.5,
+        prefix_padding_ms: config.vadConfig?.prefixPaddingMs ?? 300,
+        silence_duration_ms: config.vadConfig?.silenceDurationMs ?? 500,
+      }
+    } else {
+      audioInput.turn_detection = null
+    }
+  }
+
+  const audio: Record<string, unknown> = { input: audioInput }
+
+  if (config.voice) {
+    audio.output = { voice: config.voice }
+  }
+
+  const sessionUpdate: Record<string, unknown> = {
+    type: 'realtime',
+    audio,
+  }
+
+  if (config.instructions) {
+    sessionUpdate.instructions = config.instructions
+  }
+
+  if (config.tools !== undefined) {
+    sessionUpdate.tools = config.tools.map((t) => ({
+      type: 'function',
+      name: t.name,
+      description: t.description,
+      parameters: t.inputSchema ?? { type: 'object', properties: {} },
+    }))
+    sessionUpdate.tool_choice = 'auto'
+  }
+
+  if (config.outputModalities) {
+    sessionUpdate.output_modalities = config.outputModalities
+  }
+
+  if (config.maxOutputTokens !== undefined) {
+    sessionUpdate.max_output_tokens = config.maxOutputTokens
+  }
+
+  return sessionUpdate
+}
diff --git a/packages/ai-openai/src/realtime/token.ts b/packages/ai-openai/src/realtime/token.ts
index 6bff9c9c2..816ac8b1d 100644
--- a/packages/ai-openai/src/realtime/token.ts
+++ b/packages/ai-openai/src/realtime/token.ts
@@ -1,19 +1,67 @@
 import { getOpenAIApiKeyFromEnv } from '../utils/client'
 import type { RealtimeToken, RealtimeTokenAdapter } from '@tanstack/ai'
 import type {
+  OpenAIRealtimeClientSecretResponse,
   OpenAIRealtimeModel,
-  OpenAIRealtimeSessionResponse,
   OpenAIRealtimeTokenOptions,
 } from './types'
 
-const OPENAI_REALTIME_SESSIONS_URL =
-  'https://api.openai.com/v1/realtime/sessions'
+const OPENAI_REALTIME_CLIENT_SECRETS_URL =
+  'https://api.openai.com/v1/realtime/client_secrets'
+
+/**
+ * Builds the GA `/v1/realtime/client_secrets` request body.
+ *
+ * The session config (including its required `type`) is nested under the
+ * `session` key. The model is bound to the resulting ephemeral key, so the
+ * client never sends it during the WebRTC SDP exchange.
+ */
+export function buildClientSecretRequest(
+  model: OpenAIRealtimeModel,
+): Record<string, unknown> {
+  return { session: { type: 'realtime', model } }
+}
+
+/**
+ * Parses the GA client secret response into a {@link RealtimeToken}.
+ *
+ * GA returns the ephemeral key at the top level (`value` / `expires_at`),
+ * not nested under `client_secret` like the retired Beta
+ * `/v1/realtime/sessions` response did.
+ */
+export function parseClientSecretResponse(
+  data: Partial<OpenAIRealtimeClientSecretResponse> | undefined,
+  fallbackModel: OpenAIRealtimeModel,
+): RealtimeToken {
+  // Validate shape before dereferencing — the API could return an error
+  // envelope with 200 status, or a partial response under protocol drift.
+  if (
+    !data ||
+    typeof data.value !== 'string' ||
+    typeof data.expires_at !== 'number' ||
+    !Number.isFinite(data.expires_at)
+  ) {
+    throw new Error(
+      'OpenAI realtime client secret response missing or malformed `value`/`expires_at`',
+    )
+  }
+
+  return {
+    provider: 'openai',
+    token: data.value,
+    expiresAt: data.expires_at * 1000,
+    config: {
+      model: data.session?.model ?? fallbackModel,
+    },
+  }
+}
 
 /**
  * Creates an OpenAI realtime token adapter.
  *
- * This adapter generates ephemeral tokens for client-side WebRTC connections.
- * The token is valid for 10 minutes.
+ * This adapter generates ephemeral keys for client-side WebRTC connections
+ * via the GA `/v1/realtime/client_secrets` endpoint. The key is valid for
+ * 10 minutes by default.
  *
  * @param options - Configuration options for the realtime session
  * @returns A RealtimeTokenAdapter for use with realtimeToken()
@@ -24,15 +72,7 @@ const OPENAI_REALTIME_SESSIONS_URL =
  * import { openaiRealtimeToken } from '@tanstack/ai-openai'
  *
  * const token = await realtimeToken({
- *   adapter: openaiRealtimeToken({
- *     model: 'gpt-4o-realtime-preview',
- *     voice: 'alloy',
- *     instructions: 'You are a helpful assistant.',
- *     turnDetection: {
- *       type: 'semantic_vad',
- *       eagerness: 'medium',
- *     },
- *   }),
+ *   adapter: openaiRealtimeToken({ model: 'gpt-realtime' }),
  * })
  * ```
  */
@@ -45,38 +85,32 @@ export function openaiRealtimeToken(
     provider: 'openai',
 
     async generateToken(): Promise<RealtimeToken> {
-      const model: OpenAIRealtimeModel =
-        options.model ?? 'gpt-4o-realtime-preview'
+      const model: OpenAIRealtimeModel = options.model ?? 'gpt-realtime'
 
-      // Call OpenAI API to create session and get ephemeral token.
       // Only the model is sent server-side; all other session config
-      // (instructions, voice, tools, VAD) is applied client-side via session.update.
-      const response = await fetch(OPENAI_REALTIME_SESSIONS_URL, {
+      // (instructions, voice, tools, VAD) is applied client-side via
+      // session.update.
+      const response = await fetch(OPENAI_REALTIME_CLIENT_SECRETS_URL, {
         method: 'POST',
         headers: {
           Authorization: `Bearer ${apiKey}`,
           'Content-Type': 'application/json',
         },
-        body: JSON.stringify({ model }),
+        body: JSON.stringify(buildClientSecretRequest(model)),
       })
 
       if (!response.ok) {
         const errorText = await response.text()
         throw new Error(
-          `OpenAI realtime session creation failed: ${response.status} ${errorText}`,
+          `OpenAI realtime client secret creation failed: ${response.status} ${errorText}`,
         )
       }
 
-      const sessionData: OpenAIRealtimeSessionResponse = await response.json()
+      const data = (await response.json()) as
+        | Partial<OpenAIRealtimeClientSecretResponse>
+        | undefined
 
-      return {
-        provider: 'openai',
-        token: sessionData.client_secret.value,
-        expiresAt: sessionData.client_secret.expires_at * 1000,
-        config: {
-          model: sessionData.model,
-        },
-      }
+      return parseClientSecretResponse(data, model)
     },
   }
 }
diff --git a/packages/ai-openai/src/realtime/types.ts b/packages/ai-openai/src/realtime/types.ts
index be0bf856c..1adcf3b6d 100644
--- a/packages/ai-openai/src/realtime/types.ts
+++ b/packages/ai-openai/src/realtime/types.ts
@@ -16,15 +16,12 @@ export type OpenAIRealtimeVoice =
   | 'cedar'
 
 /**
- * OpenAI realtime model options
+ * OpenAI realtime model options.
+ *
+ * The `gpt-4o-(mini-)realtime-preview` models were shut down by OpenAI on
+ * 2026-05-07 and are no longer listed here.
  */
-export type OpenAIRealtimeModel =
-  | 'gpt-4o-realtime-preview'
-  | 'gpt-4o-realtime-preview-2024-10-01'
-  | 'gpt-4o-mini-realtime-preview'
-  | 'gpt-4o-mini-realtime-preview-2024-12-17'
-  | 'gpt-realtime'
-  | 'gpt-realtime-mini'
+export type OpenAIRealtimeModel = 'gpt-realtime' | 'gpt-realtime-mini'
 
 /**
  * OpenAI semantic VAD configuration
@@ -54,7 +51,7 @@ export type OpenAITurnDetection =
  * Options for the OpenAI realtime token adapter
  */
 export interface OpenAIRealtimeTokenOptions {
-  /** Model to use (default: 'gpt-4o-realtime-preview') */
+  /** Model to use (default: 'gpt-realtime') */
   model?: OpenAIRealtimeModel
 }
 
@@ -74,38 +71,18 @@ export interface OpenAIRealtimeOptions {
 }
 
 /**
- * OpenAI realtime session response from the API
+ * OpenAI GA realtime client secret response from
+ * `POST /v1/realtime/client_secrets`. Minimal shape — only the fields the
+ * token adapter reads.
  */
-export interface OpenAIRealtimeSessionResponse {
-  id: string
-  object: 'realtime.session'
-  model: string
-  modalities: Array<string>
-  instructions: string
-  voice: string
-  input_audio_format: string
-  output_audio_format: string
-  input_audio_transcription: {
-    model: string
-  } | null
-  turn_detection: {
-    type: string
-    threshold?: number
-    prefix_padding_ms?: number
-    silence_duration_ms?: number
-    eagerness?: string
-  } | null
-  tools: Array<{
+export interface OpenAIRealtimeClientSecretResponse {
+  /** Ephemeral key (`ek_…`) used as the bearer token for the WebRTC SDP exchange */
+  value: string
+  /** Unix timestamp (seconds) when the ephemeral key expires */
+  expires_at: number
+  /** Effective session config the key was minted for */
+  session: {
     type: string
-    name: string
-    description: string
-    parameters: Record<string, unknown>
-  }>
-  tool_choice: string
-  temperature: number
-  max_response_output_tokens: number | string
-  client_secret: {
-    value: string
-    expires_at: number
+    model: string
   }
 }
diff --git a/packages/ai-openai/tests/realtime-session-update.test.ts b/packages/ai-openai/tests/realtime-session-update.test.ts
new file mode 100644
index 000000000..f2bb6d696
--- /dev/null
+++ b/packages/ai-openai/tests/realtime-session-update.test.ts
@@ -0,0 +1,115 @@
+import { describe, expect, it } from 'vitest'
+import { buildSessionUpdate } from '../src/realtime/session-update'
+
+describe('buildSessionUpdate (GA session.update shape)', () => {
+  it('always stamps session.type="realtime" and enables input transcription', () => {
+    expect(buildSessionUpdate({})).toEqual({
+      type: 'realtime',
+      audio: { input: { transcription: { model: 'whisper-1' } } },
+    })
+  })
+
+  it('nests voice under audio.output.voice', () => {
+    const session = buildSessionUpdate({ voice: 'marin' })
+    expect(session.audio).toEqual({
+      input: { transcription: { model: 'whisper-1' } },
+      output: { voice: 'marin' },
+    })
+  })
+
+  it('nests semantic turn detection under audio.input.turn_detection', () => {
+    const session = buildSessionUpdate({
+      vadMode: 'semantic',
+      semanticEagerness: 'high',
+    })
+    expect(session.audio).toEqual({
+      input: {
+        transcription: { model: 'whisper-1' },
+        turn_detection: { type: 'semantic_vad', eagerness: 'high' },
+      },
+    })
+  })
+
+  it('applies server VAD defaults under audio.input.turn_detection', () => {
+    const session = buildSessionUpdate({ vadMode: 'server' })
+    expect(session.audio).toEqual({
+      input: {
+        transcription: { model: 'whisper-1' },
+        turn_detection: {
+          type: 'server_vad',
+          threshold: 0.5,
+          prefix_padding_ms: 300,
+          silence_duration_ms: 500,
+        },
+      },
+    })
+  })
+
+  it('disables turn detection for manual VAD mode', () => {
+    const session = buildSessionUpdate({ vadMode: 'manual' })
+    expect(session.audio).toEqual({
+      input: {
+        transcription: { model: 'whisper-1' },
+        turn_detection: null,
+      },
+    })
+  })
+
+  it('uses GA field names output_modalities and max_output_tokens', () => {
+    const session = buildSessionUpdate({
+      outputModalities: ['audio'],
+      maxOutputTokens: 4096,
+    })
+    expect(session.output_modalities).toEqual(['audio'])
+    expect(session.max_output_tokens).toBe(4096)
+  })
+
+  it('maps tools to the realtime function shape with tool_choice auto', () => {
+    const session = buildSessionUpdate({
+      tools: [
+        {
+          name: 'getWeather',
+          description: 'Get the weather',
+          inputSchema: { type: 'object', properties: { city: {} } },
+        },
+        { name: 'noSchema', description: 'No schema tool' },
+      ],
+    })
+    expect(session.tools).toEqual([
+      {
+        type: 'function',
+        name: 'getWeather',
+        description: 'Get the weather',
+        parameters: { type: 'object', properties: { city: {} } },
+      },
+      {
+        type: 'function',
+        name: 'noSchema',
+        description: 'No schema tool',
+        parameters: { type: 'object', properties: {} },
+      },
+    ])
+    expect(session.tool_choice).toBe('auto')
+  })
+
+  it('never emits Beta field names (GA rejects the whole update on unknown_parameter)', () => {
+    const session = buildSessionUpdate({
+      instructions: 'Be helpful.',
+      voice: 'marin',
+      vadMode: 'server',
+      outputModalities: ['audio', 'text'],
+      temperature: 0.7,
+      maxOutputTokens: 1024,
+    })
+    for (const betaField of [
+      'voice',
+      'modalities',
+      'turn_detection',
+      'input_audio_transcription',
+      'max_response_output_tokens',
+      'temperature',
+    ]) {
+      expect(session).not.toHaveProperty(betaField)
+    }
+  })
+})
diff --git a/packages/ai-openai/tests/realtime-token.test.ts b/packages/ai-openai/tests/realtime-token.test.ts
new file mode 100644
index 000000000..e355e6a83
--- /dev/null
+++ b/packages/ai-openai/tests/realtime-token.test.ts
@@ -0,0 +1,55 @@
+import { describe, expect, it } from 'vitest'
+import {
+  buildClientSecretRequest,
+  parseClientSecretResponse,
+} from '../src/realtime/token'
+
+describe('buildClientSecretRequest', () => {
+  it('nests session config with the required type under the `session` key', () => {
+    expect(buildClientSecretRequest('gpt-realtime-mini')).toEqual({
+      session: { type: 'realtime', model: 'gpt-realtime-mini' },
+    })
+  })
+})
+
+describe('parseClientSecretResponse', () => {
+  it('reads the GA top-level value/expires_at shape and converts seconds to ms', () => {
+    const token = parseClientSecretResponse(
+      {
+        value: 'ek_test_123',
+        expires_at: 1_700_000_000,
+        session: { type: 'realtime', model: 'gpt-realtime' },
+      },
+      'gpt-realtime-mini',
+    )
+    expect(token).toEqual({
+      provider: 'openai',
+      token: 'ek_test_123',
+      expiresAt: 1_700_000_000_000,
+      config: { model: 'gpt-realtime' },
+    })
+  })
+
+  it('falls back to the requested model when the response omits session.model', () => {
+    const token = parseClientSecretResponse(
+      { value: 'ek_test_123', expires_at: 1_700_000_000 },
+      'gpt-realtime-mini',
+    )
+    expect(token.config.model).toBe('gpt-realtime-mini')
+  })
+
+  it('throws on a missing or malformed response instead of returning a broken token', () => {
+    expect(() => parseClientSecretResponse(undefined, 'gpt-realtime')).toThrow(
+      /missing or malformed/,
+    )
+    expect(() =>
+      parseClientSecretResponse({ expires_at: 1_700_000_000 }, 'gpt-realtime'),
+    ).toThrow(/missing or malformed/)
+    expect(() =>
+      parseClientSecretResponse(
+        { value: 'ek_test_123', expires_at: Number.NaN },
+        'gpt-realtime',
+      ),
+    ).toThrow(/missing or malformed/)
+  })
+})
diff --git a/packages/ai/src/realtime/index.ts b/packages/ai/src/realtime/index.ts
index e3970285e..54a7a6439 100644
--- a/packages/ai/src/realtime/index.ts
+++ b/packages/ai/src/realtime/index.ts
@@ -22,9 +22,7 @@ export type * from './types'
  *   .handler(async () => {
  *     return realtimeToken({
  *       adapter: openaiRealtimeToken({
- *         model: 'gpt-4o-realtime-preview',
- *         voice: 'alloy',
- *         instructions: 'You are a helpful assistant...',
+ *         model: 'gpt-realtime',
  *       }),
  *     })
  *   })

From c2dbee8bc1012c2ab8aee8b2f58626f3421fc947 Mon Sep 17 00:00:00 2001
From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com>
Date: Wed, 10 Jun 2026 14:46:02 +1000
Subject: [PATCH 3/3] fix(ai-openai): collapse output modalities to single
 GA-supported value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GA realtime API only accepts ['audio'] or ['text'] for
output_modalities; the Beta API accepted ['audio', 'text'] and the
provider-agnostic RealtimeSessionConfig still legitimately produces it
(e.g. the example UI's audio+text mode). Sending both got the whole
session.update rejected with: Invalid modalities: ['audio', 'text'].

Collapse to ['audio'] whenever audio is requested; any other list
(including an empty one) is sent as ['text']. GA audio replies still
stream text via response.output_audio_transcript.* events, so visible
behavior is unchanged for audio+text callers. Live-verified:
session.updated accepted.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 packages/ai-openai/src/realtime/session-update.ts      |  8 +++++++-
 .../ai-openai/tests/realtime-session-update.test.ts    | 10 ++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/packages/ai-openai/src/realtime/session-update.ts b/packages/ai-openai/src/realtime/session-update.ts
index 5d0118f2d..0d9e9f68a 100644
--- a/packages/ai-openai/src/realtime/session-update.ts
+++ b/packages/ai-openai/src/realtime/session-update.ts
@@ -65,7 +65,13 @@ export function buildSessionUpdate(
   }
 
   if (config.outputModalities) {
-    sessionUpdate.output_modalities = config.outputModalities
+    // GA only supports a single output modality: ['audio'] or ['text']
+    // (Beta accepted ['audio', 'text']). Audio replies still stream text
+    // via `response.output_audio_transcript.*` events, so collapsing
+    // ['audio', 'text'] to ['audio'] preserves the visible behavior.
+    sessionUpdate.output_modalities = config.outputModalities.includes('audio')
+      ? ['audio']
+      : ['text']
   }
 
   if (config.maxOutputTokens !== undefined) {
diff --git a/packages/ai-openai/tests/realtime-session-update.test.ts b/packages/ai-openai/tests/realtime-session-update.test.ts
index f2bb6d696..3fb6356a0 100644
--- a/packages/ai-openai/tests/realtime-session-update.test.ts
+++ b/packages/ai-openai/tests/realtime-session-update.test.ts
@@ -64,6 +64,16 @@ describe('buildSessionUpdate (GA session.update shape)', () => {
     expect(session.max_output_tokens).toBe(4096)
   })
 
+  it('collapses ["audio", "text"] to ["audio"] (GA supports a single output modality)', () => {
+    expect(
+      buildSessionUpdate({ outputModalities: ['audio', 'text'] })
+        .output_modalities,
+    ).toEqual(['audio'])
+    expect(
+      buildSessionUpdate({ outputModalities: ['text'] }).output_modalities,
+    ).toEqual(['text'])
+  })
+
   it('maps tools to the realtime function shape with tool_choice auto', () => {
     const session = buildSessionUpdate({
       tools: [