TanStack · tombeckenham · Jun 10, 2026 · Jun 3, 2026 · Jun 7, 2026 · Jun 10, 2026
diff --git a/.changeset/openai-realtime-ga-migration.md b/.changeset/openai-realtime-ga-migration.md
@@ -0,0 +1,12 @@
+---
+'@tanstack/ai-openai': patch
+'@tanstack/ai': patch
+---
+
+Migrate the OpenAI realtime adapters from the retired Beta API (shut down 2026-05-12) to the GA API:
+
+- `openaiRealtime()` now exchanges WebRTC SDP via `POST /v1/realtime/calls` (the Beta `?model=` shape returned `beta_api_shape_disabled`).
+- `openaiRealtimeToken()` now mints ephemeral keys via `POST /v1/realtime/client_secrets` instead of the retired `/v1/realtime/sessions`, and parses the GA top-level `value`/`expires_at` response shape.
+- `session.update` payloads use the GA shape: required `session.type`, `audio.input.transcription`, `audio.input.turn_detection`, `audio.output.voice`, `output_modalities`, and `max_output_tokens`. `temperature` was removed from the GA session config and is no longer sent (a debug log notes when it is dropped). GA accepts only a single output modality, so `outputModalities` is collapsed to `['audio']` when it includes audio and to `['text']` otherwise.
+- Server events are handled under their GA names (`response.output_audio_transcript.*`, `response.output_audio.*`, `output_text`/`output_audio` content parts).
+- The default realtime model is now `gpt-realtime`; the `gpt-4o-realtime-preview` and `gpt-4o-mini-realtime-preview` ids (shut down by OpenAI on 2026-05-07) were removed from `OpenAIRealtimeModel`.
diff --git a/docs/media/realtime-chat.md b/docs/media/realtime-chat.md
@@ -48,7 +48,7 @@ const getRealtimeToken = createServerFn({ method: 'POST' })
   .handler(async () => {
     return realtimeToken({
       adapter: openaiRealtimeToken({
-        model: 'gpt-4o-realtime-preview',
+        model: 'gpt-realtime',
       }),
     })
   })
@@ -119,7 +119,7 @@ import { openaiRealtimeToken } from '@tanstack/ai-openai'
 
 const token = await realtimeToken({
   adapter: openaiRealtimeToken({
-    model: 'gpt-4o-realtime-preview',
+    model: 'gpt-realtime',
   }),
 })
 ```
@@ -138,10 +138,8 @@ const adapter = openaiRealtime()
 
 | Model | Description |
 |-------|-------------|
-| `gpt-4o-realtime-preview` | Full realtime model |
-| `gpt-4o-mini-realtime-preview` | Smaller, faster realtime model |
-| `gpt-realtime` | Latest realtime model |
-| `gpt-realtime-mini` | Latest mini realtime model |
+| `gpt-realtime` | Full realtime model |
+| `gpt-realtime-mini` | Smaller, faster realtime model |
 
 **Available voices:** `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, `cedar`
 

diff --git a/docs/reference/functions/realtimeToken.md b/docs/reference/functions/realtimeToken.md
@@ -41,9 +41,7 @@ export const getRealtimeToken = createServerFn()
   .handler(async () => {
     return realtimeToken({
       adapter: openaiRealtimeToken({
-        model: 'gpt-4o-realtime-preview',
-        voice: 'alloy',
-        instructions: 'You are a helpful assistant...',
+        model: 'gpt-realtime',
       }),
     })
   })

diff --git a/examples/ts-code-mode-web/src/routes/_execute-prompt/api.realtime-token.ts b/examples/ts-code-mode-web/src/routes/_execute-prompt/api.realtime-token.ts
@@ -11,7 +11,7 @@ export const Route = createFileRoute(
         try {
           const token = await realtimeToken({
             adapter: openaiRealtimeToken({
-              model: 'gpt-4o-realtime-preview',
+              model: 'gpt-realtime',
             }),
           })
           return new Response(JSON.stringify(token), {

diff --git a/examples/ts-react-chat/src/lib/use-realtime.ts b/examples/ts-react-chat/src/lib/use-realtime.ts
@@ -20,7 +20,7 @@ const getRealtimeTokenFn = createServerFn({ method: 'POST' })
     if (data.provider === 'openai') {
       return realtimeToken({
         adapter: openaiRealtimeToken({
-          model: 'gpt-4o-realtime-preview',
+          model: 'gpt-realtime',
         }),
       })
     }

diff --git a/packages/ai-openai/src/realtime/adapter.ts b/packages/ai-openai/src/realtime/adapter.ts
@@ -1,4 +1,5 @@
 import { resolveDebugOption } from '@tanstack/ai/adapter-internals'
+import { buildSessionUpdate } from './session-update'
 import type {
   AnyClientTool,
   AudioVisualization,
@@ -47,7 +48,7 @@ export function openaiRealtime(
       token: RealtimeToken,
       _clientTools?: ReadonlyArray<AnyClientTool>,
     ): Promise<RealtimeConnection> {
-      const model = token.config.model ?? 'gpt-4o-realtime-preview'
+      const model = token.config.model ?? 'gpt-realtime'
       logger.request(`activity=realtime provider=openai model=${model}`, {
         provider: 'openai',
         model,
@@ -73,7 +74,6 @@ async function createWebRTCConnection(
   token: RealtimeToken,
   logger: InternalLogger,
 ): Promise<RealtimeConnection> {
-  const model = token.config.model ?? 'gpt-4o-realtime-preview'
   const eventHandlers = new Map<RealtimeEvent, Set<RealtimeEventHandler<any>>>()
 
   // WebRTC peer connection
@@ -187,10 +187,12 @@ async function createWebRTCConnection(
   const offer = await pc.createOffer()
   await pc.setLocalDescription(offer)
 
-  // Send SDP to OpenAI and get answer. `offer.sdp` is `string | undefined` per
-  // the WebRTC type definitions; coerce to `null` (which `RequestInit.body`
-  // accepts) under exactOptionalPropertyTypes.
-  const sdpResponse = await fetch(`${OPENAI_REALTIME_URL}?model=${model}`, {
+  // Send SDP to OpenAI's GA `/calls` endpoint and get the answer. The model
+  // is bound to the ephemeral token (minted via `/v1/realtime/client_secrets`),
+  // so it must NOT be passed as a query param — GA rejects `?model=` with a
+  // 400. `offer.sdp` is `string | undefined` per the WebRTC type definitions;
+  // coerce to `null`, which `RequestInit.body` accepts.
+  const sdpResponse = await fetch(`${OPENAI_REALTIME_URL}/calls`, {
     method: 'POST',
     headers: {
       Authorization: `Bearer ${token.token}`,
@@ -260,7 +262,7 @@ async function createWebRTCConnection(
         break
       }
 
-      case 'response.audio_transcript.delta': {
+      case 'response.output_audio_transcript.delta': {
         const delta = event.delta as string
         emit('transcript', {
           role: 'assistant',
@@ -270,7 +272,7 @@ async function createWebRTCConnection(
         break
       }
 
-      case 'response.audio_transcript.done': {
+      case 'response.output_audio_transcript.done': {
         const transcript = event.transcript as string
         emit('transcript', { role: 'assistant', transcript, isFinal: true })
         break
@@ -296,14 +298,14 @@ async function createWebRTCConnection(
         break
       }
 
-      case 'response.audio.delta':
+      case 'response.output_audio.delta':
         if (currentMode !== 'speaking') {
           currentMode = 'speaking'
           emit('mode_change', { mode: 'speaking' })
         }
         break
 
-      case 'response.audio.done':
+      case 'response.output_audio.done':
         break
 
       case 'response.function_call_arguments.done': {
@@ -359,12 +361,14 @@ async function createWebRTCConnection(
             if (item.type === 'message' && item.content) {
               const content = item.content as Array<Record<string, unknown>>
               for (const part of content) {
-                if (part.type === 'audio' && part.transcript) {
+                // GA renamed assistant content types: `audio` -> `output_audio`,
+                // `text` -> `output_text`
+                if (part.type === 'output_audio' && part.transcript) {
                   message.parts.push({
                     type: 'audio',
                     transcript: part.transcript as string,
                   })
-                } else if (part.type === 'text' && part.text) {
+                } else if (part.type === 'output_text' && part.text) {
                   message.parts.push({
                     type: 'text',
                     content: part.text as string,
@@ -586,65 +590,19 @@ async function createWebRTCConnection(
     },
 
     updateSession(config: Partial<RealtimeSessionConfig>) {
-      const sessionUpdate: Record<string, unknown> = {}
-
-      if (config.instructions) {
-        sessionUpdate.instructions = config.instructions
-      }
-
-      if (config.voice) {
-        sessionUpdate.voice = config.voice
-      }
-
-      if (config.vadMode) {
-        if (config.vadMode === 'semantic') {
-          sessionUpdate.turn_detection = {
-            type: 'semantic_vad',
-            eagerness: config.semanticEagerness ?? 'medium',
-          }
-        } else if (config.vadMode === 'server') {
-          sessionUpdate.turn_detection = {
-            type: 'server_vad',
-            threshold: config.vadConfig?.threshold ?? 0.5,
-            prefix_padding_ms: config.vadConfig?.prefixPaddingMs ?? 300,
-            silence_duration_ms: config.vadConfig?.silenceDurationMs ?? 500,
-          }
-        } else {
-          sessionUpdate.turn_detection = null
-        }
-      }
-
-      if (config.tools !== undefined) {
-        sessionUpdate.tools = config.tools.map((t) => ({
-          type: 'function',
-          name: t.name,
-          description: t.description,
-          parameters: t.inputSchema ?? { type: 'object', properties: {} },
-        }))
-        sessionUpdate.tool_choice = 'auto'
-      }
-
-      if (config.outputModalities) {
-        sessionUpdate.modalities = config.outputModalities
-      }
-
       if (config.temperature !== undefined) {
-        sessionUpdate.temperature = config.temperature
-      }
-
-      if (config.maxOutputTokens !== undefined) {
-        sessionUpdate.max_response_output_tokens = config.maxOutputTokens
+        // The GA API removed `temperature` from session config; sending it
+        // would get the whole update rejected with `unknown_parameter`.
+        logger.provider(
+          'provider=openai direction=out type=session.update dropped `temperature` (removed in the GA realtime API)',
+          { frame: { temperature: config.temperature } },
+        )
       }
 
-      // Always enable input audio transcription so user speech is transcribed
-      sessionUpdate.input_audio_transcription = { model: 'whisper-1' }
-
-      if (Object.keys(sessionUpdate).length > 0) {
-        sendEvent({
-          type: 'session.update',
-          session: sessionUpdate,
-        })
-      }
+      sendEvent({
+        type: 'session.update',
+        session: buildSessionUpdate(config),
+      })
     },
 
     interrupt() {

diff --git a/packages/ai-openai/src/realtime/session-update.ts b/packages/ai-openai/src/realtime/session-update.ts
@@ -0,0 +1,82 @@
+import type { RealtimeSessionConfig } from '@tanstack/ai'
+
+/**
+ * Builds the GA-shaped `session.update` payload for OpenAI's realtime API.
+ *
+ * GA requires `session.type` on every update and nests audio settings under
+ * `audio.input` / `audio.output` (the flat Beta names were retired with the
+ * Beta shape on 2026-05-12). An update containing unknown fields is rejected
+ * with `unknown_parameter` and none of it is applied, so the exact field
+ * names are load-bearing. `temperature` was removed from the GA session
+ * config and is intentionally never sent; the adapter logs when it drops it.
+ *
+ * @param config - Partial session config; options that are not set are left out.
+ * @returns The `session` object, always with `type` and `audio.input.transcription`.
+ */
+export function buildSessionUpdate(
+  config: Partial<RealtimeSessionConfig>,
+): Record<string, unknown> {
+  // Always enable input audio transcription so user speech is transcribed
+  const audioInput: Record<string, unknown> = {
+    transcription: { model: 'whisper-1' },
+  }
+
+  if (config.vadMode) {
+    if (config.vadMode === 'semantic') {
+      audioInput.turn_detection = {
+        type: 'semantic_vad',
+        eagerness: config.semanticEagerness ?? 'medium',
+      }
+    } else if (config.vadMode === 'server') {
+      audioInput.turn_detection = {
+        type: 'server_vad',
+        threshold: config.vadConfig?.threshold ?? 0.5,
+        prefix_padding_ms: config.vadConfig?.prefixPaddingMs ?? 300,
+        silence_duration_ms: config.vadConfig?.silenceDurationMs ?? 500,
+      }
+    } else {
+      audioInput.turn_detection = null // any other mode sends an explicit null
+    }
+  }
+
+  const audio: Record<string, unknown> = { input: audioInput }
+
+  if (config.voice) {
+    audio.output = { voice: config.voice }
+  }
+
+  const sessionUpdate: Record<string, unknown> = {
+    type: 'realtime',
+    audio,
+  }
+
+  if (config.instructions) {
+    sessionUpdate.instructions = config.instructions
+  }
+
+  if (config.tools !== undefined) {
+    sessionUpdate.tools = config.tools.map((t) => ({
+      type: 'function',
+      name: t.name,
+      description: t.description,
+      parameters: t.inputSchema ?? { type: 'object', properties: {} },
+    }))
+    sessionUpdate.tool_choice = 'auto' // set even when `tools` is an empty list
+  }
+
+  if (config.outputModalities) {
+    // GA only supports a single output modality: ['audio'] or ['text']
+    // (Beta accepted ['audio', 'text']). Audio replies still stream text via
+    // `response.output_audio_transcript.*`, so collapsing ['audio', 'text'] to
+    // ['audio'] preserves visible behavior. NOTE: [] also falls through to ['text'].
+    sessionUpdate.output_modalities = config.outputModalities.includes('audio')
+      ? ['audio']
+      : ['text']
+  }
+
+  if (config.maxOutputTokens !== undefined) {
+    sessionUpdate.max_output_tokens = config.maxOutputTokens
+  }
+
+  return sessionUpdate
+}