Example: Half-cascade with ElevenLabs


Overview

This half-cascade example uses OpenAI Realtime for speech-to-text and reasoning, then streams OpenAI's text responses to ElevenLabs Realtime TTS, which speaks them to the caller.

⬇️ Jump to the Full VoxEngine scenario.

Prerequisites

  • Store your OpenAI API key in Voximplant ApplicationStorage under OPENAI_API_KEY (see the snippet after this list).
  • (Optional) Update the ELEVENLABS_VOICE_ID constant in the example to your preferred voice.
  • (Optional) Store your ElevenLabs API key in Voximplant ApplicationStorage under ELEVENLABS_API_KEY if you want to use your own ElevenLabs account.
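
For reference, here is a minimal sketch of reading these keys inside the scenario. The await calls must run in an async context, such as the CallAlerting handler in the full scenario below; the null check assumes ApplicationStorage.get resolves to null when a key is absent.

require(Modules.ApplicationStorage);

// Inside an async handler (e.g. the CallAlerting listener):
const openAiKey = (await ApplicationStorage.get("OPENAI_API_KEY")).value;

// Optional: only needed when using your own ElevenLabs account
const elevenLabsItem = await ApplicationStorage.get("ELEVENLABS_API_KEY");
const elevenLabsKey = elevenLabsItem ? elevenLabsItem.value : undefined;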

How it works

  • OpenAI runs in text mode (output_modalities: ["text"]).
  • Caller audio is sent to OpenAI: call.sendMediaTo(voiceAIClient).
  • ElevenLabs generates speech from OpenAI's text and streams it to the call (see the sketch after this list).
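
The media topology boils down to two bridges, both taken from the full scenario below: caller audio flows to OpenAI for recognition and reasoning, and ElevenLabs audio flows back into the call.

call.sendMediaTo(voiceAIClient); // caller audio -> OpenAI (STT + LLM)
ttsPlayer.sendMediaTo(call);     // ElevenLabs TTS audio -> caller

Because OpenAI runs in text mode, no OpenAI audio ever reaches the call; ElevenLabs is the only audio source the caller hears.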

Notes

  • The example uses the eleven_turbo_v2_5 ElevenLabs model.
  • The example instructs the agent to reply in English.
  • Do not set audio format parameters (for example ulaw_8000) in half-cascade connector requests. VoxEngine’s WebSocket gateway handles media format negotiation automatically.
  • If no ElevenLabs API key is provided, Voximplant’s default account and billing are used.
  • Custom / cloned voices are only available when using your own API key.
  • Use append(text, true) for each response chunk so playback stays responsive (see the handler sketch after these notes).
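
A condensed sketch of the text-to-speech hand-off; the full scenario below adds logging and a fallback for the payload shape.

voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.ResponseOutputTextDone, (event) => {
  const text = event?.data?.payload?.text;
  if (text && ttsPlayer) ttsPlayer.append(text, true); // true flushes the chunk for immediate playback
});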


Full VoxEngine scenario

voxengine-openai-half-cascade-elevenlabs.js
/**
 * Voximplant + OpenAI Realtime API + ElevenLabs TTS demo
 * Scenario: OpenAI handles STT/LLM, ElevenLabs handles TTS (half-cascade).
 */

require(Modules.OpenAI);
require(Modules.ElevenLabs);
require(Modules.ApplicationStorage);

const SYSTEM_PROMPT = `
You are Voxi, a helpful phone assistant.
Keep responses short and telephony-friendly.
Always reply in English.
`;

const ELEVENLABS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
const ELEVENLABS_MODEL_ID = "eleven_turbo_v2_5";

const SESSION_CONFIG = {
  session: {
    type: "realtime",
    instructions: SYSTEM_PROMPT,
    output_modalities: ["text"], // text only: ElevenLabs handles all speech output
    turn_detection: {type: "server_vad", interrupt_response: true},
  },
};

VoxEngine.addEventListener(AppEvents.CallAlerting, async ({call}) => {
  let voiceAIClient;
  let ttsPlayer;

  call.addEventListener(CallEvents.Disconnected, () => VoxEngine.terminate());
  call.addEventListener(CallEvents.Failed, () => VoxEngine.terminate());

  try {
    call.answer();
    // call.record({hd_audio: true, stereo: true}); // Optional: record the call

    const openAiKey = (await ApplicationStorage.get("OPENAI_API_KEY")).value;

    voiceAIClient = await OpenAI.createRealtimeAPIClient({
      apiKey: openAiKey,
      model: "gpt-realtime",
      onWebSocketClose: (event) => {
        Logger.write("===OpenAI.WebSocket.Close===");
        if (event) Logger.write(JSON.stringify(event));
        VoxEngine.terminate();
      },
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.SessionCreated, () => {
      voiceAIClient.sessionUpdate(SESSION_CONFIG);
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.SessionUpdated, () => {
      call.sendMediaTo(voiceAIClient); // bridge caller audio to OpenAI

      // Create the TTS player and pass the config parameters.
      // To use your own ElevenLabs account, fetch the key beforehand
      // (ApplicationStorage.get("ELEVENLABS_API_KEY"), next to the OpenAI key above)
      // and pass it as: headers: [{name: "xi-api-key", value: <your key>}]
      ttsPlayer = ElevenLabs.createRealtimeTTSPlayer(" ", {
        pathParameters: {voice_id: ELEVENLABS_VOICE_ID},
        queryParameters: {
          model_id: ELEVENLABS_MODEL_ID,
        },
      });
      ttsPlayer.sendMediaTo(call); // bridge TTS audio back to the call

      voiceAIClient.responseCreate({instructions: "Hello! How can I help today?"});
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.ResponseOutputTextDone, (event) => {
      const payload = event?.data?.payload || event?.data || {};
      const text = payload.text || payload.delta;
      if (!text || !ttsPlayer) return;
      Logger.write(`===AGENT_TEXT=== ${text}`);
      ttsPlayer.append(text, true); // true flushes the chunk so playback starts immediately
    });

    // Barge-in: clear both OpenAI and ElevenLabs buffers
    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.InputAudioBufferSpeechStarted, () => {
      Logger.write("===BARGE-IN: OpenAI.InputAudioBufferSpeechStarted===");
      voiceAIClient.clearMediaBuffer();
      ttsPlayer?.clearBuffer();
    });

    // ---------------------- Log all other events for debugging -----------------------
    [
      OpenAI.RealtimeAPIEvents.ResponseCreated,
      OpenAI.RealtimeAPIEvents.ResponseDone,
      OpenAI.RealtimeAPIEvents.ResponseOutputTextDelta,
      OpenAI.RealtimeAPIEvents.ConnectorInformation,
      OpenAI.RealtimeAPIEvents.HTTPResponse,
      OpenAI.RealtimeAPIEvents.WebSocketError,
      OpenAI.RealtimeAPIEvents.Unknown,
      OpenAI.Events.WebSocketMediaStarted,
      OpenAI.Events.WebSocketMediaEnded,
    ].forEach((eventName) => {
      voiceAIClient.addEventListener(eventName, (event) => {
        Logger.write(`===${event.name}===`);
        if (event?.data) Logger.write(JSON.stringify(event.data));
      });
    });
  } catch (error) {
    Logger.write("===UNHANDLED_ERROR===");
    Logger.write(error);
    voiceAIClient?.close();
    VoxEngine.terminate();
  }
});