Example: Half-cascade with Inworld

Overview

This half-cascade example uses OpenAI Realtime for speech-to-text and reasoning, then sends OpenAI's text responses to Inworld Realtime TTS for speech synthesis.

⬇️ Jump to the Full VoxEngine scenario.

Prerequisites

  • Store your OpenAI API key in Voximplant Secrets under OPENAI_API_KEY.
  • Set a voiceId in the Inworld request (createContextParameters.create.voiceId) to choose the TTS voice used in this scenario.
  • (Optional) Store your Inworld API key in Voximplant Secrets under INWORLD_API_KEY if you want to use your own Inworld account (see the sketch below).
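
In the scenario below, these values are wired in roughly like this (a minimal sketch; the secret names match the entries above, and "Ashley" is only the example voice):

// Read the OpenAI key stored in Voximplant Secrets
const openAiKey = VoxEngine.getSecretValue('OPENAI_API_KEY');

// Create the Inworld TTS player with the voice you want for this scenario
const ttsPlayer = Inworld.createRealtimeTTSPlayer({
  // apiKey: VoxEngine.getSecretValue('INWORLD_API_KEY'), // optional: use your own Inworld account
  createContextParameters: {
    create: {voiceId: "Ashley"},
  },
});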

How it works

  • OpenAI runs in text mode (output_modalities: ["text"]).
  • Caller audio is sent to OpenAI: call.sendMediaTo(voiceAIClient).
  • Inworld generates speech from OpenAI text and streams it to the call (see the sketch below).
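
Reduced to its key calls, the wiring from the full scenario below looks like this (voiceAIClient is the OpenAI Realtime client, ttsPlayer is the Inworld TTS player, and call comes from the CallAlerting event; the session config is simplified here):

// OpenAI returns text only; no OpenAI audio reaches the caller
voiceAIClient.sessionUpdate({
  session: {type: "realtime", output_modalities: ["text"]},
});

// Caller audio goes to OpenAI for STT and reasoning
call.sendMediaTo(voiceAIClient);

// Inworld-generated speech goes back to the caller
ttsPlayer.sendMediaTo(call);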

Notes

  • The example sets voiceId: "Ashley" and modelId: "inworld-tts-1.5-mini" in createContextParameters.create. Change these to any supported Inworld voice/model.
  • Do not set audio format parameters in half-cascade connector requests. VoxEngine’s WebSocket gateway handles media format negotiation automatically.
  • If no Inworld API key is provided, Voximplant’s default account and billing are used.
  • Custom / cloned voices are only available when using your own API key.
  • Generate speech with send({ send_text: { text } }).
  • Flush the context after every turn with send({ flush_context: {} }).
  • Clear buffered speech in the barge-in handler with clearBuffer() so interruptions stay natural (see the sketch below).
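
Taken together, the per-turn handling from the scenario below looks like this (payload access is simplified; see the full example for the fallback handling):

// Each completed OpenAI text response is handed to Inworld for speech
voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.ResponseOutputTextDone, (event) => {
  const text = event?.data?.payload?.text;
  if (!text) return;
  ttsPlayer.send({send_text: {text}});  // generate speech
  ttsPlayer.send({flush_context: {}});  // flush the context after the turn
});

// Barge-in: drop speech that is still buffered on both sides
voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.InputAudioBufferSpeechStarted, () => {
  voiceAIClient.clearMediaBuffer();
  ttsPlayer.clearBuffer();
});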

Full VoxEngine scenario

voxengine-openai-half-cascade-inworld.js
/**
 * Voximplant + OpenAI Realtime API + Inworld TTS demo
 * Scenario: OpenAI handles STT/LLM, Inworld handles TTS (half-cascade).
 */

require(Modules.OpenAI);
require(Modules.Inworld);

const SYSTEM_PROMPT = `
You are Voxi, a helpful phone assistant.
Keep responses short and telephony-friendly.
Always reply in English.
`;

const INWORLD_VOICE_ID = "Ashley"; // set your preference here
const INWORLD_MODEL_ID = "inworld-tts-1.5-mini"; // set your preference here, or leave blank to use the default model for the voice

const SESSION_CONFIG = {
  session: {
    type: "realtime",
    instructions: SYSTEM_PROMPT,
    output_modalities: ["text"],
    turn_detection: {type: "server_vad", interrupt_response: true},
  },
};

VoxEngine.addEventListener(AppEvents.CallAlerting, async ({call}) => {
  let voiceAIClient;
  let ttsPlayer;

  call.addEventListener(CallEvents.Disconnected, () => VoxEngine.terminate());
  call.addEventListener(CallEvents.Failed, () => VoxEngine.terminate());

  try {
    call.answer();
    // call.record({hd_audio: true, stereo: true}); // Optional: record the call

    const openAiKey = VoxEngine.getSecretValue('OPENAI_API_KEY');

    voiceAIClient = await OpenAI.createRealtimeAPIClient({
      apiKey: openAiKey,
      model: "gpt-realtime-1.5",
      onWebSocketClose: (event) => {
        Logger.write("===OpenAI.WebSocket.Close===");
        if (event) Logger.write(JSON.stringify(event));
        VoxEngine.terminate();
      },
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.SessionCreated, () => {
      voiceAIClient.sessionUpdate(SESSION_CONFIG);
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.SessionUpdated, async () => {
      call.sendMediaTo(voiceAIClient); // bridge media between the call and OpenAI

      // create the TTS player and pass the config parameters
      ttsPlayer = Inworld.createRealtimeTTSPlayer({
        // apiKey: VoxEngine.getSecretValue('INWORLD_API_KEY'), // optional
        createContextParameters: {
          create: {
            voiceId: INWORLD_VOICE_ID,
            modelId: INWORLD_MODEL_ID,
            speakingRate: 1.1,
            temperature: 1.3,
          },
        },
      });
      ttsPlayer.sendMediaTo(call); // bridge media between the TTS player and the call

      voiceAIClient.responseCreate({instructions: "Hello! How can I help today?"});
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.ResponseOutputTextDone, (event) => {
      const payload = event?.data?.payload || event?.data || {};
      const text = payload.text || payload.delta;
      if (!text || !ttsPlayer) return;
      Logger.write(`===AGENT_TEXT=== ${text}`);
      ttsPlayer.send({send_text: {text}});
      ttsPlayer.send({flush_context: {}});
    });

    // Barge-in: clear both OpenAI and Inworld buffers
    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.InputAudioBufferSpeechStarted, () => {
      Logger.write("===BARGE-IN: OpenAI.InputAudioBufferSpeechStarted===");
      voiceAIClient.clearMediaBuffer();
      ttsPlayer?.clearBuffer();
    });

    // ---------------------- Log all other events for debugging -----------------------
    [
      OpenAI.RealtimeAPIEvents.ResponseCreated,
      OpenAI.RealtimeAPIEvents.ResponseDone,
      OpenAI.RealtimeAPIEvents.ResponseOutputTextDelta,
      OpenAI.RealtimeAPIEvents.ConnectorInformation,
      OpenAI.RealtimeAPIEvents.HTTPResponse,
      OpenAI.RealtimeAPIEvents.WebSocketError,
      OpenAI.RealtimeAPIEvents.Unknown,
      OpenAI.Events.WebSocketMediaStarted,
      OpenAI.Events.WebSocketMediaEnded,
    ].forEach((eventName) => {
      voiceAIClient.addEventListener(eventName, (event) => {
        Logger.write(`===${event.name}===`);
        if (event?.data) Logger.write(JSON.stringify(event.data));
      });
    });
  } catch (error) {
    Logger.write("===UNHANDLED_ERROR===");
    Logger.write(error);
    voiceAIClient?.close();
    VoxEngine.terminate();
  }
});