---
title: 'Example: Half-cascade with Inworld'
---

## Overview

This half-cascade example uses OpenAI Realtime for speech-to-text and reasoning, then sends OpenAI text responses to Inworld Realtime TTS.

**⬇️ Jump to the [Full VoxEngine scenario](#full-voxengine-scenario).**

## Prerequisites

* Store your OpenAI API key in Voximplant `ApplicationStorage` under `OPENAI_API_KEY` (a provisioning sketch follows this list).
* Set a `voiceId` in the Inworld request (`createContextParameters.create.voiceId`) to choose the TTS voice used in this scenario.
* (Optional) Store your Inworld API key in `ApplicationStorage` as `INWORLD_API_KEY` if you want to use your own Inworld account.
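Keys can be added in the Voximplant control panel or via the Management API. If you prefer to provision them from code, a one-off scenario can write them with `ApplicationStorage.put`. This is a minimal sketch with placeholder values that you should replace with your real keys:

```javascript
require(Modules.ApplicationStorage);

// One-off provisioning scenario: start it once (e.g., via the Management API),
// then remove it. Replace the placeholder values with your real keys.
VoxEngine.addEventListener(AppEvents.Started, async () => {
  await ApplicationStorage.put("OPENAI_API_KEY", "<your OpenAI API key>");
  await ApplicationStorage.put("INWORLD_API_KEY", "<your Inworld API key>"); // optional
  VoxEngine.terminate();
});
```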
## How it works

* OpenAI runs in text mode (`output_modalities: ["text"]`).
* Caller audio is sent to OpenAI: `call.sendMediaTo(voiceAIClient)`.
* Inworld generates speech from OpenAI text and streams it to the call.

## Notes

* The example sets `voiceId: "Ashley"` and `modelId: "inworld-tts-1.5-mini"` in `createContextParameters.create`. Change these to any supported Inworld voice/model.
* Do not set audio format parameters in half-cascade connector requests. VoxEngine's WebSocket gateway handles media format negotiation automatically.
* If no Inworld API key is provided, Voximplant's default account and billing are used.
* Custom/cloned voices are only available when using your own API key.
* Generate speech with `send({ send_text: { text } })` (see the `ResponseOutputTextDone` handler in the scenario below).
* Flush the context after every turn with `send({ flush_context: {} })`.
* Clear buffered speech in the barge-in handler with `clearBuffer()` so interruptions stay natural.

## More info

* OpenAI module API: [https://voximplant.com/docs/references/voxengine/openai](https://voximplant.com/docs/references/voxengine/openai)
* OpenAI Realtime guide: [https://voximplant.com/docs/guides/ai/openai-realtime](https://voximplant.com/docs/guides/ai/openai-realtime)
* Inworld module API: [https://voximplant.com/docs/references/voxengine/inworld](https://voximplant.com/docs/references/voxengine/inworld)
* Realtime TTS guide: [https://voximplant.com/docs/guides/speech/realtime-tts](https://voximplant.com/docs/guides/speech/realtime-tts)

## Full VoxEngine scenario

```javascript title={"voxengine-openai-half-cascade-inworld.js"} maxLines={0}
/**
 * Voximplant + OpenAI Realtime API + Inworld TTS demo
 * Scenario: OpenAI handles STT/LLM, Inworld handles TTS (half-cascade).
 */
require(Modules.OpenAI);
require(Modules.Inworld);
require(Modules.ApplicationStorage);

const SYSTEM_PROMPT = `
You are Voxi, a helpful phone assistant.
Keep responses short and telephony-friendly.
Always reply in English.
`;

const INWORLD_VOICE_ID = "Ashley"; // set your preference here
const INWORLD_MODEL_ID = "inworld-tts-1.5-mini"; // set your preference here, or leave blank to use the default model for the voice

const SESSION_CONFIG = {
  session: {
    type: "realtime",
    instructions: SYSTEM_PROMPT,
    output_modalities: ["text"],
    turn_detection: {type: "server_vad", interrupt_response: true},
  },
};

VoxEngine.addEventListener(AppEvents.CallAlerting, async ({call}) => {
  let voiceAIClient;
  let ttsPlayer;

  call.addEventListener(CallEvents.Disconnected, () => VoxEngine.terminate());
  call.addEventListener(CallEvents.Failed, () => VoxEngine.terminate());

  try {
    call.answer();
    // call.record({hd_audio: true, stereo: true}); // Optional: record the call

    const openAiKey = (await ApplicationStorage.get("OPENAI_API_KEY")).value;

    voiceAIClient = await OpenAI.createRealtimeAPIClient({
      apiKey: openAiKey,
      model: "gpt-realtime",
      onWebSocketClose: (event) => {
        Logger.write("===OpenAI.WebSocket.Close===");
        if (event) Logger.write(JSON.stringify(event));
        VoxEngine.terminate();
      },
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.SessionCreated, () => {
      voiceAIClient.sessionUpdate(SESSION_CONFIG);
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.SessionUpdated, async () => {
      call.sendMediaTo(voiceAIClient); // send caller audio to OpenAI

      // Create the TTS player and pass the config parameters
      ttsPlayer = Inworld.createRealtimeTTSPlayer({
        // apiKey: (await ApplicationStorage.get("INWORLD_API_KEY")).value, // optional
        createContextParameters: {
          create: {
            voiceId: INWORLD_VOICE_ID,
            modelId: INWORLD_MODEL_ID,
            speakingRate: 1.1,
            temperature: 1.3,
          },
        },
      });
      ttsPlayer.sendMediaTo(call); // send synthesized audio to the call

      voiceAIClient.responseCreate({instructions: "Hello! How can I help today?"});
    });

    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.ResponseOutputTextDone, (event) => {
      const payload = event?.data?.payload || event?.data || {};
      const text = payload.text || payload.delta;
      if (!text || !ttsPlayer) return;
      Logger.write(`===AGENT_TEXT=== ${text}`);
      ttsPlayer.send({send_text: {text}});
      ttsPlayer.send({flush_context: {}});
    });

    // Barge-in: clear both OpenAI and Inworld buffers
    voiceAIClient.addEventListener(OpenAI.RealtimeAPIEvents.InputAudioBufferSpeechStarted, () => {
      Logger.write("===BARGE-IN: OpenAI.InputAudioBufferSpeechStarted===");
      voiceAIClient.clearMediaBuffer();
      ttsPlayer?.clearBuffer();
    });

    // ---------------------- Log all other events for debugging -----------------------
    [
      OpenAI.RealtimeAPIEvents.ResponseCreated,
      OpenAI.RealtimeAPIEvents.ResponseDone,
      OpenAI.RealtimeAPIEvents.ResponseOutputTextDelta,
      OpenAI.RealtimeAPIEvents.ConnectorInformation,
      OpenAI.RealtimeAPIEvents.HTTPResponse,
      OpenAI.RealtimeAPIEvents.WebSocketError,
      OpenAI.RealtimeAPIEvents.Unknown,
      OpenAI.Events.WebSocketMediaStarted,
      OpenAI.Events.WebSocketMediaEnded,
    ].forEach((eventName) => {
      voiceAIClient.addEventListener(eventName, (event) => {
        Logger.write(`===${event.name}===`);
        if (event?.data) Logger.write(JSON.stringify(event.data));
      });
    });
  } catch (error) {
    Logger.write("===UNHANDLED_ERROR===");
    Logger.write(String(error));
    voiceAIClient?.close();
    VoxEngine.terminate();
  }
});
```
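If you store your own Inworld key (see Prerequisites), the commented `apiKey` line in the scenario can be replaced with a small lookup that falls back to Voximplant's default account when no key is stored. This is a minimal sketch with a hypothetical helper, assuming `ApplicationStorage.get` resolves with `null` for a missing key:

```javascript
// Hypothetical helper: returns the stored key value, or undefined so the
// player falls back to Voximplant's default Inworld account and billing.
// Assumes ApplicationStorage.get resolves with null when the key is absent.
async function getOptionalKey(name) {
  const record = await ApplicationStorage.get(name).catch(() => null);
  return record ? record.value : undefined;
}

// Usage inside the SessionUpdated handler of the scenario above:
// ttsPlayer = Inworld.createRealtimeTTSPlayer({
//   apiKey: await getOptionalKey("INWORLD_API_KEY"),
//   createContextParameters: { ... },
// });
```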