| 1 | /** |
| 2 | * Voximplant + Gemini Live on Vertex AI + MCP demo |
| 3 | * Scenario: answer an inbound call and look up Voxy Plumbers dispatch |
| 4 | * availability from Zapier Tables through an MCP server. |
| 5 | */ |
| 6 | require(Modules.Gemini); |
| 7 | require(Modules.MCP); |
| 8 | |
// System prompt for the Gemini Live agent: persona, call flow, MCP usage rules
// and a sample dialogue. The call-flow steps are listed in chronological order,
// and the hang-up marker in the example uses the same name as the hangup_call
// function declared in the Gemini connect config.
const SYSTEM_INSTRUCTION = `
You are the warm, friendly phone assistant for Voxy Plumbers. Keep responses short and natural for a phone call.
Most responses should be under 12 words. Use very brief spoken turns. Ask one question at a time.

The golden call flow is:
1. Greet the caller mentioning Voxy Plumbers.
2. If the caller reports a plumbing problem, use the available MCP tools to check Voxy Plumbers dispatch availability in Zapier Tables.
3. Do not ask which window they prefer before checking availability.
4. Use the returned plumber name and arrival window as a suggested appointment.
5. After the dispatch lookup, say: "Our plumber, [plumber name] is available [arrival window]. Does that work?"
6. If the caller says thanks or goodbye, briefly close and call hangup_call.

Use the MCP tools to inspect the enabled Zapier action before executing it.
Do not invent MCP parameter values. If you do not know an exact selected_api, action, tool_name, table_id, or field name from an MCP result, omit that parameter.
Start by calling list_enabled_zapier_actions without filters.
Call the enabled action that reads Zapier Tables records.
Find rows in the Voxy Plumbers Dispatch Availability table where Status is Available, using the table's stored order.

Example script:
Agent: "Thanks for calling Voxy Plumbers. How can I help?"
Caller: I have a plumbing issue that I need help with.
Agent: [MCP dispatch availability lookup]
Agent: "Our plumber, Alice Bob is available today between 3:00 PM and 4:00 PM. Does that work?"
Caller: That's good, goodbye.
Agent: Thanks for calling. We will get you some help soon. Bye [hangup_call]
`;
| 35 | |
| 36 | VoxEngine.addEventListener(AppEvents.CallAlerting, async ({call}) => { |
| 37 | let geminiClient; |
| 38 | let mcpClient; |
| 39 | let pendingGeminiFunction; |
| 40 | let holdPromptPlaying = false; |
| 41 | let resumeGeminiAudio; |
| 42 | const requiredMcpTools = ["list_enabled_zapier_actions", "execute_zapier_read_action"]; |
| 43 | |
| 44 | |
| 45 | // -------------------------- Helpers -------------------------- |
// Set once shutdown has begun so repeated triggers do not tear down twice.
let sessionTerminating = false;

/**
 * Closes the Gemini and MCP clients (when they exist) and ends the VoxEngine session.
 *
 * This function is wired to several triggers in this scenario (call
 * Disconnected/Failed, the Gemini websocket close callback, the hangup timer
 * and the error path), so it may be invoked more than once; only the first
 * invocation does any work.
 */
const terminate = () => {
  if (sessionTerminating) {
    return;
  }
  sessionTerminating = true;

  for (const client of [geminiClient, mcpClient]) {
    try {
      client?.close();
    } catch (error) {
      // A failing close must not prevent the session from being terminated.
      Logger.write(`===CLIENT_CLOSE_FAILED===> ${error}`);
    }
  }

  VoxEngine.terminate();
};
| 51 | |
// Gemini currently stays silent while an MCP call is pending, and Zapier can
// take up to 20 seconds to respond, so a TTS phrase is used as a manual hold prompt.
/**
 * Plays a short TTS hold prompt to the caller.
 *
 * Gemini audio towards the call is stopped while the prompt plays and is
 * re-attached when playback finishes. The finish handler is also stored in
 * `resumeGeminiAudio` so that stopHoldPrompt() can detach it when the prompt
 * is interrupted early.
 */
const playHoldPrompt = () => {
  // A prompt is already on the line. Starting another one would register a
  // second PlaybackFinished listener and lose the reference to the first.
  if (holdPromptPlaying) {
    return;
  }

  Logger.write("===HOLD_PROMPT_STARTED===");

  geminiClient.stopMediaTo(call);
  holdPromptPlaying = true;

  // The handler removes itself through its own binding, so it stays correct
  // even if `resumeGeminiAudio` is reassigned later.
  const onPromptFinished = () => {
    holdPromptPlaying = false;
    call.removeEventListener(CallEvents.PlaybackFinished, onPromptFinished);
    Logger.write("===HOLD_PROMPT_FINISHED===");
    geminiClient.sendMediaTo(call);
  };
  resumeGeminiAudio = onPromptFinished;
  call.addEventListener(CallEvents.PlaybackFinished, onPromptFinished);

  call.say("One moment while I check availability.", {
    voice: VoiceList.Google.en_US_Chirp3_HD_Aoede,
    progressivePlayback: true,
    ttsOptions: {
      pitch: "default",
      volume: "medium",
    },
    request: {
      audioConfig: {
        speakingRate: 1.1,
      },
    },
  });
};
| 81 | |
/**
 * Interrupts the hold prompt if one is currently playing.
 *
 * Detaches the pending PlaybackFinished handler, stops playback on the call
 * and re-attaches Gemini audio to the call. Does nothing when no prompt is
 * marked as playing.
 */
const stopHoldPrompt = () => {
  if (holdPromptPlaying) {
    holdPromptPlaying = false;

    // Detach first so the stopped playback cannot trigger the resume handler.
    call.removeEventListener(CallEvents.PlaybackFinished, resumeGeminiAudio);
    call.stopPlayback();

    Logger.write("===HOLD_PROMPT_INTERRUPTED===");
    geminiClient.sendMediaTo(call);
  }
};
| 92 | |
| 93 | try { |
| 94 | // -------------------------- MCP setup -------------------------- |
| 95 | |
// Open the MCP client connection. The endpoint is read through
// VoxEngine.getSecretValue rather than being hard-coded in the scenario.
Logger.write("===MCP_CONNECTING===");
const mcpServerConnectionConfig = {
  transport: "http",
  endpoint: VoxEngine.getSecretValue("ZAPIER_MCP_URL"),
  headers: {Accept: "application/json, text/event-stream"},
  clientName: "voximplant-vertex-mcp-demo",
  clientVersion: "1.0.0",
};
mcpClient = await MCP.createClient({mcpServerConnectionConfig});
| 106 | |
// On connector information: log the payload, then request the tool list.
// The ToolsList handler continues the flow from there.
mcpClient.addEventListener(MCP.ServerEvents.ConnectorInformation, ({data}) => {
  const connectorInfo = JSON.stringify(data.payload);
  Logger.write(`===MCP_CONNECTOR_INFORMATION===> ${connectorInfo}`);
  mcpClient.listTools({});
});
| 112 | |
// On the MCP tool list: build Gemini function declarations for the tools this
// agent needs, then either start the call or give up if any are missing.
mcpClient.addEventListener(MCP.ServerEvents.ToolsList, (event) => {
  const availableTools = event?.data?.payload?.tools || [];
  Logger.write(`===MCP_TOOLS_LIST===> ${availableTools.length} Tools Available:`);
  // Debug aids:
  // Logger.write(JSON.stringify(availableTools.map((tool) => `${tool.name}: ${tool.description}`)));
  // Logger.write(JSON.stringify(availableTools)); // full tool object

  // Only the required tools are exposed to Gemini. Their MCP input schemas are
  // kept, but the descriptions are replaced with narrower, agent-specific ones.
  const mcpFunctionDeclarations = [];
  for (const tool of availableTools) {
    if (!requiredMcpTools.includes(tool.name)) {
      continue;
    }

    let description;
    if (tool.name === "list_enabled_zapier_actions") {
      description = "List enabled Zapier actions and their exact action keys. Use only the tools available in this session.";
    } else {
      description = "Execute an enabled Zapier read action using an action key and params returned by list_enabled_zapier_actions.";
    }

    mcpFunctionDeclarations.push({
      name: tool.name,
      description,
      parametersJsonSchema: tool.inputSchema,
    });
  }

  // Every required tool must have been returned by the server.
  if (mcpFunctionDeclarations.length === requiredMcpTools.length) {
    // Gemini needs its tools defined at startup, so the call is answered and
    // the agent started only now that the declarations are known.
    startCall(mcpFunctionDeclarations);
    return;
  }

  Logger.write("===MCP_REQUIRED_TOOLS_MISSING===");
  terminate();
});
| 142 | |
// Relay an MCP tool result back to Gemini as the response to the pending
// function call. Malformed results are converted into an error response so
// the pending call is always answered.
mcpClient.addEventListener(MCP.ServerEvents.ToolResult, (event) => {
  Logger.write(`===MCP_TOOL_RESULT===> ${JSON.stringify(event?.data)}`);

  // Keep this demo synchronous: one Gemini tool call -> one MCP response at a time.
  if (!pendingGeminiFunction) {
    return;
  }

  const payload = event?.data?.payload;
  let mcpOutput;
  try {
    // A missing content array or an empty text part is treated as an empty JSON object.
    mcpOutput = JSON.parse(payload?.content?.[0]?.text || "{}");
  } catch (error) {
    // Non-JSON text must not throw out of this handler, otherwise the pending
    // function call would never be answered.
    Logger.write(`===MCP_TOOL_RESULT_PARSE_FAILED===> ${error}`);
    mcpOutput = {error: "MCP tool returned a response that is not valid JSON."};
  }

  // JSON.parse may return null, so read .error defensively.
  if (!mcpOutput || payload?.isError || mcpOutput.error) {
    mcpOutput = {error: mcpOutput?.error || "MCP tool failed."};
  }

  // Stop any hold prompt before responding to the LLM.
  stopHoldPrompt();

  geminiClient.sendToolResponse({
    functionResponses: [{
      id: pendingGeminiFunction.id,
      name: pendingGeminiFunction.name,
      response: {
        output: mcpOutput,
      },
    }],
  });
  pendingGeminiFunction = undefined;
});
| 170 | |
// On an MCP error: log it and, if a Gemini function call is waiting on MCP,
// answer that call with an error so the agent can continue the conversation.
mcpClient.addEventListener(MCP.ServerEvents.MCPError, (event) => {
  Logger.write("===MCP_ERROR===");
  Logger.write(JSON.stringify(event?.data || event || {}));

  if (!pendingGeminiFunction) {
    return;
  }

  // As on the ToolResult path, interrupt the hold prompt first so Gemini
  // audio is re-attached to the call before the model is answered.
  stopHoldPrompt();

  geminiClient.sendToolResponse({
    functionResponses: [{
      id: pendingGeminiFunction.id,
      name: pendingGeminiFunction.name,
      response: {
        error: "Dispatch availability lookup failed.",
      },
    }],
  });
  pendingGeminiFunction = undefined;
});
| 189 | |
// Log any MCP server event that has no dedicated handler.
mcpClient.addEventListener(MCP.ServerEvents.Unknown, (event) => {
  const details = event?.data || event;
  Logger.write(`===MCP_UNKNOWN===> ${JSON.stringify(details)}`);
});
| 193 | |
| 194 | // -------------------------- Handle the Call -------------------------- |
| 195 | |
/**
 * Answers the call and starts the Gemini Live agent with the MCP tools attached.
 *
 * This function is called without `await` from the MCP ToolsList event
 * callback, so the enclosing try/catch cannot observe a rejection from it.
 * Failures are therefore handled here: they are logged and the session is
 * terminated instead of surfacing as an unhandled promise rejection.
 *
 * @param {Array<object>} mcpFunctionDeclarations Function declarations built
 *   from the MCP tool list; they are added to the Gemini tool configuration.
 * @returns {Promise<void>} Resolves once the Gemini client has been created
 *   and its event handlers registered, or once a failure has been handled.
 */
const startCall = async (mcpFunctionDeclarations) => {
  try {
    call.answer();
    call.record({hd_audio: true, stereo: true});

    call.addEventListener(CallEvents.Disconnected, terminate);
    call.addEventListener(CallEvents.Failed, terminate);

    // -------------------- Gemini Live API setup --------------------

    // Add the MCP tools to the connection configuration
    const CONNECT_CONFIG = {
      responseModalities: ["AUDIO"],
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: {voiceName: "Aoede"},
        },
      },
      systemInstruction: {
        parts: [{text: SYSTEM_INSTRUCTION}],
      },
      tools: [
        {
          functionDeclarations: [
            ...mcpFunctionDeclarations,
            {
              name: "hangup_call",
              description: "Hang up the current call",
              parametersJsonSchema: {
                type: "object",
                properties: {},
                required: [],
              },
            },
          ],
        },
      ],
      inputAudioTranscription: {},
      outputAudioTranscription: {},
    };

    geminiClient = await Gemini.createLiveAPIClient({
      credentials: VoxEngine.getSecretValue("GCP_CREDENTIALS"),
      project: VoxEngine.getSecretValue("GCP_PROJECT_ID"),
      location: VoxEngine.getSecretValue("GCP_REGION"),
      model: "gemini-live-2.5-flash-native-audio",
      backend: Gemini.Backend.VERTEX_AI,
      connectConfig: CONNECT_CONFIG,
      onWebSocketClose: (event) => {
        Logger.write(`===GEMINI_WEBSOCKET_CLOSE===> ${JSON.stringify(event)}`);
        terminate();
      },
    });

    // Once setup completes, bridge audio both ways and prompt the greeting.
    geminiClient.addEventListener(Gemini.LiveAPIEvents.SetupComplete, () => {
      VoxEngine.sendMediaBetween(call, geminiClient);
      geminiClient.sendRealtimeInput({text: "Greet the caller"});
    });

    geminiClient.addEventListener(Gemini.LiveAPIEvents.ToolCall, (event) => {
      const functionCalls = event?.data?.payload?.functionCalls || [];

      for (const fn of functionCalls) {
        const {id, name, args} = fn;

        // Check the tool request against our MCP tools
        if (requiredMcpTools.includes(name)) {
          // Only one pending call is tracked; the MCP result and error
          // handlers answer whichever call is stored here.
          pendingGeminiFunction = {id, name};
          Logger.write("===ZAPIER_MCP Tool Call===");
          Logger.write(JSON.stringify({id, tool: name, args}));

          // Add a special handler since this tool has a delay
          if (name === "execute_zapier_read_action") {
            playHoldPrompt();
          }
          // Send the tool request to the MCP Server
          mcpClient.callTool({name, arguments: args});
        }
        // We can still define our own tools
        else if (name === "hangup_call") {
          // Simple timeout to give the agent time to say goodbye - in production this should be event-driven
          setTimeout(() => {
            call.hangup();
            terminate();
          }, 5000);
          const response = {
            id,
            name,
            response: {
              output: {
                result: "Goodbye. Thank you for calling Voxy Plumbers.",
              },
            },
          };
          geminiClient.sendToolResponse({functionResponses: [response]});
        }
        // Error handling for non-handled tools
        else {
          geminiClient.sendToolResponse({
            functionResponses: [{
              id,
              name,
              response: {error: `Unhandled tool: ${name}`},
            }],
          });
        }
      }
    });

    geminiClient.addEventListener(Gemini.LiveAPIEvents.ToolCallCancellation, (event) => {
      const ids = event?.data?.payload?.ids || [];
      ids.forEach((id) => {
        Logger.write(`===GEMINI_TOOL_CALL_CANCELLED===> ${JSON.stringify({id})}`);
      });

      // If the cancelled call is the one waiting on MCP, forget it so a late
      // MCP result is not sent for it, and stop the hold prompt.
      if (pendingGeminiFunction && ids.includes(pendingGeminiFunction.id)) {
        pendingGeminiFunction = undefined;
        stopHoldPrompt();
      }
    });

    // Handle other Gemini events and debug logging
    geminiClient.addEventListener(Gemini.LiveAPIEvents.ServerContent, (event) => {
      const payload = event?.data?.payload || {};
      if (payload.inputTranscription?.text) {
        Logger.write(`===USER===> ${payload.inputTranscription.text}`);
      }
      if (payload.outputTranscription?.text) {
        Logger.write(`===AGENT===> ${payload.outputTranscription.text}`);
      }
      if (payload.interrupted) {
        Logger.write("===BARGE_IN===");
        geminiClient.clearMediaBuffer();
      }
    });
  } catch (error) {
    Logger.write("===GEMINI_START_FAILED===");
    Logger.write(String(error));
    terminate();
  }
};
| 326 | } catch (error) { |
| 327 | Logger.write("===SOMETHING_WENT_WRONG==="); |
| 328 | Logger.write(error); |
| 329 | terminate(); |
| 330 | } |
| 331 | }); |