Example: Using Grok features

Connect an inbound call to a Grok voice agent
View as Markdown | Open in Claude

Overview

This inbound example showcases Grok tools (function calling, file_search, web_search, x_search) with barge-in for telephony use. The same tool configuration applies to outbound calls—reuse the tools and prompt in your outbound scenario and swap the call entrypoint.

⬇️ Jump to the Full VoxEngine scenario.

Prerequisites

Usage highlights

  • Create a VoiceAgentAPIClient with Grok.createVoiceAgentAPIClient(...).
  • Configure the session with voice, turn_detection, instructions, and tools.
  • Bridge audio with VoxEngine.sendMediaBetween(call, client).
  • Handle function calls with ResponseFunctionCallArgumentsDone.

Feature summary

Turn detection & barge-in

When InputAudioBufferSpeechStarted fires, clear the media buffer so the caller can interrupt the agent:

// Barge-in: as soon as the caller starts speaking, drop any agent audio that
// is still buffered so the caller can interrupt the agent.
voiceAgentAPIClient.addEventListener(
  Grok.VoiceAgentAPIEvents.InputAudioBufferSpeechStarted,
  () => voiceAgentAPIClient.clearMediaBuffer()
);

Function calling

Function calling lets Grok request actions from VoxEngine (transfer, hang up, fetch data) and receive a structured response. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#function-calling.

1{
2 type: "function",
3 name: "forward_to_agent",
4 description: "Forward the user to a live agent",
5 parameters: {
6 type: "object",
7 properties: {},
8 required: [],
9 },
10}

Handle tool calls and return outputs to the agent:

// Answer the agent's function calls: build a result for the two functions this
// scenario implements, send it back as a function_call_output item, and ask
// the agent to produce its next response.
voiceAgentAPIClient.addEventListener(
  Grok.VoiceAgentAPIEvents.ResponseFunctionCallArgumentsDone,
  (event) => {
    const { name, call_id } = event?.data?.payload || {};
    // Only forward_to_agent and hangup_call are handled here; ignore anything else.
    if (name !== "forward_to_agent" && name !== "hangup_call") return;

    const output =
      name === "forward_to_agent"
        ? { result: "Forwarding your call to a live agent. Please hold on." }
        : { result: "Have a great day, goodbye!" };

    // call_id ties this output to the function call it answers.
    voiceAgentAPIClient.conversationItemCreate({
      item: {
        type: "function_call_output",
        call_id,
        output: JSON.stringify(output),
      },
    });
    voiceAgentAPIClient.responseCreate({});
  }
);

Use file_search to ground responses in your documents. Upload files to Grok, then reference the collection (vector store) ID in your session tools. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#file-search.

1{
2 type: "file_search",
3 vector_store_ids: [COLLECTION_ID],
4 max_num_results: 5,
5}
  • Keep your collection focused on the topics the agent should answer.
  • Tune max_num_results to balance relevance and speed.

Enable web_search when you want Grok to fetch public web information at runtime. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#web-search.

// web_search tool entry for the session's `tools` array.
{ type: "web_search" }

Use x_search to limit Grok’s social search to specific X handles. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#x-search.

1{
2 type: "x_search",
3 allowed_x_handles: ["voximplant", "aylarov"],
4}

Configure before you run

  • Set XAI_API_KEY in ApplicationStorage.
  • Update COLLECTION_ID to point at your uploaded documents (or remove file_search).
  • Adjust the SYSTEM_PROMPT to match your brand voice and escalation rules.

Try it

Notes

See the VoxEngine API Reference for more details.

Full VoxEngine scenario

This scenario includes barge-in handling, function calling, file search, web search, and X search.

voxengine-grok-features.js
require(Modules.Grok);
require(Modules.ApplicationStorage);

// Instructions sent to the agent: persona, answer length, and when to call
// the forward_to_agent / hangup_call functions declared in SESSION_PARAMETERS.
const SYSTEM_PROMPT = `
  Your name is Voxi. You are a helpful voice assistant for phone callers representing the company Voximplant (pronounced VOX-im-plant).
  You can answer questions about the company, its voice AI integrations, and X/Twitter posts from the "voximplant" and "aylarov" handles.
  "aylarov" is the X handle for Alexey Aylarov, Voximplant's CEO.

  Keep responses short and telephony-friendly (usually 1-2 sentences).
  If the user asks for a live agent or an operator, call the "forward_to_agent" function.
  If the user says goodbye, call the "hangup_call" function.

  When answering a company/product question, prefer searching the knowledge base first.
`;
const COLLECTION_ID = "collection_4c5a63ab-f739-4c13-93d2-05b74095c34a"; // uploaded documents to show RAG

// -------------------- Grok Voice Agent settings --------------------
// Session configuration pushed to the agent once the conversation is created:
// voice, turn detection, instructions, and the tool list.
const SESSION_PARAMETERS = {
  session: {
    voice: "Ara",
    turn_detection: {type: "server_vad"},
    instructions: SYSTEM_PROMPT,
    tools: [
      {type: "web_search"},
      {
        type: "file_search",
        vector_store_ids: [COLLECTION_ID],
        max_num_results: 5,
      },
      {
        type: "x_search",
        allowed_x_handles: ["voximplant", "aylarov"],
      },
      {
        type: "function",
        name: "forward_to_agent",
        description: "Forward the user to a live agent",
        parameters: {
          type: "object",
          properties: {},
          required: [],
        },
      },
      {
        type: "function",
        name: "hangup_call",
        description: "Hangup the call",
        parameters: {
          type: "object",
          properties: {},
          required: [],
        },
      },
    ],
  },
};

// Entry point for inbound calls: answer, connect the caller to a Grok voice
// agent, and react to the agent's function calls (hang up / forward).
VoxEngine.addEventListener(AppEvents.CallAlerting, async ({call}) => {
  let voiceAIClient = undefined;
  // Set by the function-call handler; acted on once the agent's audio has ended.
  let hangupCall = false;
  let forwardToLiveAgent = false;

  call.answer();
  call.record({hd_audio: true, stereo: true}); // optional: call recording

  // Shared teardown: close the agent connection (if one was created) and end the session.
  const callCloseHandler = () => {
    voiceAIClient?.close();
    VoxEngine.terminate();
  };
  call.addEventListener(CallEvents.Disconnected, callCloseHandler);
  call.addEventListener(CallEvents.Failed, callCloseHandler);

  try {
    // Fail with an explicit message when the key is missing instead of an
    // opaque TypeError from reading `.value` on an absent storage entry.
    const apiKeyEntry = await ApplicationStorage.get("XAI_API_KEY");
    if (!apiKeyEntry?.value) {
      throw new Error("XAI_API_KEY is not set in ApplicationStorage");
    }

    voiceAIClient = await Grok.createVoiceAgentAPIClient({
      xAIApiKey: apiKeyEntry.value,
      onWebSocketClose: (event) => {
        Logger.write(`===${event.name}===>${JSON.stringify(event.data)}`);
        VoxEngine.terminate();
      },
    });

    // Configure the session as soon as the conversation exists.
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.ConversationCreated, (event) => {
      Logger.write(`===${event.name}===>${JSON.stringify(event.data)}`);
      voiceAIClient.sessionUpdate(SESSION_PARAMETERS);
    });

    // Once the session settings are applied, bridge audio and have the agent greet the caller.
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.SessionUpdated, (event) => {
      Logger.write(`===${event.name}===>${JSON.stringify(event.data)}`);
      VoxEngine.sendMediaBetween(call, voiceAIClient);
      voiceAIClient.responseCreate({instructions: "Hello."});
    });

    // -------------------- Barge-in (keep it interruption-friendly) --------------------
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.InputAudioBufferSpeechStarted, (event) => {
      Logger.write(`===${event.name}===>${JSON.stringify(event.data)}`);
      voiceAIClient.clearMediaBuffer();
    });

    // -------------------- Function calling --------------------
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.ResponseFunctionCallArgumentsDone, (event) => {
      Logger.write(`===${event.name}===>${JSON.stringify(event.data)}`);

      const {name, call_id} = event?.data?.payload || {};

      // Ignore server-side tools like collections_search / web_search / x_search
      if (name !== "forward_to_agent" && name !== "hangup_call") {
        Logger.write(`===Ignoring unhandled function call: ${name}===`);
        return;
      }

      // Only the two handled names reach this point.
      let output;
      if (name === "forward_to_agent") {
        forwardToLiveAgent = true;
        output = {result: "Forwarding your call to a live agent. Please hold on."};
      } else {
        hangupCall = true;
        output = {result: "Have a great day, goodbye!"};
      }

      // Return the function result to the agent and request its next response.
      voiceAIClient.conversationItemCreate({
        item: {
          type: "function_call_output",
          call_id,
          output: JSON.stringify(output),
        },
      });
      voiceAIClient.responseCreate({});
    });

    // -------------------- Log Other Events --------------------
    // NOTE(review): CallEvents.FirstAudioPacketReceived is a call event but is
    // registered on the Grok client here - confirm it should not be on `call`.
    [
      CallEvents.FirstAudioPacketReceived,
      Grok.Events.WebSocketMediaStarted,
      Grok.VoiceAgentAPIEvents.InputAudioBufferSpeechStopped,
      Grok.VoiceAgentAPIEvents.ConversationItemInputAudioTranscriptionCompleted,
      Grok.VoiceAgentAPIEvents.ConversationItemAdded,
      Grok.VoiceAgentAPIEvents.ResponseCreated,
      Grok.VoiceAgentAPIEvents.ResponseOutputItemAdded,
      Grok.VoiceAgentAPIEvents.ResponseDone,
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioTranscriptDelta,
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioTranscriptDone,
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioDelta, // Not in enum
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioDone,
      Grok.VoiceAgentAPIEvents.ResponseOutputItemDone,
      Grok.VoiceAgentAPIEvents.ConnectorInformation,
      Grok.VoiceAgentAPIEvents.InputAudioBufferCommitted,
      Grok.VoiceAgentAPIEvents.WebSocketError,
      Grok.VoiceAgentAPIEvents.Unknown,
    ]
      // Skip names that are missing from the enum so an undefined event name is never registered.
      .filter((evtName) => evtName !== undefined)
      .forEach((evtName) => {
        voiceAIClient.addEventListener(evtName, (e) => {
          Logger.write(`===${e.name}===>${JSON.stringify(e)}`);
        });
      });

    // Act on a pending hang-up or forward request when this media-ended event fires.
    voiceAIClient.addEventListener(Grok.Events.WebSocketMediaEnded, (event) => {
      Logger.write(`===${event.name}===>${JSON.stringify(event.data)}`);
      if (hangupCall) {
        callCloseHandler();
      } else if (forwardToLiveAgent) {
        call.say("Here is where I would forward the call via the phone network, SIP, or WhatsApp.");
        // See the forwardCallToPSTN, forwardCallToSIP, forwardCallToUser, and handleBlindTransfer call methods
        // For this simple demo, we will just close and hang-up
        call.addEventListener(CallEvents.PlaybackFinished, callCloseHandler);
      }
    });
  } catch (error) {
    Logger.write("===SOMETHING_WENT_WRONG===");
    // Logger.write is given text explicitly rather than the raw Error object.
    Logger.write(String(error?.stack ?? error));
    // Also closes the Grok client if it was created before the failure.
    callCloseHandler();
  }
});