Example: Using Grok features

Connect an inbound call to a Grok voice agent
View as Markdown

For the complete documentation index, see llms.txt.

Overview

This inbound example showcases Grok tools (function calling, file_search, web_search, x_search) with barge-in for telephony use. The same tool configuration applies to outbound calls—reuse the tools and prompt in your outbound scenario and swap the call entrypoint.

⬇️ Jump to the Full VoxEngine scenario.

Prerequisites

Usage highlights

  • Create a VoiceAgentAPIClient with Grok.createVoiceAgentAPIClient(...).
  • Configure the session with voice, turn_detection, instructions, and tools.
  • Bridge audio with VoxEngine.sendMediaBetween(call, client).
  • Handle function calls with ResponseFunctionCallArgumentsDone.

Feature summary

Turn detection & barge-in

When InputAudioBufferSpeechStarted fires, clear the media buffer so the caller can interrupt the agent:

// Barge-in: when Grok reports that the caller started speaking, clear the
// media buffer so the caller can interrupt the agent.
voiceAgentAPIClient.addEventListener(
  Grok.VoiceAgentAPIEvents.InputAudioBufferSpeechStarted,
  () => voiceAgentAPIClient.clearMediaBuffer()
);

Function calling

Function calling lets Grok request actions from VoxEngine (transfer, hang up, fetch data) and receive a structured response. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#function-calling.

{
  type: "function",
  name: "forward_to_agent",
  description: "Forward the user to a live agent",
  parameters: {
    type: "object",
    properties: {},
    required: [],
  },
}

Handle tool calls and return outputs to the agent:

// Handle function calls requested by the agent and send each result back.
voiceAgentAPIClient.addEventListener(
  Grok.VoiceAgentAPIEvents.ResponseFunctionCallArgumentsDone,
  (event) => {
    // A missing payload yields undefined name/call_id instead of throwing.
    const { name, call_id } = event?.data?.payload || {};
    // Only these two function names are handled; anything else is ignored.
    if (name !== "forward_to_agent" && name !== "hangup_call") return;

    const output =
      name === "forward_to_agent"
        ? { result: "Forwarding your call to a live agent. Please hold on." }
        : { result: "Have a great day, goodbye!" };

    // Return the function result, keyed by the call_id of the request.
    voiceAgentAPIClient.conversationItemCreate({
      item: {
        type: "function_call_output",
        call_id,
        output: JSON.stringify(output),
      },
    });
    // Request the next response from the agent.
    voiceAgentAPIClient.responseCreate({});
  }
);

Use file_search to ground responses in your documents. Upload files to Grok, then reference the collection (vector store) ID in your session tools. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#file-search.

{
  type: "file_search",
  vector_store_ids: [COLLECTION_ID],
  max_num_results: 5,
}
  • Keep your collection focused on the topics the agent should answer.
  • Tune max_num_results to balance relevance and speed.

Enable web_search when you want Grok to fetch public web information at runtime. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#web-search.

{ type: "web_search" }

Use x_search to let Grok search posts on X, and set allowed_x_handles to limit that search to specific X handles. For official Grok tool guidance, see https://docs.x.ai/docs/guides/tools/overview#x-search.

{
  type: "x_search",
  allowed_x_handles: ["voximplant", "aylarov"],
}

Configure before you run

  • Set GROK_API_KEY in Voximplant Secrets.
  • Update COLLECTION_ID to point at your uploaded documents (or remove file_search).
  • Adjust the SYSTEM_PROMPT to match your brand voice and escalation rules.

Try it

Notes

See the VoxEngine API Reference for more details.

Full VoxEngine scenario

This scenario includes barge-in handling, function calling, file search, web search, and X search.

voxengine-grok-features.js
// Load the Grok module; the scenario uses the Grok.* APIs it provides.
require(Modules.Grok);
// Instructions given to the agent: persona, answer style, and when to call
// the "forward_to_agent" and "hangup_call" functions.
const SYSTEM_PROMPT = `
  Your name is Voxi. You are a helpful voice assistant for phone callers representing the company Voximplant (pronounced VOX-im-plant).
  You can answer questions about the company, its voice AI integrations, and X/Twitter posts from the "voximplant" and "aylarov" handles.
  "aylarov" is the X handle for Alexey Aylarov, Voximplant's CEO.

  Keep responses short and telephony-friendly (usually 1-2 sentences).
  If the user asks for a live agent or an operator, call the "forward_to_agent" function.
  If the user says goodbye, call the "hangup_call" function.

  When answering a company/product question, prefer searching the knowledge base first.
`;
const COLLECTION_ID = "collection_4c5a63ab-f739-4c13-93d2-05b74095c34a"; // uploaded documents to show RAG

// -------------------- Grok Voice Agent settings --------------------
// Payload passed to sessionUpdate(): voice, turn detection, instructions, and tools.
const SESSION_PARAMETERS = {
  session: {
    voice: "Ara",
    turn_detection: {type: "server_vad"},
    instructions: SYSTEM_PROMPT,
    tools: [
      {type: "web_search"},
      {
        // Searches the uploaded document collection referenced by COLLECTION_ID.
        type: "file_search",
        vector_store_ids: [COLLECTION_ID],
        max_num_results: 5,
      },
      {
        // X search restricted to the handles named in SYSTEM_PROMPT.
        type: "x_search",
        allowed_x_handles: ["voximplant", "aylarov"],
      },
      {
        // Function tools take no arguments; the scenario only reacts to the name.
        type: "function",
        name: "forward_to_agent",
        description: "Forward the user to a live agent",
        parameters: {
          type: "object",
          properties: {},
          required: [],
        },
      },
      {
        type: "function",
        name: "hangup_call",
        description: "Hangup the call",
        parameters: {
          type: "object",
          properties: {},
          required: [],
        },
      },
    ],
  },
};
55
/**
 * Inbound call entry point: answers the call, creates a Grok voice agent
 * client, bridges audio between the two, and handles barge-in plus the
 * "forward_to_agent" / "hangup_call" function calls.
 */
VoxEngine.addEventListener(AppEvents.CallAlerting, async ({call}) => {
  let voiceAIClient;
  // Set by the function-call handler; read when the agent's media ends.
  let hangupCall = false;
  let forwardToLiveAgent = false;

  // Writes an event in the scenario's `===name===>data` log format.
  const logEvent = (event) => Logger.write(`===${event.name}===>${JSON.stringify(event.data)}`);

  call.answer();
  call.record({hd_audio: true, stereo: true}); // optional: call recording

  // Closes the agent client (if one was created) and calls VoxEngine.terminate().
  const callCloseHandler = () => {
    voiceAIClient?.close();
    VoxEngine.terminate();
  };
  call.addEventListener(CallEvents.Disconnected, callCloseHandler);
  call.addEventListener(CallEvents.Failed, callCloseHandler);

  try {
    voiceAIClient = await Grok.createVoiceAgentAPIClient({
      xAIApiKey: VoxEngine.getSecretValue('GROK_API_KEY'),
      onWebSocketClose: (event) => {
        logEvent(event);
        VoxEngine.terminate();
      },
    });

    // Once the conversation exists, apply the voice, instructions, and tools.
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.ConversationCreated, (event) => {
      logEvent(event);
      voiceAIClient.sessionUpdate(SESSION_PARAMETERS);
    });

    // After the session update is confirmed, bridge audio and request the greeting.
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.SessionUpdated, (event) => {
      logEvent(event);
      VoxEngine.sendMediaBetween(call, voiceAIClient);
      voiceAIClient.responseCreate({instructions: "Hello."});
    });

    // -------------------- Barge-in (keep it interruption-friendly) --------------------
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.InputAudioBufferSpeechStarted, (event) => {
      logEvent(event);
      voiceAIClient.clearMediaBuffer();
    });

    // -------------------- Function calling --------------------
    voiceAIClient.addEventListener(Grok.VoiceAgentAPIEvents.ResponseFunctionCallArgumentsDone, (event) => {
      logEvent(event);

      // A missing payload yields undefined name/call_id instead of throwing.
      const {name, call_id} = event?.data?.payload || {};
      let output;

      if (name === "forward_to_agent") {
        forwardToLiveAgent = true;
        output = {result: "Forwarding your call to a live agent. Please hold on."};
      } else if (name === "hangup_call") {
        hangupCall = true;
        output = {result: "Have a great day, goodbye!"};
      } else {
        // Ignore server-side tools like collections_search / web_search / x_search
        Logger.write(`===Ignoring unhandled function call: ${name}===`);
        return;
      }

      // Create a conversationItem and send it
      voiceAIClient.conversationItemCreate({
        item: {
          type: "function_call_output",
          call_id,
          output: JSON.stringify(output),
        },
      });
      voiceAIClient.responseCreate({});
    });

    // -------------------- Log Other Events --------------------
    [
      // NOTE(review): this is a CallEvents member registered on the agent client,
      // not on `call` — confirm the client actually emits it.
      CallEvents.FirstAudioPacketReceived,
      Grok.Events.WebSocketMediaStarted,
      Grok.VoiceAgentAPIEvents.InputAudioBufferSpeechStopped,
      Grok.VoiceAgentAPIEvents.ConversationItemInputAudioTranscriptionCompleted,
      Grok.VoiceAgentAPIEvents.ConversationItemAdded,
      Grok.VoiceAgentAPIEvents.ResponseCreated,
      Grok.VoiceAgentAPIEvents.ResponseOutputItemAdded,
      Grok.VoiceAgentAPIEvents.ResponseDone,
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioTranscriptDelta,
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioTranscriptDone,
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioDelta, // Not in enum
      Grok.VoiceAgentAPIEvents.ResponseOutputAudioDone,
      Grok.VoiceAgentAPIEvents.ResponseOutputItemDone,
      Grok.VoiceAgentAPIEvents.ConnectorInformation,
      Grok.VoiceAgentAPIEvents.InputAudioBufferCommitted,
      Grok.VoiceAgentAPIEvents.WebSocketError,
      Grok.VoiceAgentAPIEvents.Unknown,
    ]
      // Skip members missing from the enums so no listener is registered for `undefined`.
      .filter((evtName) => evtName != null)
      .forEach((evtName) => {
        voiceAIClient.addEventListener(evtName, (e) => {
          Logger.write(`===${e.name}===>${JSON.stringify(e)}`);
        });
      });

    // When the agent's media ends, act on whichever function call was recorded.
    voiceAIClient.addEventListener(Grok.Events.WebSocketMediaEnded, (event) => {
      logEvent(event);
      if (hangupCall) {
        callCloseHandler();
      } else if (forwardToLiveAgent) {
        call.say("Here is where I would forward the call via the phone network, SIP, or WhatsApp.");
        // See the forwardCallToPSTN, forwardCallToSIP, forwardCallToUser, and handleBlindTransfer call methods
        // For this simple demo, we will just close and hang-up
        call.addEventListener(CallEvents.PlaybackFinished, callCloseHandler);
      }
    });
  } catch (error) {
    Logger.write("===SOMETHING_WENT_WRONG===");
    // Logger.write takes a string; prefer the stack when the thrown value has one.
    Logger.write(String(error?.stack ?? error));
    VoxEngine.terminate();
  }
});