| 1 | require(Modules.ApplicationStorage); |
| 2 | require(Modules.OpenAI); |
| 3 | |
// Mutable per-session state shared by the event handlers in this scenario.
let sessionUrl = null; // set from appEvent.accessSecureURL when the application starts
let connected = false; // true while the call is bridged to the Realtime API client
let cid = null; // caller ID of the current call
let realtimeAPIClient; // stays undefined until the client is created for a call

// Static configuration.
const OPENAI_API_KEY = VoxEngine.getSecretValue('OPENAI_API_KEY');
const MODEL = "gpt-realtime-1.5";
const WA_PROXY_URL = "https://waproxy.ngrok.app/webhook";
| 9 | |
/**
 * Callback passed to the Realtime API client as `onWebSocketClose`.
 * Logs a marker line followed by the serialized close event, then
 * terminates the VoxEngine session.
 *
 * @param {object} closeEvent - event object supplied by the client on close
 */
const onWebSocketClose = (closeEvent) => {
  Logger.write('===ON_WEB_SOCKET_CLOSE==');
  const serializedEvent = JSON.stringify(closeEvent);
  Logger.write(serializedEvent);
  VoxEngine.terminate();
};
| 15 | |
// Remember the session's secure access URL as soon as the application starts;
// it is stored in ApplicationStorage later, when a call arrives.
VoxEngine.addEventListener(AppEvents.Started, ({ accessSecureURL }) => {
  sessionUrl = accessSecureURL;
});
| 19 | |
/**
 * Handles HTTP requests delivered to this session.
 *
 * The request body is expected to be JSON of the form `{ "text": { "body": "..." } }`.
 * When a text body is present it is added to the Realtime conversation as a
 * user message, and a model response is requested once the client reports
 * that a conversation item has been added.
 *
 * Requests that arrive before the Realtime API client exists, or that carry
 * no text body, are ignored. The handler always returns "OK".
 */
VoxEngine.addEventListener(AppEvents.HttpRequest, (appEvent) => {
  Logger.write("Inbound Http request");
  try {
    const data = JSON.parse(appEvent.content);
    // `data` itself may be null (the JSON literal `null`), hence the leading `?.`.
    const messageText = data?.text?.body;
    if (messageText == null) {
      return "OK";
    }

    // Capture the current client so the one-shot listener below always
    // unregisters itself from the same instance it was registered on.
    const client = realtimeAPIClient;
    if (!client) {
      Logger.write("Inbound Http request ignored: the Realtime API client is not ready yet");
      return "OK";
    }

    // One-shot listener: removed by reference so that other handlers of the
    // same event are left intact.
    const onItemAdded = () => {
      client.removeEventListener(OpenAI.RealtimeAPIEvents.ConversationItemAdded, onItemAdded);
      client.responseCreate({});
    };
    // Subscribe before sending the item so the confirmation cannot be missed.
    client.addEventListener(OpenAI.RealtimeAPIEvents.ConversationItemAdded, onItemAdded);

    client.conversationItemCreate({
      "item": {
        "type": "message",
        "role": "user",
        "content": [
          {
            "type": "input_text",
            "text": messageText
          }
        ]
      }
    });
  } catch (err) {
    // JSON.stringify(new Error(...)) yields "{}", so prefer the message.
    Logger.write(`Inbound Http request error: ${err?.message ?? JSON.stringify(err)}`);
  }
  return "OK";
});
| 54 | |
/**
 * Builds the `session.update` payload: assistant instructions, audio
 * settings and the `createProfile` tool definition.
 */
function buildSessionUpdate() {
  return {
    "session": {
      "type": "realtime",
      "instructions": `Your name is Voxy, you're a friendly and fun guy. You speak English only. You have to collect person's name, company he/she works at and his/her email. Call the 'createProfile' function whenever you learn all information including name, company and email address. You MUST NEVER mention the tools/functions to the user. You speak English ONLY, don't switch to any other language. Always continue the conversation after the user answers.`,
      "audio": {
        "input": {
          "transcription": {
            "model": "gpt-4o-transcribe",
            "language": "en"
          }
        },
        "output": {
          "voice": "cedar"
        }
      },
      "tools": [
        {
          "type": "function",
          "name": "createProfile",
          "description": "Save contact information of a user for the purpose of creating/updating profile information.",
          "parameters": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "The user's name",
              },
              "emailAddress": {
                "type": "string",
                "description": "The user's work/business email address.",
              },
              "organization": {
                "type": "string",
                "description": "The name of the company/organization where the user works."
              }
            },
            "required": ["name", "organization", "emailAddress"]
          }
        },
      ],
      "tool_choice": "auto"
    }
  };
}

/**
 * Returns the `createProfile` function call contained in a ResponseDone
 * event, or undefined when the response has no such output item (for
 * example a response with an empty or missing `output` array).
 */
function findCreateProfileCall(event) {
  const output = event?.data?.payload?.response?.output;
  if (!Array.isArray(output)) return undefined;
  return output.find((item) => item?.type === "function_call" && item?.name === "createProfile");
}

/** True when the value is not a string or contains only whitespace. */
function isBlankField(value) {
  return typeof value !== "string" || value.trim() === "";
}

/**
 * Builds the webhook-style message posted to the WhatsApp proxy for a
 * collected profile.
 */
function buildProfileNotification(from, { name, emailAddress, organization }) {
  return {
    entry: [
      {
        changes: [
          {
            value: {
              messages: [
                {
                  from,
                  type: "voiceai",
                  text: {
                    body: `Name: ${name}, Email: ${emailAddress}, Company: ${organization}`
                  }
                }
              ]
            },
            field: "messages"
          }
        ]
      }
    ]
  };
}

/**
 * Answers an incoming call, bridges it to an OpenAI Realtime API client,
 * forwards collected profiles to the WhatsApp proxy, records the call and
 * publishes the session URL in ApplicationStorage under "WAB_<callerid>".
 */
VoxEngine.addEventListener(AppEvents.CallAlerting, async ({ callerid, call }) => {
  cid = callerid;
  const storageKey = "WAB_" + callerid;
  let disconnected = false;

  // Registered before the first `await` so a hang-up during setup is not missed.
  call.addEventListener(CallEvents.Disconnected, async () => {
    disconnected = true;
    if (realtimeAPIClient) realtimeAPIClient.close();
    connected = false;
    try {
      // Awaited so that a rejected promise is caught by this try/catch.
      await ApplicationStorage.remove(storageKey);
    } catch (e) {
      Logger.write(`ApplicationStorage error: ${e?.message ?? JSON.stringify(e)}`);
    }
    VoxEngine.terminate();
  });

  call.answer();
  try {
    realtimeAPIClient = await OpenAI.createRealtimeAPIClient({
      model: MODEL,
      apiKey: OPENAI_API_KEY,
      type: OpenAI.RealtimeAPIClientType.REALTIME,
      onWebSocketClose
    });

    // The caller hung up while the client was being created: release it and stop.
    if (disconnected) {
      realtimeAPIClient.close();
      return;
    }

    realtimeAPIClient.sessionUpdate(buildSessionUpdate());
    VoxEngine.sendMediaBetween(call, realtimeAPIClient);
    connected = true;
    // Ask the model to produce the first response.
    realtimeAPIClient.responseCreate({});

    // Interruptions support: clear the media buffer in case of OpenAI's VAD detected speech input
    realtimeAPIClient.addEventListener(OpenAI.RealtimeAPIEvents.InputAudioBufferSpeechStarted, () => {
      if (realtimeAPIClient) realtimeAPIClient.clearMediaBuffer();
    });

    realtimeAPIClient.addEventListener(OpenAI.RealtimeAPIEvents.ResponseDone, async (event) => {
      const profileCall = findCreateProfileCall(event);
      if (!profileCall) return;

      try {
        const args = JSON.parse(profileCall.arguments);
        if (isBlankField(args?.name) || isBlankField(args?.emailAddress) || isBlankField(args?.organization)) {
          Logger.write("createProfile call ignored: name, emailAddress or organization is missing");
          return;
        }

        Logger.write("Profile created, sending info to WhatsApp");
        const notification = buildProfileNotification(cid, args);
        Logger.write(JSON.stringify(notification));
        await Net.httpRequestAsync(WA_PROXY_URL, {
          method: "POST",
          postData: JSON.stringify(notification),
          enableSystemLog: true,
          headers: [
            "Content-Type: application/json"
          ]
        });
        // NOTE(review): no `function_call_output` item is sent back for this
        // call before the next response is requested - confirm whether the
        // model should be told the outcome of createProfile.
        realtimeAPIClient.responseCreate({});
      } catch (err) {
        Logger.write(`createProfile handling failed: ${err?.message ?? String(err)}`);
      }
    });
  } catch (error) {
    Logger.write('===SOMETHING_WENT_WRONG===');
    Logger.write(String(error));
    VoxEngine.terminate();
    // Nothing below makes sense for a session that is being terminated.
    return;
  }

  call.record({ hd_audio: true, stereo: true });
  try {
    // assuming that the call session wouldn't last longer than 1.5 hours
    await ApplicationStorage.put(storageKey, sessionUrl, 60 * 90);
  } catch (e) {
    Logger.write(`ApplicationStorage error: ${e?.message ?? JSON.stringify(e)}`);
  }
});
| 196 | |
/**
 * Last-chance cleanup: when the session terminates while a call is still
 * marked as connected, remove the "WAB_<caller id>" key from
 * ApplicationStorage.
 */
VoxEngine.addEventListener(AppEvents.Terminating, async (appEvent) => {
  if (!connected) return;
  try {
    // Awaited so that a rejected promise is caught by this try/catch.
    await ApplicationStorage.remove("WAB_" + cid);
  } catch (e) {
    // JSON.stringify(new Error(...)) yields "{}", so prefer the message.
    Logger.write(`ApplicationStorage error: ${e?.message ?? JSON.stringify(e)}`);
  }
});