From bc91ede5f19116e20a79c7a5a82533e0b1e316ae Mon Sep 17 00:00:00 2001 From: Luis Daniel Lucio Quiroz Date: Tue, 30 Apr 2019 23:34:34 -0400 Subject: [PATCH 1/2] Add another way to use Azure Speech to Text service This patch adds some extra fault-tolerance support to Bing (legacy): for those who still use Bing, it sometimes fails. It also adds another way, Azure, which I think is easier and faster. Among its notable features: just configure the region, and Memcache is used to store the access token as per the Azure documentation, which saves a query (faster). It also returns the most likely transcription (digging into the JSON). --- .../resources/functions/record_message.lua | 197 +++++++++++++----- 1 file changed, 143 insertions(+), 54 deletions(-) diff --git a/resources/install/scripts/app/voicemail/resources/functions/record_message.lua b/resources/install/scripts/app/voicemail/resources/functions/record_message.lua index 989d97797b..1ab2aa4a88 100644 --- a/resources/install/scripts/app/voicemail/resources/functions/record_message.lua +++ b/resources/install/scripts/app/voicemail/resources/functions/record_message.lua @@ -66,61 +66,150 @@ end - if (transcribe_provider == "microsoft") then - local api_key1 = settings:get('voicemail', 'microsoft_key1', 'text') or ''; - local api_key2 = settings:get('voicemail', 'microsoft_key2', 'text') or ''; - if (api_key1 ~= '' and api_key2 ~= '') then - access_token_cmd = "curl -X POST \"https://api.cognitive.microsoft.com/sts/v1.0/issueToken\" -H \"Content-type: application/x-www-form-urlencoded\" -H \"Content-Length: 0\" -H \"Ocp-Apim-Subscription-Key: "..api_key1.."\"" - local handle = io.popen(access_token_cmd); - local access_token_result = handle:read("*a"); - handle:close(); - if (debug["info"]) then - freeswitch.consoleLog("notice", "[voicemail] CMD: " .. access_token_cmd .. "\n"); - freeswitch.consoleLog("notice", "[voicemail] RESULT: " .. access_token_result .. 
"\n"); - end - --Access token request can fail - if (access_token_result == '') then - freeswitch.consoleLog("notice", "[voicemail] ACCESS TOKEN: (null) \n"); - return '' - end - transcribe_cmd = "curl -X POST \"https://speech.platform.bing.com/recognize?scenarios=smd&appid=D4D52672-91D7-4C74-8AD8-42B1D98141A5&locale=" .. transcribe_language .. "&device.os=Freeswitch&version=3.0&format=json&instanceid=" .. gen_uuid() .. "&requestid=" .. gen_uuid() .. "\" -H 'Authorization: Bearer " .. access_token_result .. "' -H 'Content-type: audio/wav; codec=\"audio/pcm\"; samplerate=8000; trustsourcerate=false' --data-binary @"..file_path - local handle = io.popen(transcribe_cmd); - local transcribe_result = handle:read("*a"); - handle:close(); - if (debug["info"]) then - freeswitch.consoleLog("notice", "[voicemail] CMD: " .. transcribe_cmd .. "\n"); - freeswitch.consoleLog("notice", "[voicemail] RESULT: " .. transcribe_result .. "\n"); - end - --Trancribe request can fail - if (transcribe_result == '') then - freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: (null) \n"); - return '' - end - local transcribe_json = JSON.decode(transcribe_result); - --Trancribe result can be nil - if (transcribe_json["results"] == nil) then - freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: results = (null) \n"); - return '' - end - if (debug["info"]) then - if (transcribe_json["results"][1]["name"] == nil) then - freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: (null) \n"); - else - freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: " .. transcribe_json["results"][1]["name"] .. "\n"); - end - if (transcribe_json["results"][1]["confidence"] == nil) then - freeswitch.consoleLog("notice", "[voicemail] CONFIDENCE: (null) \n"); - else - freeswitch.consoleLog("notice", "[voicemail] CONFIDENCE: " .. transcribe_json["results"][1]["confidence"] .. 
"\n"); - end - end + if (transcribe_provider == "microsoft") then + local api_key1 = settings:get('voicemail', 'microsoft_key1', 'text') or ''; + local api_key2 = settings:get('voicemail', 'microsoft_key2', 'text') or ''; + if (api_key1 ~= '' and api_key2 ~= '') then + access_token_cmd = "curl -X POST \"https://api.cognitive.microsoft.com/sts/v1.0/issueToken\" -H \"Content-type: application/x-www-form-urlencoded\" -H \"Content-Length: 0\" -H \"Ocp-Apim-Subscription-Key: "..api_key1.."\"" + local handle = io.popen(access_token_cmd); + local access_token_result = handle:read("*a"); + handle:close(); + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] CMD: " .. access_token_cmd .. "\n"); + freeswitch.consoleLog("notice", "[voicemail] RESULT: " .. access_token_result .. "\n"); + end + --Access token request can fail + if (access_token_result == '') then + freeswitch.consoleLog("notice", "[voicemail] ACCESS TOKEN: (null) \n"); + return '' + end + transcribe_cmd = "curl -X POST \"https://speech.platform.bing.com/recognize?scenarios=smd&appid=D4D52672-91D7-4C74-8AD8-42B1D98141A5&locale=" .. transcribe_language .. "&device.os=Freeswitch&version=3.0&format=json&instanceid=" .. gen_uuid() .. "&requestid=" .. gen_uuid() .. "\" -H 'Authorization: Bearer " .. access_token_result .. "' -H 'Content-type: audio/wav; codec=\"audio/pcm\"; samplerate=8000; trustsourcerate=false' --data-binary @"..file_path + local handle = io.popen(transcribe_cmd); + local transcribe_result = handle:read("*a"); + handle:close(); + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] CMD: " .. transcribe_cmd .. "\n"); + freeswitch.consoleLog("notice", "[voicemail] RESULT: " .. transcribe_result .. 
"\n"); + end + --Trancribe request can fail + if (transcribe_result == '') then + freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: (null) \n"); + return '' + else + status, transcribe_json = pcall(JSON.decode, transcribe_result); + if not status then + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] error decoding bing json\n"); + end + return ''; + end + end - transcription = transcribe_json["results"][1]["name"]; - transcription = transcription:gsub(".*<%/profanity>","..."); - confidence = transcribe_json["results"][1]["confidence"]; - return transcription; - end - end + if (debug["info"]) then + if (transcribe_json["results"][1]["name"] == nil) then + freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: (null) \n"); + else + freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: " .. transcribe_json["results"][1]["name"] .. "\n"); + end + if (transcribe_json["results"][1]["confidence"] == nil) then + freeswitch.consoleLog("notice", "[voicemail] CONFIDENCE: (null) \n"); + else + freeswitch.consoleLog("notice", "[voicemail] CONFIDENCE: " .. transcribe_json["results"][1]["confidence"] .. "\n"); + end + end + + transcription = transcribe_json["results"][1]["name"]; + transcription = transcription:gsub(".*<%/profanity>","..."); + confidence = transcribe_json["results"][1]["confidence"]; + return transcription; + end + end + + if (transcribe_provider == "azure") then + local api_key1 = settings:get('voicemail', 'azure_key1', 'text') or ''; + local api_server_region = settings:get('voicemail', 'azure_server_region', 'text') or ''; + if (api_server_region ~= '') then + api_server_region = api_server_region .. 
"."; + else + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] azure_server_region default setting must be set\n"); + end + return ''; + end + if (api_key1 ~= '') then + -- search in memcache first, azure documentation claims that the access token is valid for 10 minutes + local cache = require "resources.functions.cache"; + local key = "app:voicemail:azure:access_token"; + local access_token_result = cache.get(key) + + if access_token_result then + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] Azure access_token recovered from memcached\n"); + end + else + access_token_cmd = "curl -X POST \"https://"..api_server_region.."api.cognitive.microsoft.com/sts/v1.0/issueToken\" -H \"Content-type: application/x-www-form-urlencoded\" -H \"Content-Length: 0\" -H \"Ocp-Apim-Subscription-Key: "..api_key1.."\""; + local handle = io.popen(access_token_cmd); + access_token_result = handle:read("*a"); + handle:close(); + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] CMD: " .. access_token_cmd .. "\n"); + freeswitch.consoleLog("notice", "[voicemail] ACCESS TOKEN: " .. access_token_result .. "\n"); + end + --Access token request can fail + if (access_token_result == '') then + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] ACCESS TOKEN: (null) \n"); + end + return '' + end + + --Azure returns JSON when it has to report an error + if (string.sub(access_token_result, 1, 1) == '{') then + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] ERROR STRING: ".. access_token_result .. "\n"); + end + return '' + end + + cache.set(key, access_token_result, 4200); + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] Azure access_token saved into memcached: " .. access_token_result .. "\n"); + end + end + + transcribe_cmd = "curl -X POST \"https://"..api_server_region.."stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=".. 
transcribe_language .."&format=detailed\" -H 'Authorization: Bearer " .. access_token_result .. "' -H 'Content-type: audio/wav; codec=\"audio/pcm\"; samplerate=8000; trustsourcerate=false' --data-binary @"..file_path + local handle = io.popen(transcribe_cmd); + local transcribe_result = handle:read("*a"); + handle:close(); + if (debug["info"]) then + freeswitch.consoleLog("notice", "[voicemail] CMD: " .. transcribe_cmd .. "\n"); + freeswitch.consoleLog("notice", "[voicemail] RESULT: " .. transcribe_result .. "\n"); + end + --Trancribe request can fail + if (transcribe_result == '') then + freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: (null) \n"); + return '' + end + local transcribe_json = JSON.decode(transcribe_result); + if (debug["info"]) then + if (transcribe_json["NBest"][1]["Display"] == nil) then + freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: (null) \n"); + else + freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: " .. transcribe_json["NBest"][1]["Display"] .. "\n"); + end + if (transcribe_json["NBest"][1]["Confidence"] == nil) then + freeswitch.consoleLog("notice", "[voicemail] CONFIDENCE: (null) \n"); + else + freeswitch.consoleLog("notice", "[voicemail] CONFIDENCE: " .. transcribe_json["NBest"][1]["Confidence"] .. 
"\n"); + end + end + + transcription = transcribe_json["NBest"][1]["Display"]; + confidence = transcribe_json["NBest"][1]["Confidence"]; + return transcription; + end + end + if (transcribe_provider == "custom") then local transcription_server = settings:get('voicemail', 'transcription_server', 'text') or ''; local api_key = settings:get('voicemail', 'api_key', 'text') or ''; From 3808ed74304be7d9968ea7222f3b5dee2596ae05 Mon Sep 17 00:00:00 2001 From: Luis Daniel Lucio Quiroz Date: Fri, 3 May 2019 23:54:51 -0400 Subject: [PATCH 2/2] Update record_message.lua --- .../app/voicemail/resources/functions/record_message.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/install/scripts/app/voicemail/resources/functions/record_message.lua b/resources/install/scripts/app/voicemail/resources/functions/record_message.lua index 1ab2aa4a88..5292c4a57f 100644 --- a/resources/install/scripts/app/voicemail/resources/functions/record_message.lua +++ b/resources/install/scripts/app/voicemail/resources/functions/record_message.lua @@ -171,7 +171,7 @@ return '' end - cache.set(key, access_token_result, 4200); + cache.set(key, access_token_result, 120); if (debug["info"]) then freeswitch.consoleLog("notice", "[voicemail] Azure access_token saved into memcached: " .. access_token_result .. "\n"); end