fusionpbx/app/email_queue/resources/functions/transcribe.php

<?php

if (!function_exists('transcribe')) {
	/**
	 * Transcribe an audio recording with the provider selected in the voicemail settings.
	 *
	 * The voicemail 'transcribe_provider' setting selects one of 'watson', 'google',
	 * 'azure', 'custom' or 'openai'. Progress and debug information is written with
	 * echo, so it ends up in the caller's output stream.
	 *
	 * @param string $file_path      directory that contains the recording
	 * @param string $file_name      file name of the recording inside $file_path
	 * @param string $file_extension extension of the recording; 'mp3' and 'wav' are recognized
	 *
	 * @return array|false|null array with 'provider' and 'message' keys (plus 'language',
	 *                          'command' or 'api_key' depending on the provider) on success;
	 *                          false when the provider request or its response was unusable;
	 *                          null when no provider branch produced a result (unknown provider,
	 *                          or watson/azure/openai without a key).
	 *                          The script is terminated with exit when the recording does not exist.
	 */
	function transcribe ($file_path, $file_name, $file_extension) {
		//build the full path once; the parameters are never modified so retries reuse the same path
			$full_file_name = $file_path.'/'.$file_name;

		//check if the file exists
			if (!file_exists($full_file_name)) {
				echo "file not found ".$full_file_name;
				exit;
			}

		//get the voicemail settings
			$settings = new settings(['category' => 'voicemail']);

		//transcription variables
			$transcribe_provider = $settings->get('voicemail', 'transcribe_provider');
			$transcribe_language = $settings->get('voicemail', 'transcribe_language');

		//transcribe - watson
			if ($transcribe_provider == 'watson') {
				$api_key = $settings->get('voicemail', 'watson_key');
				$api_url = $settings->get('voicemail', 'watson_url');

				//map the extension to a content type, unknown extensions fall back to a generic binary type
				$content_types = ['mp3' => 'audio/mp3', 'wav' => 'audio/wav'];
				$content_type = $content_types[$file_extension] ?? 'application/octet-stream';

				if (isset($api_key) && $api_key != '') {
					//read the recording that is sent as the request body
					$audio = file_get_contents($full_file_name);
					if ($audio === false) {
						echo "unable to read ".$full_file_name."\n";
						return false;
					}

					//start output buffer, the curl debug output is captured and echoed below
					ob_start();
					$out = fopen('php://output', 'w');

					//create the curl resource
					$ch = curl_init();

					//set the curl options
					curl_setopt($ch, CURLOPT_URL, $api_url);
					curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
					curl_setopt($ch, CURLOPT_USERPWD, 'apikey' . ':' . $api_key);
					curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); //set the authentication type
					curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: '.$content_type]);
					curl_setopt($ch, CURLOPT_POSTFIELDS, $audio);
					curl_setopt($ch, CURLOPT_POST, 1);
					curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 20);	//The number of seconds to wait while trying to connect.
					curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);	//To follow any "Location: " header that the server sends as part of the HTTP header.
					curl_setopt($ch, CURLOPT_AUTOREFERER, TRUE);	//To automatically set the Referer: field in requests where it follows a Location: redirect.
					curl_setopt($ch, CURLOPT_TIMEOUT, 300);	//The maximum number of seconds to allow cURL functions to execute.
					curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, TRUE);	//Verify the peer's certificate.
					curl_setopt($ch, CURLOPT_HEADER, 0); //hide the headers when set to 0

					//add verbose for debugging
					curl_setopt($ch, CURLOPT_VERBOSE, true);
					curl_setopt($ch, CURLOPT_STDERR, $out);

					//execute the curl with the options
					$http_content = curl_exec($ch);

					//return the error
					if (curl_errno($ch)) {
						echo 'Error:' . curl_error($ch);
					}

					//close the curl resource
					curl_close($ch);

					//show the debug information
					fclose($out);
					$debug = ob_get_clean();
					echo $debug;

					echo "http_content:\n".$http_content."\n";

					//validate the json, a failed request (false) is treated like invalid json
					$json = is_string($http_content) ? json_decode($http_content, true) : null;
					if (!is_array($json)) {
						echo "invalid json\n";
						return false;
					}

					//join the transcript of every result, a response without results gives an empty message
					$message = '';
					foreach (($json['results'] ?? []) as $row) {
						$message .= $row['alternatives'][0]['transcript'] ?? '';
					}

					$message = str_replace("%HESITATION", " ", trim($message));
					$message = ucfirst($message);
					$array['provider'] = $transcribe_provider;
					$array['language'] = $transcribe_language;
					$array['message'] = $message;

					return $array;
				}
			}

		//transcribe - google
			if ($transcribe_provider == 'google') {
				$api_key = $settings->get('voicemail', 'google_key');
				$api_url = $settings->get('voicemail', 'google_url');
				$application_credentials = $settings->get('voicemail', 'google_application_credentials');
				$transcribe_alternate_language = $settings->get('voicemail', 'transcribe_alternate_language');

				//apply the defaults when the settings are missing or empty
				if (empty($transcribe_language)) {
					$transcribe_language = 'en-US';
				}
				if (empty($transcribe_alternate_language)) {
					$transcribe_alternate_language = 'es-US';
				}

				$command = '';
				$flac_file = null;

				//version 1
				if (substr((string) $api_url, 0, 32) == 'https://speech.googleapis.com/v1') {
					if (isset($api_key) && $api_key != '') {
						//convert the first 59 seconds to flac, then post it base64 encoded
						$flac_file = $full_file_name.'.flac';
						$command = "sox ".escapeshellarg($full_file_name)." ".escapeshellarg($flac_file)." trim 0 00:59 ";
						$command .= "&& echo \"{ 'config': { 'languageCode': '".$transcribe_language."', 'enableWordTimeOffsets': false , 'enableAutomaticPunctuation': true , 'alternativeLanguageCodes': '".$transcribe_alternate_language."' }, 'audio': { 'content': '\$(base64 -w 0 ".escapeshellarg($flac_file).")' } }\" ";
						$command .= "| curl -X POST -H \"Content-Type: application/json\" -d @- ".escapeshellarg($api_url.":recognize?key=".$api_key)." ";
						$command .= "&& rm -f ".escapeshellarg($flac_file);
						echo $command."\n";
					}
				}
				//version 2
				elseif (substr((string) $api_url, 0, 32) == 'https://speech.googleapis.com/v2') {
					if (!empty($application_credentials)) {
						putenv("GOOGLE_APPLICATION_CREDENTIALS=".$application_credentials);
					}
					$command = "echo \"{ 'config': { 'auto_decoding_config': {}, 'language_codes': ['".$transcribe_language."'], 'model': 'long' }, 'content': '\$(base64 -w 0 ".escapeshellarg($full_file_name).")' } \" ";
					$command .= "| curl -X POST -H \"Content-Type: application/json\" -H \"Authorization: Bearer \$(gcloud auth application-default print-access-token)\" -d @- ".escapeshellarg($api_url);
					echo $command."\n";
				}

				//run the command
				$http_response = null;
				if ($command !== '') {
					$http_response = shell_exec($command);
				}

				//the rm in the command is skipped when curl fails, remove the temporary flac file here
				if ($flac_file !== null && file_exists($flac_file)) {
					unlink($flac_file);
				}

				//validate the json
				$message = '';
				if (!empty($http_response)) {
					$json = json_decode($http_response, true);
					if (!is_array($json)) {
						echo "invalid json\n";
						return false;
					}

					foreach (($json['results'] ?? []) as $row) {
						$message .= $row['alternatives'][0]['transcript'] ?? '';
					}
				}

				//build the response
				$array['provider'] = $transcribe_provider;
				$array['language'] = $transcribe_language;
				$array['command'] = $command;
				$array['message'] = $message;

				return $array;
			}

		//transcribe - azure
			if ($transcribe_provider == 'azure') {
				$api_key = $settings->get('voicemail', 'azure_key');
				$api_url = $settings->get('voicemail', 'azure_server_region');

				if (empty($transcribe_language)) {
					$transcribe_language = 'en-US';
				}

				if (isset($api_key) && $api_key != '') {
					//request a short lived access token
					$command = "curl -X POST ".escapeshellarg("https://".$api_url.".api.cognitive.microsoft.com/sts/v1.0/issueToken")." -H \"Content-type: application/x-www-form-urlencoded\" -H \"Content-Length: 0\" -H ".escapeshellarg("Ocp-Apim-Subscription-Key: ".$api_key);
					$access_token_result = shell_exec($command);
					if (empty($access_token_result)) {
						return false;
					}

					//send the recording, the token comes from the remote server so it is escaped for the shell
					$command = "curl -X POST ".escapeshellarg("https://".$api_url.".stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=".rawurlencode($transcribe_language)."&format=detailed")." -H ".escapeshellarg("Authorization: Bearer ".trim($access_token_result))." -H 'Content-type: audio/wav; codec=\"audio/pcm\"; samplerate=8000; trustsourcerate=false' --data-binary ".escapeshellarg("@".$full_file_name);
					echo $command."\n";
					$http_response = shell_exec($command);
					$array = json_decode((string) $http_response, true);
					if (!is_array($array)) {
						return false;
					}

					//a response without a recognized phrase gives an empty message
					$message = $array['NBest'][0]['Display'] ?? '';

					//the decoded response is returned together with the summary keys
					$array['provider'] = $transcribe_provider;
					$array['language'] = $transcribe_language;
					$array['api_key'] = $api_key;
					$array['command'] = $command;
					$array['message'] = $message;
					return $array;
				}

			}

			// transcribe - custom
			// Works with self-hostable transcription service at https://github.com/AccelerateNetworks/an-transcriptions
			if ($transcribe_provider == 'custom') {
				$api_key = $settings->get('voicemail', 'api_key');
				$api_url = $settings->get('voicemail', 'transcription_server');

				if (empty($transcribe_language)) {
					$transcribe_language = 'en-US';
				}

				$authorization = escapeshellarg("Authorization: Bearer ".$api_key);

				$message = null;
				for ($retries = 5; $retries > 0; $retries--) {
					echo "sending voicemail recording to ".$api_url." for transcription";

					// submit the file for transcribing
					$command = "curl -sX POST ".escapeshellarg($api_url."/enqueue")." -H ".$authorization." -F ".escapeshellarg("file=@".$full_file_name);
					$stdout = shell_exec($command);
					$resp = json_decode((string) $stdout, true);
					if (!is_array($resp) || !isset($resp['id'])) {
						echo "unexpected error: ".$stdout;
						// json not parsable or no job id, try again
						continue;
					}

					$transcription_id = $resp['id'];

					// wait for transcription to complete
					sleep(1);

					$poll_failures = 0;
					while (true) {
						echo "checking ".$api_url." for completion of job ".$transcription_id;
						// the job id comes from the remote server so it is escaped for the shell
						$command = "curl -s ".escapeshellarg($api_url."/j/".$transcription_id)." -H ".$authorization;
						$resp = json_decode((string) shell_exec($command), true);
						if (!is_array($resp)) {
							// json not parsable, wait before polling again and give up on this job
							// after 30 consecutive failures so an unreachable server cannot spin forever
							$poll_failures++;
							if ($poll_failures >= 30) {
								echo "no valid response from ".$api_url.", retrying";
								break;
							}
							sleep(1);
							continue;
						}
						$poll_failures = 0;

						$status = $resp['status'] ?? '';

						if ($status == "failed") {
							echo "transcription failed, retrying";
							break;
						}

						if ($status == "finished") {
							echo "transcription succeeded";
							$message = $resp['result'] ?? null;
							break;
						}

						// transcription is queued or in progress, check again in 1 second
						sleep(1);
					}

					if ($message !== null) {
						break;
					}
				}

				if ($message == null) {
					return false;
				}

				$array['provider'] = $transcribe_provider;
				$array['language'] = $transcribe_language;
				$array['api_key'] = $api_key;
				$array['message'] = $message;
				return $array;
			}

		//transcribe - openai
		// settings:
		//		openai_key (required)
		//		openai_url
		//		openai_model
			if ($transcribe_provider == 'openai') {
				$api_key = $settings->get('voicemail', 'openai_key');
				$api_url = $settings->get('voicemail', 'openai_url');
				$api_voice_model = $settings->get('voicemail', 'openai_model');

				if (empty($api_url)) {
					$api_url = "https://api.openai.com/v1/audio/transcriptions";
				}

				if (empty($api_voice_model)) {
					$api_voice_model = "whisper-1";
				}

				if (isset($api_key) && $api_key != '') {
					//start output buffer
					ob_start();
					$out = fopen('php://output', 'w');

					//create the curl resource
					$ch = curl_init();

					$post_data = [
						'model' => $api_voice_model,
						'file' => curl_file_create($full_file_name),
					];

					//set the curl options, the timeouts match the watson request so a stalled server cannot block forever
					curl_setopt_array($ch, [
						CURLOPT_URL => $api_url,
						CURLOPT_RETURNTRANSFER => true,
						CURLOPT_SSL_VERIFYPEER => TRUE,
						CURLOPT_HTTPHEADER => ['Authorization: Bearer '.$api_key],
						CURLOPT_POSTFIELDS => $post_data,
						CURLOPT_CONNECTTIMEOUT => 20,
						CURLOPT_TIMEOUT => 300,
					]);

					//send any curl diagnostics to the output buffer
					curl_setopt($ch, CURLOPT_STDERR, $out);

					//execute the curl with the options
					$http_content = curl_exec($ch);

					//return the error
					if (curl_errno($ch)) {
						echo 'Error:' . curl_error($ch);
					}

					//close the curl resource
					curl_close($ch);

					//show the debug information
					fclose($out);
					$debug = ob_get_clean();
					echo $debug;

					//validate the json, a failed request (false) is treated like invalid json
					$json = is_string($http_content) ? json_decode($http_content, true) : null;
					if (!is_array($json)) {
						echo "invalid json\n";
						return false;
					}

					return [
						'provider' => $transcribe_provider,
						'message' => $json['text'] ?? '',
					];
				}

			}

		//no provider branch returned a result (unknown provider or missing key)
			return null;
	}
}

?>