add built in ability for microsoft bing speech to text (#1960)
* Add built-in ability for Microsoft Bing speech to text * Move json.lua to lower case and add more error checking in record_message * Replace the Creative Commons json.lua with lunajson.lua, which is MIT licensed: https://github.com/grafi-tt/lunajson/blob/master/LICENSE
This commit is contained in:
parent
76edd90efd
commit
7c4a726766
|
|
@ -188,6 +188,10 @@
|
|||
$apps[$x]['db'][$y]['fields'][$z]['type'] = "text";
|
||||
$apps[$x]['db'][$y]['fields'][$z]['description']['en-us'] = "Voicemail file encoded in base64.";
|
||||
$z++;
|
||||
$apps[$x]['db'][$y]['fields'][$z]['name'] = "message_transcription";
|
||||
$apps[$x]['db'][$y]['fields'][$z]['type'] = "text";
|
||||
$apps[$x]['db'][$y]['fields'][$z]['description']['en-us'] = "Voicemail transcription.";
|
||||
$z++;
|
||||
|
||||
$y = 2; //table array index
|
||||
$z = 0; //field array index
|
||||
|
|
|
|||
|
|
@ -324,6 +324,17 @@ $text['label-message_size']['uk'] = "Розмір";
|
|||
$text['label-message_size']['de-at'] = "Größe";
|
||||
$text['label-message_size']['he'] = "גודל";
|
||||
|
||||
$text['label-transcription']['en-us'] = "Transcription";
|
||||
$text['label-transcription']['es-cl'] = "";
|
||||
$text['label-transcription']['pt-pt'] = "";
|
||||
$text['label-transcription']['fr-fr'] = "";
|
||||
$text['label-transcription']['pt-br'] = "";
|
||||
$text['label-transcription']['pl'] = "";
|
||||
$text['label-transcription']['sv-se'] = "";
|
||||
$text['label-transcription']['uk'] = "";
|
||||
$text['label-transcription']['de-at'] = "";
|
||||
$text['label-transcription']['he'] = "";
|
||||
|
||||
$text['label-message_priority']['en-us'] = "Priority";
|
||||
$text['label-message_priority']['es-cl'] = "Prioridad";
|
||||
$text['label-message_priority']['pt-pt'] = "Prioridade";
|
||||
|
|
|
|||
|
|
@ -137,6 +137,9 @@ if (!(check_str($_REQUEST["action"]) == "download" && check_str($_REQUEST["src"]
|
|||
if ($_SESSION['voicemail']['storage_type']['text'] != 'base64') {
|
||||
echo "<th style='text-align: right;'>".$text['label-message_size']."</th>\n";
|
||||
}
|
||||
if ($_SESSION['voicemail']['transcribe_enabled']['boolean'] == 'true') {
|
||||
echo "<th>".$text['label-transcription']."</th>\n";
|
||||
}
|
||||
if (permission_exists('voicemail_message_delete')) {
|
||||
echo "<td class='list_control_icons' style='width: 25px;'>";
|
||||
echo "<a href='javascript:void(0);' onclick=\"if (confirm('".$text['confirm-delete']."')) { $('#frm').attr('action', 'voicemail_message_delete.php').submit(); }\" alt='".$text['button-delete']."'>".$v_link_label_delete."</a>";
|
||||
|
|
@ -177,10 +180,14 @@ if (!(check_str($_REQUEST["action"]) == "download" && check_str($_REQUEST["src"]
|
|||
echo "<a id='recording_button_".$row['voicemail_message_uuid']."' onclick=\"recording_play('".$row['voicemail_message_uuid']."');\" title='".$text['label-play']." / ".$text['label-pause']."'>".$v_link_label_play."</a>";
|
||||
echo "<a href=\"voicemail_messages.php?action=download&t=bin&id=".$row['voicemail_id']."&voicemail_uuid=".$row['voicemail_uuid']."&uuid=".$row['voicemail_message_uuid']."\" title='".$text['label-download']."'>".$v_link_label_download."</a>";
|
||||
echo " </td>\n";
|
||||
echo " <td valign='top' class='".$row_style[$c]."' style=\"".$style." text-align: right;\">".$row['message_length_label']." </td>\n";
|
||||
echo " <td valign='top' class='".$row_style[$c]."' style=\"".$style." text-align: right;\" nowrap='nowrap'>".$row['message_length_label']." </td>\n";
|
||||
if ($_SESSION['voicemail']['storage_type']['text'] != 'base64') {
|
||||
echo " <td valign='top' class='".$row_style[$c]."' style=\"".$style." text-align: right;\" nowrap='nowrap'>".$row['file_size_label']."</td>\n";
|
||||
}
|
||||
if ($_SESSION['voicemail']['transcribe_enabled']['boolean'] == 'true') {
|
||||
echo " <td valign='top' class='".$row_style[$c]."' style=\"".$style."\">".$row['message_transcription']."</td>\n";
|
||||
}
|
||||
|
||||
if (permission_exists('voicemail_message_delete')) {
|
||||
echo " <td class='list_control_icon' style='width: 25px;'>";
|
||||
echo "<a href='voicemail_message_delete.php?voicemail_messages[".$row['voicemail_uuid']."][]=".$row['voicemail_message_uuid']."' alt='".$text['button-delete']."' onclick=\"return confirm('".$text['confirm-delete']."')\">".$v_link_label_delete."</a>";
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@
|
|||
direct_dial["max_digits"] = 4;
|
||||
|
||||
--debug
|
||||
debug["info"] = false;
|
||||
debug["sql"] = false;
|
||||
debug["info"] = true;
|
||||
debug["sql"] = true;
|
||||
|
||||
--get the argv values
|
||||
script_name = argv[1];
|
||||
|
|
@ -383,6 +383,9 @@
|
|||
if (storage_type == "base64") then
|
||||
table.insert(sql, "message_base64, ");
|
||||
end
|
||||
if (transcribe_enabled == "true") then
|
||||
table.insert(sql, "message_transcription, ");
|
||||
end
|
||||
table.insert(sql, "message_length ");
|
||||
--table.insert(sql, "message_status, ");
|
||||
--table.insert(sql, "message_priority, ");
|
||||
|
|
@ -398,6 +401,9 @@
|
|||
if (storage_type == "base64") then
|
||||
table.insert(sql, "'"..message_base64.."', ");
|
||||
end
|
||||
if (transcribe_enabled == "true") then
|
||||
table.insert(sql, "'"..transcription.."', ");
|
||||
end
|
||||
table.insert(sql, "'"..message_length.."' ");
|
||||
--table.insert(sql, "'"..message_status.."', ");
|
||||
--table.insert(sql, "'"..message_priority.."' ");
|
||||
|
|
|
|||
|
|
@ -26,6 +26,64 @@
|
|||
--load libraries
|
||||
local Database = require "resources.functions.database"
|
||||
local Settings = require "resources.functions.lazy_settings"
|
||||
local JSON = require "resources.functions.lunajson"
|
||||
|
||||
--define uuid function
|
||||
local random = math.random;

--- Build a random identifier in the textual layout of a version-4 UUID.
-- Every 'x' of the template is replaced by a random hex digit (0-f) and the
-- single 'y' by one of 8, 9, a or b; the literal '4' and the dashes are kept.
-- The generator is used as-is: this function does not seed math.random.
-- @treturn string a 36-character identifier (exactly one return value)
local function gen_uuid()
	local template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx';
	--string.gsub returns the new string AND the number of substitutions; the
	--outer parentheses keep only the string so the count cannot leak into a
	--caller's argument list or table constructor
	return (string.gsub(template, '[xy]', function (c)
		local v = (c == 'x') and random(0, 0xf) or random(8, 0xb);
		return string.format('%x', v);
	end));
end
|
||||
|
||||
--- Transcribe a recorded voicemail file to text.
-- Reads 'transcribe_provider' and 'transcribe_language' from the 'voicemail'
-- settings category. Only the "microsoft" provider is handled: an access
-- token is requested with the two configured keys, then the recording is
-- posted to the speech endpoint, both by running curl through io.popen.
-- Every failure (unknown provider, missing keys, curl or JSON problems, an
-- empty result list) yields '' so callers can always concatenate the result.
-- @param file_path path of the recording that curl uploads with --data-binary
-- @param settings object answering settings:get(category, subcategory, type)
-- @treturn string the recognised text, or '' when nothing could be transcribed
local function transcribe(file_path, settings)
	--quote a value so the shell hands it to curl as exactly one argument
	local function shell_quote(value)
		return "'" .. (tostring(value):gsub("'", "'\\''")) .. "'";
	end

	--percent-encode a value for use inside a query string or form body
	local function url_encode(value)
		return (tostring(value):gsub("[^%w%-%._~]", function (c)
			return string.format("%%%02X", string.byte(c));
		end));
	end

	--run a shell command and return everything it printed, or nil
	local function run(cmd)
		local handle = io.popen(cmd);
		if (handle == nil) then
			return nil;
		end
		local output = handle:read("*a");
		handle:close();
		return output;
	end

	--decode a JSON object/array; JSON.decode raises on bad input, so trap it
	local function decode_json(text)
		if (text == nil or text == '') then
			return nil;
		end
		local ok, value = pcall(JSON.decode, text);
		if (ok and type(value) == "table") then
			return value;
		end
		return nil;
	end

	--transcription variables
		local transcribe_provider = settings:get('voicemail', 'transcribe_provider', 'text') or '';
		local transcribe_language = settings:get('voicemail', 'transcribe_language', 'text') or 'en-US';

		if (debug["info"]) then
			freeswitch.consoleLog("notice", "[voicemail] transcribe_provider: " .. transcribe_provider .. "\n");
			freeswitch.consoleLog("notice", "[voicemail] transcribe_language: " .. transcribe_language .. "\n");
		end

	--only the microsoft provider is implemented
		if (transcribe_provider ~= "microsoft") then
			return '';
		end

	--both keys are required; return '' rather than whatever a global holds
		local api_key1 = settings:get('voicemail', 'microsoft_key1', 'text') or '';
		local api_key2 = settings:get('voicemail', 'microsoft_key2', 'text') or '';
		if (api_key1 == '' or api_key2 == '') then
			return '';
		end

	--request the access token
		local token_form = "grant_type=client_credentials&client_id=" .. url_encode(api_key1)
			.. "&client_secret=" .. url_encode(api_key2)
			.. "&scope=https://speech.platform.bing.com";
		local access_token_cmd = "curl -X POST \"https://oxford-speech.cloudapp.net/token/issueToken\""
			.. " -H \"Content-type: application/x-www-form-urlencoded\""
			.. " -d " .. shell_quote(token_form);
		local access_token_result = run(access_token_cmd);
		local access_token_json = decode_json(access_token_result);
		local access_token = access_token_json and access_token_json["access_token"];
		if (debug["info"]) then
			freeswitch.consoleLog("notice", "[voicemail] CMD: " .. access_token_cmd .. "\n");
			freeswitch.consoleLog("notice", "[voicemail] RESULT: " .. tostring(access_token_result) .. "\n");
			freeswitch.consoleLog("notice", "[voicemail] JSON: " .. tostring(access_token) .. "\n");
		end
		if (type(access_token) ~= "string" or access_token == '') then
			freeswitch.consoleLog("warning", "[voicemail] transcribe: no access token received\n");
			return '';
		end

	--send the recording; the locale follows the transcribe_language setting
		local recognize_url = "https://speech.platform.bing.com/recognize?scenarios=smd"
			.. "&appid=D4D52672-91D7-4C74-8AD8-42B1D98141A5"
			.. "&locale=" .. url_encode(transcribe_language)
			.. "&device.os=Freeswitch&version=3.0&format=json"
			.. "&instanceid=" .. gen_uuid()
			.. "&requestid=" .. gen_uuid();
		local transcribe_cmd = "curl -X POST " .. shell_quote(recognize_url)
			.. " -H " .. shell_quote("Authorization: Bearer " .. access_token)
			.. " -H " .. shell_quote("Content-type: audio/wav; codec=\"audio/pcm\"; samplerate=8000; trustsourcerate=false")
			.. " --data-binary " .. shell_quote("@" .. file_path);
		local transcribe_result = run(transcribe_cmd);
		local transcribe_json = decode_json(transcribe_result);
		if (debug["info"]) then
			freeswitch.consoleLog("notice", "[voicemail] CMD: " .. transcribe_cmd .. "\n");
			freeswitch.consoleLog("notice", "[voicemail] RESULT: " .. tostring(transcribe_result) .. "\n");
		end

	--pick the first result, if the service returned one
		local results = transcribe_json and transcribe_json["results"];
		local best = (type(results) == "table") and results[1] or nil;
		if (type(best) ~= "table" or best["name"] == nil) then
			freeswitch.consoleLog("warning", "[voicemail] transcribe: no transcription in the response\n");
			return '';
		end
		if (debug["info"]) then
			freeswitch.consoleLog("notice", "[voicemail] TRANSCRIPTION: " .. tostring(best["name"]) .. "\n");
			freeswitch.consoleLog("notice", "[voicemail] CONFIDENCE: " .. tostring(best["confidence"]) .. "\n");
		end

	--NOTE(review): confidence is still published as a global, as before;
	--nothing in this function reads it - confirm whether any script does
		confidence = best["confidence"];

		return tostring(best["name"]);
end
|
||||
|
||||
--save the recording
|
||||
function record_message()
|
||||
|
|
@ -33,7 +91,12 @@
|
|||
local settings = Settings.new(db, domain_name, domain_uuid)
|
||||
|
||||
local max_len_seconds = settings:get('voicemail', 'message_max_length', 'numeric') or 300;
|
||||
|
||||
transcribe_enabled = settings:get('voicemail', 'transcribe_enabled', 'boolean') or "false";
|
||||
|
||||
if (debug["info"]) then
|
||||
freeswitch.consoleLog("notice", "[voicemail] transcribe_enabled: " .. transcribe_enabled .. "\n");
|
||||
end
|
||||
|
||||
--record your message at the tone press any key or stop talking to end the recording
|
||||
if (skip_instructions == "true") then
|
||||
--skip the instructions
|
||||
|
|
@ -157,13 +220,16 @@
|
|||
mkdir(voicemail_dir.."/"..voicemail_id);
|
||||
if (vm_message_ext == "mp3") then
|
||||
shout_exists = trim(api:execute("module_exists", "mod_shout"));
|
||||
if (shout_exists == "true") then
|
||||
if (shout_exists == "true" and transcribe_enabled == "false") then
|
||||
freeswitch.consoleLog("notice", "using mod_shout for mp3 encoding\n");
|
||||
--record in mp3 directly
|
||||
result = session:recordFile(voicemail_dir.."/"..voicemail_id.."/msg_"..uuid..".mp3", max_len_seconds, record_silence_threshold, silence_seconds);
|
||||
else
|
||||
--create initial wav recording
|
||||
result = session:recordFile(voicemail_dir.."/"..voicemail_id.."/msg_"..uuid..".wav", max_len_seconds, record_silence_threshold, silence_seconds);
|
||||
if (transcribe_enabled == "true") then
|
||||
transcription = transcribe(voicemail_dir.."/"..voicemail_id.."/msg_"..uuid..".wav",settings);
|
||||
end
|
||||
--use lame to encode, if available
|
||||
if (file_exists("/usr/bin/lame")) then
|
||||
freeswitch.consoleLog("notice", "using lame for mp3 encoding\n");
|
||||
|
|
@ -183,6 +249,9 @@
|
|||
end
|
||||
else
|
||||
result = session:recordFile(voicemail_dir.."/"..voicemail_id.."/msg_"..uuid.."."..vm_message_ext, max_len_seconds, record_silence_threshold, silence_seconds);
|
||||
if (transcribe_enabled == "true") then
|
||||
transcription = transcribe(voicemail_dir.."/"..voicemail_id.."/msg_"..uuid.."."..vm_message_ext,settings);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -115,8 +115,13 @@
|
|||
local message_date = os.date("%A, %d %b %Y %I:%M %p", created_epoch)
|
||||
|
||||
--prepare the files
|
||||
file_subject = scripts_dir.."/app/voicemail/resources/templates/"..default_language.."/"..default_dialect.."/email_subject.tpl";
|
||||
file_body = scripts_dir.."/app/voicemail/resources/templates/"..default_language.."/"..default_dialect.."/email_body.tpl";
|
||||
if (transcription ~= nil) then
|
||||
file_subject = scripts_dir.."/app/voicemail/resources/templates/"..default_language.."/"..default_dialect.."/email_subject.tpl";
|
||||
file_body = scripts_dir.."/app/voicemail/resources/templates/"..default_language.."/"..default_dialect.."/email_body_transcription.tpl";
|
||||
else
|
||||
file_subject = scripts_dir.."/app/voicemail/resources/templates/"..default_language.."/"..default_dialect.."/email_subject.tpl";
|
||||
file_body = scripts_dir.."/app/voicemail/resources/templates/"..default_language.."/"..default_dialect.."/email_body.tpl";
|
||||
end
|
||||
if (not file_exists(file_subject)) then
|
||||
file_subject = scripts_dir.."/app/voicemail/resources/templates/en/us/email_subject.tpl";
|
||||
file_body = scripts_dir.."/app/voicemail/resources/templates/en/us/email_body.tpl";
|
||||
|
|
@ -166,6 +171,9 @@
|
|||
body = body:gsub("${caller_id_name}", caller_id_name);
|
||||
body = body:gsub("${caller_id_number}", caller_id_number);
|
||||
body = body:gsub("${message_date}", message_date);
|
||||
if (transcription ~= nil) then
|
||||
body = body:gsub("${message_text}", transcription);
|
||||
end
|
||||
body = body:gsub("${message_duration}", message_length_formatted);
|
||||
body = body:gsub("${account}", voicemail_name_formatted);
|
||||
body = body:gsub("${voicemail_id}", id);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,69 @@
|
|||
<html>
|
||||
<table width="400" border="0" cellspacing="0" cellpadding="0" align="center"
|
||||
style="border: 1px solid #cbcfd5;-moz-border-radius: 4px;
|
||||
-webkit-border-radius: 4px; border-radius: 4px;">
|
||||
<tr>
|
||||
<td valign="middle" align="center" bgcolor="#e5e9f0" style="background-color: #e5e9f0;
|
||||
color: #000; font-family: Arial; font-size: 14px; padding: 7px;-moz-border-radius: 4px;
|
||||
-webkit-border-radius: 4px; border-radius: 4px;">
|
||||
<strong>New Voicemail</strong>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" style="padding: 15px;">
|
||||
<table width="100%" border="0" cellspacing="0" cellpadding="0">
|
||||
<tr>
|
||||
<td style="color: #333; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
<strong>To</strong>
|
||||
</td>
|
||||
<td style="color: #666; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
${voicemail_name_formatted}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="color: #333; font-family: Arial; font-size: 12px; padding-bottom: 11px;" width="20%">
|
||||
<strong>From</strong>
|
||||
</td>
|
||||
<td style="color: #666; font-family: Arial; font-size: 12px; padding-bottom: 11px;" width="80%">
|
||||
${caller_id_number}
|
||||
</td>
|
||||
</tr>
|
||||
<!--
|
||||
<tr>
|
||||
<td style="color: #333; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
<strong>Received</strong>
|
||||
</td>
|
||||
<td style="color: #666; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
${message_date}
|
||||
</td>
|
||||
</tr>
|
||||
-->
|
||||
<tr>
|
||||
<td style="color: #333; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
<strong>Message</strong>
|
||||
</td>
|
||||
<td style="color: #666; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
${message}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="color: #333; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
<strong>Message Text</strong>
|
||||
</td>
|
||||
<td style="color: #666; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
${message_text}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="color: #333; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
<strong>Length</strong>
|
||||
</td>
|
||||
<td style="color: #666; font-family: Arial; font-size: 12px; padding-bottom: 11px;">
|
||||
${message_duration}
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
-- Entry point of the bundled lunajson library: loads the decoder and encoder
-- factories plus the sax module and exposes one ready-made instance of each.
local make_decoder = require 'resources.functions.lunajson.decoder'
local make_encoder = require 'resources.functions.lunajson.encoder'
local sax_lib = require 'resources.functions.lunajson.sax'

-- If you need multiple contexts of decoder and/or encoder,
-- you can require lunajson.decoder and/or lunajson.encoder directly.
local lunajson = {}
lunajson.decode = make_decoder()
lunajson.encode = make_encoder()
lunajson.newparser = sax_lib.newparser
lunajson.newfileparser = sax_lib.newfileparser

return lunajson
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
local inf = math.huge
local byte, char, sub = string.byte, string.char, string.sub
local setmetatable = setmetatable
local floor = math.floor

local _ENV = nil

-- Maps (character code - 47) to the value of a hexadecimal digit: index 1 is
-- '0', 18..23 are 'A'..'F' and 50..55 are 'a'..'f'. Every other slot, and any
-- index outside the table (served by __index), is `inf`, so a single invalid
-- digit turns the whole code point sum into `inf`.
local hextbl = {
	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, inf, inf, inf, inf, inf, inf,
	inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, inf, inf, inf, inf, inf, inf, inf, inf,
	inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
	inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, inf, inf, inf, inf, inf, inf, inf, inf,
}
hextbl.__index = function()
	return inf
end
setmetatable(hextbl, hextbl)

-- Builds the escape-sequence helpers used while decoding JSON strings.
-- `myerror` is called with a message for every malformed escape; the code
-- below is written on the assumption that it raises and does not return.
-- Returns a table with:
--   subst(ch, rest)  replacement callback for gsub: `ch` is the character
--                    after a backslash, `rest` the text up to the next one
--   surrogateok()    true when no high surrogate is still waiting for its pair
return function(myerror)
	local escapetbl = {
		['"']  = '"',
		['\\'] = '\\',
		['/']  = '/',
		['b']  = '\b',
		['f']  = '\f',
		['n']  = '\n',
		['r']  = '\r',
		['t']  = '\t'
	}
	escapetbl.__index = function()
		myerror("invalid escape sequence")
	end
	setmetatable(escapetbl, escapetbl)

	-- 0 when idle, the pending high surrogate while waiting for the low one,
	-- or 1 to flag a low surrogate that appeared without a preceding high one
	local surrogateprev = 0

	local function subst(ch, rest)
		-- 0.000003814697265625 = 2^-18
		-- 0.000244140625 = 2^-12
		-- 0.015625 = 2^-6
		local u8
		if ch == 'u' then
			local c1, c2, c3, c4 = byte(rest, 1, 4)
			if not c4 then
				-- fewer than four characters follow \u: report it as a parse
				-- error instead of failing on nil arithmetic below
				surrogateprev = 0
				myerror("invalid unicode charcode")
			end
			local ucode = hextbl[c1-47] * 0x1000 + hextbl[c2-47] * 0x100 + hextbl[c3-47] * 0x10 + hextbl[c4-47]
			if ucode == inf then
				surrogateprev = 0
				myerror("invalid unicode charcode")
			end
			rest = sub(rest, 5)
			if ucode < 0x80 then -- 1byte
				u8 = char(ucode)
			elseif ucode < 0x800 then -- 2byte
				u8 = char(0xC0 + floor(ucode * 0.015625), 0x80 + ucode % 0x40)
			elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3byte
				u8 = char(0xE0 + floor(ucode * 0.000244140625), 0x80 + floor(ucode * 0.015625) % 0x40, 0x80 + ucode % 0x40)
			elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
				if surrogateprev == 0 then
					surrogateprev = ucode
					if rest == '' then
						-- the low surrogate must follow immediately
						return ''
					end
				end
			else -- surrogate pair 2nd
				if surrogateprev == 0 then
					surrogateprev = 1
				else
					ucode = 0x10000 + (surrogateprev - 0xD800) * 0x400 + (ucode - 0xDC00)
					surrogateprev = 0
					u8 = char(0xF0 + floor(ucode * 0.000003814697265625), 0x80 + floor(ucode * 0.000244140625) % 0x40, 0x80 + floor(ucode * 0.015625) % 0x40, 0x80 + ucode % 0x40)
				end
			end
		end
		if surrogateprev ~= 0 then
			-- clear the state before raising: this closure outlives the failed
			-- call, and a stale value would make every later string that
			-- contains an escape fail with the same error
			surrogateprev = 0
			myerror("invalid surrogate pair")
		end
		return (u8 or escapetbl[ch]) .. rest
	end

	-- Reports whether the last string ended with no pending surrogate, and
	-- clears the pending state so a failure cannot carry over to the next
	-- string. NOTE(review): assumes callers invoke this once, right after
	-- the gsub over a complete string - confirm for the sax parser.
	local function surrogateok()
		local ok = surrogateprev == 0
		surrogateprev = 0
		return ok
	end

	return {
		subst = subst,
		surrogateok = surrogateok
	}
end
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
local inf = math.huge
local byte, char, sub = string.byte, string.char, string.sub
local setmetatable = setmetatable

local _ENV = nil

-- Maps (character code - 47) to the value of a hexadecimal digit: index 1 is
-- '0', 18..23 are 'A'..'F' and 50..55 are 'a'..'f'. Every other slot, and any
-- index outside the table (served by __index), is `inf`, so a single invalid
-- digit turns the whole code point sum into `inf`.
local hextbl = {
	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, inf, inf, inf, inf, inf, inf,
	inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, inf, inf, inf, inf, inf, inf, inf, inf,
	inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
	inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, inf, inf, inf, inf, inf, inf, inf, inf,
}
hextbl.__index = function()
	return inf
end
setmetatable(hextbl, hextbl)

-- Lua 5.3 variant of the escape-sequence helpers: same contract as the
-- portable version, but UTF-8 bytes are assembled with bitwise operators.
-- `myerror` is called with a message for every malformed escape; the code
-- below is written on the assumption that it raises and does not return.
-- Returns a table with:
--   subst(ch, rest)  replacement callback for gsub: `ch` is the character
--                    after a backslash, `rest` the text up to the next one
--   surrogateok()    true when no high surrogate is still waiting for its pair
return function(myerror)
	local escapetbl = {
		['"']  = '"',
		['\\'] = '\\',
		['/']  = '/',
		['b']  = '\b',
		['f']  = '\f',
		['n']  = '\n',
		['r']  = '\r',
		['t']  = '\t'
	}
	escapetbl.__index = function()
		myerror("invalid escape sequence")
	end
	setmetatable(escapetbl, escapetbl)

	-- 0 when idle, the pending high surrogate while waiting for the low one,
	-- or 1 to flag a low surrogate that appeared without a preceding high one
	local surrogateprev = 0

	local function subst(ch, rest)
		local u8
		if ch == 'u' then
			local c1, c2, c3, c4 = byte(rest, 1, 4)
			if not c4 then
				-- fewer than four characters follow \u: report it as a parse
				-- error instead of failing on nil arithmetic below
				surrogateprev = 0
				myerror("invalid unicode charcode")
			end
			-- multiplications should not be lshift since cn may be inf
			local ucode = hextbl[c1-47] * 0x1000 + hextbl[c2-47] * 0x100 + hextbl[c3-47] * 0x10 + hextbl[c4-47]
			if ucode == inf then
				surrogateprev = 0
				myerror("invalid unicode charcode")
			end
			rest = sub(rest, 5)
			if ucode < 0x80 then -- 1byte
				u8 = char(ucode)
			elseif ucode < 0x800 then -- 2byte
				u8 = char(0xC0 + (ucode >> 6), 0x80 + (ucode & 0x3F))
			elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3byte
				u8 = char(0xE0 + (ucode >> 12), 0x80 + (ucode >> 6 & 0x3F), 0x80 + (ucode & 0x3F))
			elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
				if surrogateprev == 0 then
					surrogateprev = ucode
					if rest == '' then
						-- the low surrogate must follow immediately
						return ''
					end
				end
			else -- surrogate pair 2nd
				if surrogateprev == 0 then
					surrogateprev = 1
				else
					ucode = 0x10000 + (surrogateprev - 0xD800 << 10) + (ucode - 0xDC00)
					surrogateprev = 0
					u8 = char(0xF0 + (ucode >> 18), 0x80 + (ucode >> 12 & 0x3F), 0x80 + (ucode >> 6 & 0x3F), 0x80 + (ucode & 0x3F))
				end
			end
		end
		if surrogateprev ~= 0 then
			-- clear the state before raising: this closure outlives the failed
			-- call, and a stale value would make every later string that
			-- contains an escape fail with the same error
			surrogateprev = 0
			myerror("invalid surrogate pair")
		end
		return (u8 or escapetbl[ch]) .. rest
	end

	-- Reports whether the last string ended with no pending surrogate, and
	-- clears the pending state so a failure cannot carry over to the next
	-- string. NOTE(review): assumes callers invoke this once, right after
	-- the gsub over a complete string - confirm for the sax parser.
	local function surrogateok()
		local ok = surrogateprev == 0
		surrogateprev = 0
		return ok
	end

	return {
		subst = subst,
		surrogateok = surrogateok
	}
end
|
||||
|
|
@ -0,0 +1,364 @@
|
|||
local error = error
|
||||
local byte, char, find, gsub, match, sub = string.byte, string.char, string.find, string.gsub, string.match, string.sub
|
||||
local tonumber = tonumber
|
||||
local tostring, setmetatable = tostring, setmetatable
|
||||
|
||||
-- The function that interprets JSON strings is separated into another file so as to
|
||||
-- use bitwise operation to speedup unicode codepoints processing on Lua 5.3.
|
||||
local genstrlib
|
||||
if _VERSION == "Lua 5.3" then
|
||||
genstrlib = require 'resources.functions.lunajson._str_lib_lua53'
|
||||
else
|
||||
genstrlib = require 'resources.functions.lunajson._str_lib'
|
||||
end
|
||||
|
||||
local _ENV = nil
|
||||
|
||||
local function newdecoder()
|
||||
local json, pos, nullv, arraylen
|
||||
|
||||
-- `f` is the temporary for dispatcher[c] and
|
||||
-- the dummy for the first return value of `find`
|
||||
local dispatcher, f
|
||||
|
||||
--[[
|
||||
Helper
|
||||
--]]
|
||||
-- Raise a parse error that names the current read position.
local function decodeerror(errmsg)
	local message = "parse error at " .. pos .. ": " .. errmsg
	error(message)
end
|
||||
|
||||
--[[
|
||||
Invalid
|
||||
--]]
|
||||
-- Dispatcher entry for a character that cannot start any JSON value.
local function f_err()
	local reason = 'invalid value'
	decodeerror(reason)
end
|
||||
|
||||
--[[
|
||||
Constants
|
||||
--]]
|
||||
-- null
|
||||
-- Parse the remainder of `null`; the leading 'n' was consumed by the caller.
-- Yields the caller-supplied null placeholder `nullv`.
local function f_nul()
	local tail = sub(json, pos, pos+2)
	if tail ~= 'ull' then
		decodeerror('invalid value')
	end
	pos = pos+3
	return nullv
end
|
||||
|
||||
-- false
|
||||
-- Parse the remainder of `false`; the leading 'f' was consumed by the caller.
local function f_fls()
	local tail = sub(json, pos, pos+3)
	if tail ~= 'alse' then
		decodeerror('invalid value')
	end
	pos = pos+4
	return false
end
|
||||
|
||||
-- true
|
||||
-- Parse the remainder of `true`; the leading 't' was consumed by the caller.
local function f_tru()
	local tail = sub(json, pos, pos+2)
	if tail ~= 'rue' then
		decodeerror('invalid value')
	end
	pos = pos+3
	return true
end
|
||||
|
||||
--[[
|
||||
Numbers
|
||||
Conceptually, the longest prefix that matches to `-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
||||
(in regexp) is captured as a number and its conformance to the JSON spec is checked.
|
||||
--]]
|
||||
-- deal with non-standard locales
|
||||
-- Detect the decimal mark of the current locale. JSON always writes '.', so
-- when the locale uses another character the literal is rewritten before it
-- is handed to tonumber.
local radixmark = match(tostring(0.5), '[^0-9]')
local fixedtonumber = tonumber
if radixmark ~= '.' then
	-- in a gsub replacement string only '%' is special, so that is the only
	-- character that needs escaping (a '%,' replacement is rejected by
	-- Lua 5.2 and later)
	local replacement = gsub(radixmark, '%%', '%%%%')
	fixedtonumber = function(s)
		-- '%.' matches a literal dot only (a bare '.' matches every
		-- character); the parentheses drop gsub's substitution count so it
		-- is not passed to tonumber as a base
		return tonumber((gsub(s, '%.', replacement)))
	end
end
|
||||
|
||||
-- Shared failure path of the number parsers.
local function error_number()
	local reason = 'invalid number'
	decodeerror(reason)
end
|
||||
|
||||
-- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
||||
-- Parse a number whose integer part is a single `0`:
-- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`. `pos` points just past the zero.
-- mns: true when a minus sign preceded the zero.
-- Returns the parsed number and advances `pos` past the literal.
local function f_zro(mns)
	local postmp = pos
	local num
	-- nil when the zero is the last character of the input; that is a
	-- complete number, so it is not treated as an error
	local c = byte(json, postmp)

	if c == 0x2E then -- is this `.`?
		num = match(json, '^.[0-9]*', pos) -- skipping 0
		local numlen = #num
		if numlen == 1 then
			-- a dot with no digit after it
			return error_number()
		end
		postmp = pos + numlen
		c = byte(json, postmp)
	end

	if c == 0x45 or c == 0x65 then -- is this e or E?
		local numexp = match(json, '^[^eE]*[eE][-+]?[0-9]+', pos)
		if not numexp then
			return error_number()
		end
		if num then -- since `0e.*` is always 0.0, ignore those
			num = numexp
		end
		postmp = pos + #numexp
	end

	pos = postmp
	if num then
		num = fixedtonumber(num)
	else
		num = 0.0
	end
	if mns then
		num = -num
	end
	return num
end
|
||||
|
||||
-- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
||||
-- Parse a number starting with a non-zero digit:
-- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`. The caller has already stepped
-- past the first digit, so the position is moved back by one first.
-- mns: true when a minus sign preceded the number.
local function f_num(mns)
	pos = pos-1
	local literal = match(json, '^.[0-9]*%.?[0-9]*', pos)
	if byte(literal, -1) == 0x2E then
		-- the mantissa ends in a dot with no digits after it
		return error_number()
	end

	local stop = pos + #literal
	local marker = byte(json, stop)
	if marker == 0x45 or marker == 0x65 then -- e or E?
		-- re-read the literal from its start, this time with the exponent
		literal = match(json, '^[^eE]*[eE][-+]?[0-9]+', pos)
		if not literal then
			return error_number()
		end
		stop = pos + #literal
	end

	pos = stop
	local value = fixedtonumber(literal)-0.0
	if mns then
		return -value
	end
	return value
end
|
||||
|
||||
-- skip minus sign
|
||||
-- Handle a leading minus sign: look at the digit that follows and hand over
-- to the matching number parser with the "negative" flag set.
local function f_mns()
	local digit = byte(json, pos)
	if not digit then
		decodeerror('invalid number')
	end
	pos = pos+1
	if digit == 0x30 then
		return f_zro(true)
	elseif digit > 0x30 and digit < 0x3A then
		return f_num(true)
	end
	decodeerror('invalid number')
end
|
||||
|
||||
--[[
|
||||
Strings
|
||||
--]]
|
||||
local f_str_lib = genstrlib(decodeerror)
|
||||
local f_str_surrogateok = f_str_lib.surrogateok -- whether codepoints for surrogate pair are correctly paired
|
||||
local f_str_subst = f_str_lib.subst -- the function passed to gsub that interprets escapes
|
||||
|
||||
-- caching interpreted keys for speed
|
||||
local f_str_keycache = setmetatable({}, {__mode="v"})
|
||||
|
||||
-- Parse a string; `pos` points at the first character after the opening
-- quote. iskey: true when the string is an object key, in which case the
-- interpreted result is looked up in, and stored to, the key cache.
local function f_str(iskey)
	local scan = pos
	local quote, before2, before1
	repeat
		quote = find(json, '"', scan, true) -- search '"'
		if not quote then
			decodeerror("unterminated string")
		end
		scan = quote+1
		-- hop backwards over complete pairs of backslashes
		before2, before1 = byte(json, quote-2, quote-1)
		while before1 == 0x5C and before2 == 0x5C do
			quote = quote-2
			before2, before1 = byte(json, quote-2, quote-1)
		end
	until before1 ~= 0x5C -- a lone backslash means the quote was escaped

	local raw = sub(json, pos, scan-2)
	pos = scan

	if iskey then -- check key cache
		local cached = f_str_keycache[raw]
		if cached then
			return cached
		end
	end

	local text = raw
	if find(text, '\\', 1, true) then -- check if backslash occurs
		text = gsub(text, '\\(.)([^\\]*)', f_str_subst) -- interpret escapes
		if not f_str_surrogateok() then
			decodeerror("invalid surrogate pair")
		end
	end

	if iskey then -- commit key cache
		f_str_keycache[raw] = text
	end
	return text
end
|
||||
|
||||
--[[
|
||||
Arrays, Objects
|
||||
--]]
|
||||
-- array
|
||||
-- Parse an array; `pos` points just past the opening '['.
-- When `arraylen` is set, the element count is stored at index 0.
local function f_ary()
	local items = {}

	f, pos = find(json, '^[ \n\r\t]*', pos)
	pos = pos+1

	-- a closing bracket right away means an empty array
	if byte(json, pos) == 0x5D then
		pos = pos+1
		if arraylen then
			items[0] = 0
		end
		return items
	end

	local count = 0
	local cursor = pos-1
	repeat
		count = count+1
		f = dispatcher[byte(json,cursor+1)] -- parse value
		pos = cursor+2
		items[count] = f()
		f, cursor = find(json, '^[ \n\r\t]*,[ \n\r\t]*', pos) -- check comma
	until not cursor

	f, cursor = find(json, '^[ \n\r\t]*%]', pos) -- check closing bracket
	if not cursor then
		decodeerror("no closing bracket of an array")
	end
	pos = cursor+1

	if arraylen then -- commit the length of the array if `arraylen` is set
		items[0] = count
	end
	return items
end
|
||||
|
||||
-- objects
|
||||
-- Parse an object; `pos` points just past the opening '{'.
local function f_obj()
	local members = {}

	f, pos = find(json, '^[ \n\r\t]*', pos)
	pos = pos+1

	-- a closing brace right away means an empty object
	if byte(json, pos) == 0x7D then
		pos = pos+1
		return members
	end

	local cursor = pos-1
	repeat
		pos = cursor+1
		if byte(json, pos) ~= 0x22 then -- check '"'
			decodeerror("not key")
		end
		pos = pos+1
		local name = f_str(true) -- parse key

		-- fast path for compact json: the key is followed by ':' and the
		-- value, or by ':', one space and the value
		f = f_err
		local b1, b2, b3 = byte(json, pos, pos+3)
		if b1 == 0x3A then
			cursor = pos
			if b2 == 0x20 then
				cursor = cursor+1
				b2 = b3
			end
			f = dispatcher[b2]
		end

		-- slow path: read a colon and an arbitrary number of spaces
		if f == f_err then
			f, cursor = find(json, '^[ \n\r\t]*:[ \n\r\t]*', pos)
			if not cursor then
				decodeerror("no colon after a key")
			end
		end

		f = dispatcher[byte(json, cursor+1)] -- parse value
		pos = cursor+2
		members[name] = f()
		f, cursor = find(json, '^[ \n\r\t]*,[ \n\r\t]*', pos)
	until not cursor

	f, cursor = find(json, '^[ \n\r\t]*}', pos)
	if not cursor then
		decodeerror("no closing bracket of an object")
	end
	pos = cursor+1
	return members
end
|
||||
|
||||
--[[
|
||||
The jump table to dispatch a parser for a value, indexed by the code of the value's first char.
|
||||
Nil key means the end of json.
|
||||
--]]
|
||||
-- Jump table from the code of a value's first character to its parser.
-- Codes 0..127 default to the "invalid value" handler; any other key,
-- including nil at the end of the input, goes through __index.
dispatcher = {}
for code = 0, 127 do
	dispatcher[code] = f_err
end
dispatcher[0x22] = f_str -- '"'
dispatcher[0x2D] = f_mns -- '-'
dispatcher[0x30] = f_zro -- '0'
for code = 0x31, 0x39 do -- '1'..'9'
	dispatcher[code] = f_num
end
dispatcher[0x5B] = f_ary -- '['
dispatcher[0x66] = f_fls -- 'f'
dispatcher[0x6E] = f_nul -- 'n'
dispatcher[0x74] = f_tru -- 't'
dispatcher[0x7B] = f_obj -- '{'
dispatcher.__index = function()
	decodeerror("unexpected termination")
end
setmetatable(dispatcher, dispatcher)
|
||||
|
||||
--[[
|
||||
run decoder
|
||||
--]]
|
||||
-- Decode one JSON value.
-- json_:     the text to parse
-- pos_:      optional start position; when given, the value and the position
--            after it are returned and trailing text is not checked
-- nullv_:    value returned for JSON null
-- arraylen_: when set, arrays record their length at index 0
local function decode(json_, pos_, nullv_, arraylen_)
	json = json_
	nullv = nullv_
	arraylen = arraylen_
	pos = pos_ or 1

	-- skip leading whitespace, then dispatch on the first character
	f, pos = find(json, '^[ \n\r\t]*', pos)
	pos = pos+1
	f = dispatcher[byte(json, pos)]
	pos = pos+1
	local value = f()

	if pos_ then
		return value, pos
	end

	-- only whitespace may follow the value
	f, pos = find(json, '^[ \n\r\t]*', pos)
	if pos ~= #json then
		error('json ended')
	end
	return value
end
|
||||
|
||||
return decode
|
||||
end
|
||||
|
||||
return newdecoder
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
local error = error
|
||||
local byte, find, format, gsub, match = string.byte, string.find, string.format, string.gsub, string.match
|
||||
local concat = table.concat
|
||||
local tostring = tostring
|
||||
local pairs, type = pairs, type
|
||||
local setmetatable = setmetatable
|
||||
local huge, tiny = 1/0, -1/0
|
||||
|
||||
local f_string_pat
|
||||
if _VERSION == "Lua 5.1" then
|
||||
-- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
|
||||
f_string_pat = '[^ -!#-[%]^-\255]'
|
||||
else
|
||||
f_string_pat = '[\0-\31"\\]'
|
||||
end
|
||||
|
||||
local _ENV = nil
|
||||
|
||||
--- Build an independent JSON encoder.
-- The returned function has the signature `encode(value, nullv)` and returns
-- the JSON text as a string. `nullv` is the sentinel that is written as `null`.
-- Raises 'invalid number', 'loop detected', 'non-string key' or
-- 'invalid type value' for values that cannot be encoded.
local function newencoder()
	-- Output state shared by all emitters; (re)initialised by every encode().
	local nullv
	local i, builder, visited
	-- forward declaration: the container emitter recurses through it
	local doencode

	-- booleans
	local function f_tostring(v)
		builder[i] = tostring(v)
		i = i+1
	end

	-- Probe how the current locale renders numbers: which character is the
	-- radix mark and whether a grouping delimiter is inserted.
	local radixmark = match(tostring(0.5), '[^0-9]')
	local delimmark = match(tostring(12345.12345), '[^0-9' .. radixmark .. ']')
	if radixmark == '.' then
		radixmark = nil -- already JSON-compatible, nothing to rewrite
	elseif find(radixmark, '%W') then
		radixmark = '%' .. radixmark -- escape for use as a pattern
	end
	if delimmark and find(delimmark, '%W') then
		delimmark = '%' .. delimmark -- escape for use as a pattern
	end

	-- numbers; NaN and +/-infinity are rejected
	local function f_number(n)
		if not (tiny < n and n < huge) then
			error('invalid number')
		end
		local text = format("%.17g", n)
		if delimmark then
			text = gsub(text, delimmark, '')
		end
		if radixmark then
			text = gsub(text, radixmark, '.')
		end
		builder[i] = text
		i = i+1
	end

	-- Replacement table for gsub: short escapes are listed explicitly, every
	-- other matched character goes through __index and becomes \u00XX.
	local f_string_subst = setmetatable({
		['"'] = '\\"',
		['\\'] = '\\\\',
		['\b'] = '\\b',
		['\f'] = '\\f',
		['\n'] = '\\n',
		['\r'] = '\\r',
		['\t'] = '\\t',
	}, {
		__index = function(_, c)
			return format('\\u00%02X', byte(c))
		end,
	})

	-- strings (also used for object keys)
	local function f_string(s)
		local escaped = s
		if find(s, f_string_pat) then -- only pay for gsub when needed
			escaped = gsub(s, f_string_pat, f_string_subst)
		end
		builder[i] = '"'
		builder[i+1] = escaped
		builder[i+2] = '"'
		i = i+3
	end

	-- tables: array with explicit length in o[0], array detected through
	-- o[1], or object otherwise
	local function f_table(o)
		if visited[o] then
			error("loop detected")
		end
		visited[o] = true

		local len = o[0]
		if type(len) == 'number' then
			-- explicit array length
			builder[i] = '['
			i = i+1
			for idx = 1, len do
				doencode(o[idx])
				builder[i] = ','
				i = i+1
			end
			if len > 0 then
				i = i-1 -- step back onto the trailing comma
			end
			builder[i] = ']'
		else
			local item = o[1]
			if item == nil then
				-- object
				builder[i] = '{'
				i = i+1
				local start = i
				for k, val in pairs(o) do
					if type(k) ~= 'string' then
						error("non-string key")
					end
					f_string(k)
					builder[i] = ':'
					i = i+1
					doencode(val)
					builder[i] = ','
					i = i+1
				end
				if i > start then
					i = i-1 -- step back onto the trailing comma
				end
				builder[i] = '}'
			else
				-- sequence: runs until the first nil element
				builder[i] = '['
				i = i+1
				local idx = 2
				while true do
					doencode(item)
					item = o[idx]
					if item == nil then
						break
					end
					idx = idx+1
					builder[i] = ','
					i = i+1
				end
				builder[i] = ']'
			end
		end

		i = i+1
		visited[o] = nil
	end

	-- type name -> emitter; unknown types end up in __index
	local dispatcher = setmetatable({
		boolean = f_tostring,
		number = f_number,
		string = f_string,
		table = f_table,
	}, {
		__index = function()
			error("invalid type value")
		end,
	})

	function doencode(v)
		if v == nullv then
			builder[i] = 'null'
			i = i+1
			return
		end
		return dispatcher[type(v)](v)
	end

	local function encode(v_, nullv_)
		nullv = nullv_
		i, builder, visited = 1, {}, {}
		doencode(v_)
		return concat(builder)
	end

	return encode
end
|
||||
|
||||
return newencoder
|
||||
|
|
@ -0,0 +1,525 @@
|
|||
local error = error
local byte, char, find, gsub, match, sub = string.byte, string.char, string.find, string.gsub, string.match, string.sub
local tonumber = tonumber
local tostring, type, unpack = tostring, type, table.unpack or unpack
local io_open = io.open
|
||||
|
||||
-- The function that interprets JSON strings is separated into another file so as to
|
||||
-- use bitwise operation to speedup unicode codepoints processing on Lua 5.3.
|
||||
local genstrlib
|
||||
if _VERSION == "Lua 5.3" then
|
||||
genstrlib = require 'resources.functions.lunajson._str_lib_lua53'
|
||||
else
|
||||
genstrlib = require 'resources.functions.lunajson._str_lib'
|
||||
end
|
||||
|
||||
local _ENV = nil
|
||||
|
||||
local function nop() end
|
||||
|
||||
local function newparser(src, saxtbl)
|
||||
local json, jsonnxt
|
||||
local jsonlen, pos, acc = 0, 1, 0
|
||||
|
||||
-- `f` is the temporary for dispatcher[c] and
|
||||
-- the dummy for the first return value of `find`
|
||||
local dispatcher, f
|
||||
|
||||
-- initialize the input source
-- `jsonnxt` advances to the next chunk of input. Once the input is exhausted
-- it leaves json = '', jsonlen = 0, pos = 1 and replaces itself with `nop`.
if type(src) == 'string' then
	-- the whole document is a single chunk
	json = src
	jsonlen = #json
	jsonnxt = function()
		-- Reach the same end-of-input state as the chunked source below.
		-- Leaving `pos` beyond the emptied buffer makes string.find return nil
		-- on Lua 5.2+, which turned truncated input into an
		-- "arithmetic on a nil value" failure instead of a parse error.
		acc = acc + jsonlen
		pos = 1
		json = ''
		jsonlen = 0
		jsonnxt = nop
	end
else
	-- `src` is a function returning successive chunks, then nil
	jsonnxt = function()
		acc = acc + jsonlen -- remember how much input is already behind us
		pos = 1
		repeat
			json = src()
			if not json then
				json = ''
				jsonlen = 0
				jsonnxt = nop
				return
			end
			jsonlen = #json
		until jsonlen > 0 -- skip empty chunks
	end
	jsonnxt() -- load the first chunk
end
|
||||
|
||||
local sax_startobject = saxtbl.startobject or nop
|
||||
local sax_key = saxtbl.key or nop
|
||||
local sax_endobject = saxtbl.endobject or nop
|
||||
local sax_startarray = saxtbl.startarray or nop
|
||||
local sax_endarray = saxtbl.endarray or nop
|
||||
local sax_string = saxtbl.string or nop
|
||||
local sax_number = saxtbl.number or nop
|
||||
local sax_boolean = saxtbl.boolean or nop
|
||||
local sax_null = saxtbl.null or nop
|
||||
|
||||
--[[
|
||||
Helper
|
||||
--]]
|
||||
-- Return the byte code at the current position, loading the next chunk once
-- when the current one is exhausted. Returns nil at the end of the input.
local function tryc()
	local code = byte(json, pos)
	if code then
		return code
	end
	jsonnxt()
	-- parenthesised so that exactly one value (possibly nil) is returned
	return (byte(json, pos))
end
|
||||
|
||||
-- Raise a parse error prefixed with the absolute position in the input
-- (bytes of earlier chunks plus the position inside the current chunk).
local function parseerror(errmsg)
	local where = acc + pos
	error("parse error at " .. where .. ": " .. errmsg)
end
|
||||
|
||||
-- Like tryc(), but the end of the input is a parse error.
local function tellc()
	local code = tryc()
	if not code then
		parseerror("unexpected termination")
	end
	return code
end
|
||||
|
||||
-- Skip whitespace, crossing chunk boundaries if necessary, and leave `pos` on
-- the next significant character. Raises when the input ends first.
local function spaces()
	-- the pattern also matches the empty string; `pos` receives the index of
	-- the last whitespace character (or the index before the start position)
	f, pos = find(json, '^[ \n\r\t]*', pos)
	while pos == jsonlen do
		-- whitespace ran to the end of the chunk: continue in the next one
		if jsonlen == 0 then
			parseerror("unexpected termination")
		end
		jsonnxt()
		f, pos = find(json, '^[ \n\r\t]*', pos)
	end
	pos = pos+1
end
|
||||
|
||||
--[[
|
||||
Invalid
|
||||
--]]
|
||||
-- Dispatcher target for every character that cannot start a JSON value.
local function f_err()
	return parseerror('invalid value')
end
|
||||
|
||||
--[[
|
||||
Constants
|
||||
--]]
|
||||
-- Slow path for literals: compares the input with `target` one byte at a
-- time via tellc(), so the literal may span chunk boundaries. On success the
-- SAX callback `sax_f` is invoked with `ret`.
local function generic_constant(target, targetlen, ret, sax_f)
	local idx = 1
	while idx <= targetlen do
		local got = tellc()
		if got ~= byte(target, idx) then
			parseerror("invalid char")
		end
		pos = pos+1
		idx = idx+1
	end
	return sax_f(ret)
end
|
||||
|
||||
-- null (the leading 'n' has already been consumed by the dispatcher)
local function f_nul()
	if sub(json, pos, pos+2) ~= 'ull' then
		-- not fully inside this chunk, or wrong: let the slow path decide
		return generic_constant('ull', 3, nil, sax_null)
	end
	pos = pos+3
	return sax_null(nil)
end
|
||||
|
||||
-- false (the leading 'f' has already been consumed by the dispatcher)
local function f_fls()
	if sub(json, pos, pos+3) ~= 'alse' then
		-- not fully inside this chunk, or wrong: let the slow path decide
		return generic_constant('alse', 4, false, sax_boolean)
	end
	pos = pos+4
	return sax_boolean(false)
end
|
||||
|
||||
-- true (the leading 't' has already been consumed by the dispatcher)
local function f_tru()
	if sub(json, pos, pos+2) ~= 'rue' then
		-- not fully inside this chunk, or wrong: let the slow path decide
		return generic_constant('rue', 3, true, sax_boolean)
	end
	pos = pos+3
	return sax_boolean(true)
end
|
||||
|
||||
--[[
|
||||
Numbers
|
||||
Conceptually, the longest prefix that matches to `(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]*)?`
|
||||
(in regexp) is captured as a number and its conformance to the JSON spec is checked.
|
||||
--]]
|
||||
-- Deal with non-standard locales: JSON always uses '.' as the radix mark,
-- while tonumber() follows the current locale. `fixedtonumber` converts a
-- JSON number string regardless of the locale's radix mark.
local radixmark = match(tostring(0.5), '[^0-9]')
local fixedtonumber = tonumber
if radixmark ~= '.' then
	-- A table replacement is looked up by the matched text and inserted
	-- verbatim, so the radix mark needs no escaping for gsub.
	local dot_to_radix = { ['.'] = radixmark }
	fixedtonumber = function(s)
		-- '%.' matches a literal dot only (a bare '.' matches every character).
		-- The extra parentheses drop gsub's second result (the replacement
		-- count), which tonumber would otherwise take as the numeric base.
		return tonumber((gsub(s, '%.', dot_to_radix)))
	end
end
|
||||
|
||||
-- Slow path for numbers: collects the number byte by byte through tryc(), so
-- it also works when the number spans chunk boundaries. `pos` must be on the
-- first digit. `mns` is true when a minus sign preceded the number.
local function generic_number(mns)
	local codes, count = {}, 0

	local c = byte(json, pos)
	pos = pos+1

	-- store the current byte and fetch the next one
	local function take()
		count = count+1
		codes[count] = c
		c = tryc()
		pos = pos+1
	end

	-- is the current byte an ASCII digit?
	local function isdigit()
		return c and 0x30 <= c and c < 0x3A
	end

	-- integer part: a single '0' or a run of digits
	if c == 0x30 then
		take()
	else
		repeat take() until not isdigit()
	end

	-- fraction: '.' followed by at least one digit
	if c == 0x2E then
		take()
		if not isdigit() then
			parseerror('invalid number')
		end
		repeat take() until not isdigit()
	end

	-- exponent: 'e' or 'E', optional sign, at least one digit
	if c == 0x45 or c == 0x65 then
		take()
		if c == 0x2B or c == 0x2D then
			take()
		end
		if not isdigit() then
			parseerror('invalid number')
		end
		repeat take() until not isdigit()
	end

	-- take() always looks one byte ahead; step back onto that byte
	pos = pos-1

	local value = fixedtonumber(char(unpack(codes)))-0.0
	if mns then
		value = -value
	end
	return sax_number(value)
end
|
||||
|
||||
-- Fast path for numbers starting with '0':
-- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
-- The '0' has already been consumed, `pos` is on the following character.
-- Anything that cannot be settled inside the current chunk is handed to
-- generic_number after stepping back onto the '0'.
local function f_zro(mns)
	local stop = pos
	local text -- stays nil unless there is a fractional part
	local c = byte(json, stop)

	if c == 0x2E then -- '.'
		text = match(json, '^.[0-9]*', pos) -- the '0' itself is skipped
		local len = #text
		if len == 1 then
			-- no digit after the dot inside this chunk
			pos = pos-1
			return generic_number(mns)
		end
		stop = pos + len
		c = byte(json, stop)
	end

	if c == 0x45 or c == 0x65 then -- 'E' or 'e'
		local withexp = match(json, '^[^eE]*[eE][-+]?[0-9]+', pos)
		if not withexp then
			pos = pos-1
			return generic_number(mns)
		end
		if text then -- `0e...` is always 0.0, so only keep it with a fraction
			text = withexp
		end
		stop = pos + #withexp
	end

	if stop > jsonlen then
		-- the number touches the end of the chunk and may continue
		pos = pos-1
		return generic_number(mns)
	end
	pos = stop

	local value = 0.0
	if text then
		value = fixedtonumber(text)
	end
	if mns then
		value = -value
	end
	return sax_number(value)
end
|
||||
|
||||
-- Fast path for numbers starting with a non-zero digit:
-- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
-- The first digit has already been consumed by the dispatcher. Anything that
-- cannot be settled inside the current chunk is handed to generic_number.
local function f_num(mns)
	pos = pos-1 -- step back onto the first digit
	local text = match(json, '^.[0-9]*%.?[0-9]*', pos)
	if byte(text, -1) == 0x2E then
		-- ends with '.': no fraction digit inside this chunk
		return generic_number(mns)
	end
	local stop = pos + #text
	local marker = byte(json, stop)

	if marker == 0x45 or marker == 0x65 then -- 'E' or 'e'
		text = match(json, '^[^eE]*[eE][-+]?[0-9]+', pos)
		if not text then
			return generic_number(mns)
		end
		stop = pos + #text
	end

	if stop > jsonlen then
		-- the number touches the end of the chunk and may continue
		return generic_number(mns)
	end
	pos = stop

	local value = fixedtonumber(text)-0.0
	if mns then
		value = -value
	end
	return sax_number(value)
end
|
||||
|
||||
-- Minus sign: look at the following digit and continue with the matching
-- number parser, telling it that the result has to be negated.
local function f_mns()
	local digit = byte(json, pos) or tellc()
	if digit then
		pos = pos+1
		if digit == 0x30 then -- '0'
			return f_zro(true)
		end
		if 0x30 < digit and digit < 0x3A then -- '1' .. '9'
			return f_num(true)
		end
	end
	parseerror("invalid number")
end
|
||||
|
||||
--[[
|
||||
Strings
|
||||
--]]
|
||||
local f_str_lib = genstrlib(parseerror)
|
||||
local f_str_surrogateok = f_str_lib.surrogateok -- whether codepoints for surrogate pair are correctly paired
|
||||
local f_str_subst = f_str_lib.subst -- the function passed to gsub that interprets escapes
|
||||
|
||||
-- Parse a string whose opening quote has already been consumed and report it
-- through sax_key (when `iskey` is true) or sax_string.
-- The string may span several chunks. Raises a parse error when the input
-- ends before the closing quote or when a surrogate pair is malformed.
local function f_str(iskey)
	local pos2 = pos -- where to continue searching for '\' or '"'
	local newpos
	local str = '' -- text collected from chunks that are already consumed
	local bs -- true once a backslash has been seen
	while true do
		while true do -- search '\' or '"'
			newpos = find(json, '[\\"]', pos2)
			if newpos then
				break
			end
			-- neither found: keep the rest of this chunk and go on in the next
			str = str .. sub(json, pos, jsonlen)
			if pos2 == jsonlen+2 then
				-- a backslash was the last char of the chunk, so the escaped
				-- char is the first one of the next chunk and must be skipped
				pos2 = 2
			else
				pos2 = 1
			end
			jsonnxt()
			if jsonlen == 0 then
				-- input exhausted: without this check the search would spin
				-- forever, since jsonnxt is a no-op from now on
				parseerror("unterminated string")
			end
		end
		if byte(json, newpos) == 0x22 then -- break if '"'
			break
		end
		pos2 = newpos+2 -- skip '\<char>'
		bs = true -- remember that backslash occurs
	end
	str = str .. sub(json, pos, newpos-1)
	pos = newpos+1

	if bs then -- check if backslash occurs
		str = gsub(str, '\\(.)([^\\]*)', f_str_subst) -- interpret escapes
		if not f_str_surrogateok() then
			parseerror("invalid surrogate pair")
		end
	end

	if iskey then
		return sax_key(str)
	end
	return sax_string(str)
end
|
||||
|
||||
--[[
|
||||
Arrays, Objects
|
||||
--]]
|
||||
-- arrays (the opening '[' has already been consumed by the dispatcher)
local function f_ary()
	sax_startarray()
	spaces()
	if byte(json, pos) == 0x5D then -- ']' right away: empty array
		pos = pos+1
		return sax_endarray()
	end

	local stop
	while true do
		-- parse one element
		f = dispatcher[byte(json, pos)]
		pos = pos+1
		f()

		-- fast path: the separator lies completely inside the current chunk
		f, stop = find(json, '^[ \n\r\t]*,[ \n\r\t]*', pos)
		if not stop then
			f, stop = find(json, '^[ \n\r\t]*%]', pos)
			if stop then
				pos = stop -- on the closing bracket
				break
			end
			-- the chunk may have ended: skip spaces into the following chunks
			-- and look at the separator again
			spaces()
			local sep = byte(json, pos)
			if sep == 0x5D then -- ']'
				break
			elseif sep ~= 0x2C then -- anything but ','
				parseerror("no closing bracket of an array")
			end
			pos = pos+1
			spaces()
			stop = pos-1
		end
		pos = stop+1
		if pos > jsonlen then
			spaces() -- the next element starts in a following chunk
		end
	end

	pos = pos+1 -- step past ']'
	return sax_endarray()
end
|
||||
|
||||
-- objects (the opening '{' has already been consumed by the dispatcher)
local function f_obj()
	sax_startobject()
	spaces()
	if byte(json, pos) == 0x7D then -- '}' right away: empty object
		pos = pos+1
		return sax_endobject()
	end

	local stop
	while true do
		-- key: must be a string
		if byte(json, pos) ~= 0x22 then
			parseerror("not key")
		end
		pos = pos+1
		f_str(true)

		-- colon; fast path first, then retry across the chunk boundary
		f, stop = find(json, '^[ \n\r\t]*:[ \n\r\t]*', pos)
		if not stop then
			spaces()
			if byte(json, pos) ~= 0x3A then
				parseerror("no colon after a key")
			end
			pos = pos+1
			spaces()
			stop = pos-1
		end
		pos = stop+1
		if pos > jsonlen then
			spaces() -- the value starts in a following chunk
		end

		-- value
		f = dispatcher[byte(json, pos)]
		pos = pos+1
		f()

		-- fast path: the separator lies completely inside the current chunk
		f, stop = find(json, '^[ \n\r\t]*,[ \n\r\t]*', pos)
		if not stop then
			f, stop = find(json, '^[ \n\r\t]*}', pos)
			if stop then
				pos = stop -- on the closing bracket
				break
			end
			-- the chunk may have ended: skip spaces into the following chunks
			-- and look at the separator again
			spaces()
			local sep = byte(json, pos)
			if sep == 0x7D then -- '}'
				break
			elseif sep ~= 0x2C then -- anything but ','
				parseerror("no closing bracket of an object")
			end
			pos = pos+1
			spaces()
			stop = pos-1
		end
		pos = stop+1
		if pos > jsonlen then
			spaces() -- the next key starts in a following chunk
		end
	end

	pos = pos+1 -- step past '}'
	return sax_endobject()
end
|
||||
|
||||
--[[
	The jump table to dispatch a parser for a value, indexed by the code of the
	value's first char. Key should be non-nil.
	All byte values 0..255 are covered: a value starting with a non-ASCII byte
	used to hit a missing entry and fail with "attempt to call a nil value";
	it now reports a regular parse error through f_err.
--]]
dispatcher = {}
for code = 0, 255 do
	dispatcher[code] = f_err
end
dispatcher[0x22] = f_str -- '"'
dispatcher[0x2D] = f_mns -- '-'
dispatcher[0x30] = f_zro -- '0'
for code = 0x31, 0x39 do -- '1' .. '9'
	dispatcher[code] = f_num
end
dispatcher[0x5B] = f_ary -- '['
dispatcher[0x66] = f_fls -- 'f'
dispatcher[0x6E] = f_nul -- 'n'
dispatcher[0x74] = f_tru -- 't'
dispatcher[0x7B] = f_obj -- '{'
|
||||
|
||||
--[[
|
||||
public functions
|
||||
--]]
|
||||
-- Parse exactly one JSON value from the current position, firing the SAX
-- callbacks on the way. Leading whitespace is skipped.
local function run()
	spaces()
	local first = byte(json, pos)
	f = dispatcher[first]
	pos = pos+1
	f()
end
|
||||
|
||||
-- Read up to `n` raw bytes from the current position, crossing chunk
-- boundaries if necessary, and advance past them. Returns fewer bytes when
-- the input ends first. Raises when `n` is negative.
local function read(n)
	if n < 0 then
		error("the argument must be non-negative")
	end
	local pos2 = (pos-1) + n -- index of the last wanted byte in this chunk
	local str = sub(json, pos, pos2)
	while pos2 > jsonlen and jsonlen ~= 0 do
		-- Rebase the end index by the length of the chunk that is being left.
		-- It has to be captured before jsonnxt() replaces `jsonlen`: using the
		-- new chunk's length gave wrong results whenever chunk sizes differ.
		local consumed = jsonlen
		jsonnxt()
		pos2 = pos2 - consumed
		str = str .. sub(json, pos, pos2)
	end
	if jsonlen ~= 0 then
		pos = pos2+1
	end
	return str
end
|
||||
|
||||
-- Absolute position of the parser in the input: bytes of the chunks already
-- left behind plus the position inside the current chunk.
local function tellpos()
	local offset = pos + acc
	return offset
end
|
||||
|
||||
return {
|
||||
run = run,
|
||||
tryc = tryc,
|
||||
read = read,
|
||||
tellpos = tellpos,
|
||||
}
|
||||
end
|
||||
|
||||
-- Create a SAX parser that reads the file named `fn` in chunks of 8192 bytes.
-- `saxtbl` holds the SAX callbacks and is passed to newparser unchanged.
-- If the file cannot be opened the source yields no data at all; newparser is
-- still called with that empty source.
local function newfileparser(fn, saxtbl)
	-- The global `io` is out of reach here on Lua 5.2+ because of
	-- `local _ENV = nil` above; use the alias captured at the top of the file.
	local fp = io_open(fn)
	-- chunk source: returns the next chunk, or nil once the file is exhausted
	local function gen()
		local s
		if fp then
			s = fp:read(8192)
			if not s then
				-- end of file: release the handle, later calls return nil
				fp:close()
				fp = nil
			end
		end
		return s
	end
	return newparser(gen, saxtbl)
end
|
||||
|
||||
return {
|
||||
newparser = newparser,
|
||||
newfileparser = newfileparser
|
||||
}
|
||||
Loading…
Reference in New Issue