-- Reads the next character code from `script`.
-- One byte is fetched with script:readU8(). A value below 0x80 is returned
-- as-is. Otherwise a second byte is fetched and the result is
-- (first - 0x80) + second * 128.
function get_next_char(script)
  local first = script:readU8()
  if first >= 0x80 then
    local second = script:readU8()
    return (first - 0x80) + (second * 128)
  end
  return first
end
-- Splits the first UTF-8 encoded character off the front of `str`.
--
-- The length of the sequence is derived from the lead byte:
--   0x00-0x7f -> 1 byte, 0xc2-0xdf -> 2 bytes,
--   0xe0-0xef -> 3 bytes, 0xf0-0xf4 -> 4 bytes.
-- Lead bytes 0x80-0xc1 and 0xf5-0xff are rejected.
--
-- Returns two strings: the bytes of the first character, and the remainder
-- of `str` after it.
--
-- Raises "Wrong UTF-8 sequence." when `str` is empty, when the lead byte is
-- not a valid lead byte, when the string ends before the sequence is
-- complete, or when a following byte is not a continuation byte (0x80-0xbf).
function get_next_utf8(str)
  local lead = str:byte(1)
  -- str:byte(1) is nil for an empty string; fail with the regular message
  -- instead of a nil comparison error.
  if not lead then error("Wrong UTF-8 sequence.") end

  local len
  if lead <= 0x7f then
    len = 1
  elseif lead <= 0xc1 then
    -- 0x80-0xbf are continuation bytes, 0xc0/0xc1 are never valid leads.
    error("Wrong UTF-8 sequence.")
  elseif lead <= 0xdf then
    len = 2
  elseif lead <= 0xef then
    len = 3
  elseif lead <= 0xf4 then
    len = 4
  else
    error("Wrong UTF-8 sequence.")
  end

  -- Every byte after the lead must exist and be a continuation byte;
  -- otherwise the sequence is truncated or malformed.
  for i = 2, len do
    local cont = str:byte(i)
    if not cont or cont < 0x80 or cont > 0xbf then
      error("Wrong UTF-8 sequence.")
    end
  end

  return str:sub(1, len), str:sub(len + 1)
end
-- Number of argument bytes that follow each special code in the script
-- stream for dump_special. Codes that are not listed take no argument bytes.
--
-- NOTE(review): an unreachable fallback branch in the previous version of
-- dump_special listed codes 4 and 14 as taking one argument byte, while the
-- branches that actually ran consumed none. The reachable behaviour (no
-- bytes) is what is kept here; confirm against the script format.
local dump_special_arg_bytes = {
  [3] = 1, [7] = 1, [8] = 1,
  [6] = 2, [12] = 2, [17] = 2, [19] = 2,
}

-- Handles special code `code` read from `script`.
--
-- Always returns two strings. Code 0 yields "\n", "\n"; code 1 yields
-- "\n\n", "\n". Every other code consumes its argument bytes from `script`
-- (via script:readU8(), see dump_special_arg_bytes) so the stream stays
-- aligned, and yields two empty strings.
function dump_special(script, code)
  if code == 0 then
    return "\n", "\n"
  end
  if code == 1 then
    return "\n\n", "\n"
  end

  -- The argument values do not influence the result; they only have to be
  -- skipped.
  for _ = 1, dump_special_arg_bytes[code] or 0 do
    script:readU8()
  end
  return '', ""
end
-- Special codes handled by dump_special2 that are followed by exactly one
-- argument byte in the script stream.
local dump_special2_one_arg = {
  [3] = true, [5] = true, [7] = true, [8] = true, [9] = true,
  [12] = true, [13] = true, [14] = true, [15] = true, [21] = true,
}

-- Handles special code `code` read from `script` (second code table).
--
-- Always returns two strings. Code 0 yields "\n", "\n"; code 1 yields
-- "\n\n", "\n". For every other code the argument bytes are consumed with
-- script:readU8() (one byte for the codes in dump_special2_one_arg, two
-- bytes for code 20, none otherwise) and two empty strings are returned.
function dump_special2(script, code)
  if code == 0 then
    return "\n", "\n"
  end
  if code == 1 then
    return "\n\n", "\n"
  end

  if dump_special2_one_arg[code] then
    script:readU8()
  elseif code == 20 then
    script:readU8()
    script:readU8()
  end
  return '', ""
end
-- Extracts the next item from `script`.
--
-- The character code is read with get_next_char(script):
--   * code 0 returns nil;
--   * codes of 0x4000 and above are special codes: 0x4000 is subtracted and
--     the result is handed to dump_special2 when the global
--     second_style_script is set, or to dump_special otherwise; whatever
--     that call returns is returned unchanged;
--   * any other code is looked up in `lookup`; a hit is returned twice.
--     A miss raises an error unless the global sloppy_extract is set, in
--     which case two empty strings are returned.
function extract_char(script, lookup)
  local c = get_next_char(script)
  if c == 0 then
    return nil
  end

  if c < 0x4000 then
    local glyph = lookup[c]
    if glyph then
      return glyph, glyph
    end
    if not sloppy_extract then
      error("Lookup failed for character " .. c)
    end
    return '', ""
  end

  local code = c - 0x4000
  if second_style_script then
    return dump_special2(script, code)
  end
  return dump_special(script, code)
end
-- Looks up the index registered for `txt`.
-- The text is hashed with SHA1() and the digest is used as the key into the
-- global all_sha1 table; the stored value is returned (nil when absent).
function get_txt_idx(txt)
  return all_sha1[SHA1(txt)]
end
-- Registers `txt` as a new text entry and returns its index.
--
-- The new index is #all_txts + 1. On success three global tables are
-- updated:
--   all_sha1[digest] = index
--   all_txts[index]  = { sha1 = digest, txt = txt }
--   all_origins[index] = (current_file - 3610) as a string
-- The meaning of the 3610 offset is not visible from this function.
--
-- Raises "Something's inconsistant" when all_origins already holds an entry
-- for the new index. Everything that can fail is evaluated before any table
-- is written, so a failure leaves all three tables untouched.
function add_txt_idx(txt)
  local idx = #all_txts + 1
  if all_origins[idx] then error("Something's inconsistant") end

  local sha1 = SHA1(txt)
  local origin = "" .. (current_file - 3610)

  all_sha1[sha1] = idx
  all_txts[idx] = {
    sha1 = sha1,
    txt = txt,
  }
  all_origins[idx] = origin
  return idx
end
-- Maps the texts ptrs_contents[ptr_begin .. ptr_end] to text indices.
--
-- For each position the text is looked up with get_txt_idx. Unknown texts
-- are registered through add_txt_idx. For known texts the string
-- "," .. (current_file - 3610) is appended to the existing all_origins
-- entry; a known text without an all_origins entry raises
-- "Something's inconsistant".
--
-- Returns a table keyed by the same positions (ptr_begin .. ptr_end) whose
-- values are the text indices.
function process_ptrs(ptrs_contents, ptr_begin, ptr_end)
  local indices = {}
  for pos = ptr_begin, ptr_end do
    local text = ptrs_contents[pos]
    local idx = get_txt_idx(text)
    if idx then
      local origins = all_origins[idx]
      if not origins then error("Something's inconsistant") end
      all_origins[idx] = origins .. "," .. (current_file - 3610)
    else
      idx = add_txt_idx(text)
    end
    indices[pos] = idx
  end
  return indices
end