Skip to content
Snippets Groups Projects
Verified Commit 03962bfe authored by mirabilos's avatar mirabilos Committed by mirabilos
Browse files

implement safer UTF-8 decoder, including system override hook

parent 982f66e3
No related branches found
No related tags found
No related merge requests found
......@@ -47,6 +47,9 @@
% pull in the F4/checkseq fix-up only while \UTFviii@check@three is undefined
\ifx\UTFviii@check@three\@undefined%
\input{unicodedomino_kernel_fixup_f4_and_checkseq.def}%
\fi%
% pull in the safer decoder only while \UTFviii@decode is undefined, so an
% already existing definition of that name takes precedence over ours
\ifx\UTFviii@decode\@undefined%
\input{unicodedomino_kernel_better_decode.def}%
\fi%
% these two files are read unconditionally
\input{unicodedomino_kernel_cosmetics.def}%
\input{unicodedomino_compat.def}%
......
% -*- mode: tex -*-
%-
% Copyright © 2018
% mirabilos <t.glaser@tarent.de>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%
% This file is dual-licenced under the LPPL version 1.3c or later.
%-
% Improved Unicode decoding using the fixed-up checkseq code.
% Override the stock \decode@UTFviii: evaluate the \numexpr fragment that
% \UTFviii@decode produces and expand to the resulting number.
% #1 (delimited by \relax) is the byte sequence to decode. The "0:" put
% in front of it is a dummy prefix: \UTFviii@dec@lead splits its input at
% the first colon and ignores whatever precedes it.
% The \relax after the closing parenthesis terminates the \numexpr and is
% swallowed by it. Without it the expression scanner would run on into the
% tokens following the call: gobble a following space, expand following
% macros early, or take a following + - * / as part of the expression.
\gdef\decode@UTFviii#1\relax{%
\the\numexpr(\UTFviii@decode0:#1\relax)\relax%
}%
% Safer decode. Expands to a \numexpr fragment rather than to a finished
% number, so it is meant to be used inside \numexpr (as \decode@UTFviii
% does). #1 (delimited by \relax) has the form <prefix>:<bytes>.
% The sequence is first handed to \UTFviii@checkseq; only if that test
% succeeds are the bytes decoded by \UTFviii@dec@lead, otherwise the
% result is 2097151 (0x1FFFFF).
% NOTE(review): \UTFviii@checkseq is defined elsewhere; presumably it
% leaves nothing behind for a well-formed sequence, so that \if ends up
% comparing \relax with \relax -- confirm against its definition.
\gdef\UTFviii@decode#1\relax{%
\if\relax\expandafter\UTFviii@checkseq\string#1\empty\relax%
\UTFviii@dec@lead#1\relax%
\else%
% 2097151 = "1FFFFF, the marker value for an illegal sequence
2097151%
\fi%
}%
% Decode the lead byte and open the arithmetic for the trail bytes.
% Input has the form <prefix>:<lead><trail bytes>\relax; the prefix (#1)
% is ignored, #2 is the lead byte and #3 the (possibly empty) rest.
% The sequence has been validated before we get here, so the lead byte
% lies in 00..7F or C2..F4. One "(" is opened per expected trail byte;
% every \UTFviii@dec@trail step closes one of them again.
\gdef\UTFviii@dec@lead#1:#2#3\relax{%
  \ifnum`#2>"7F %
    % multi-byte sequence: subtract the length marker from the lead byte
    \ifnum`#2>"EF %
      (((`#2-"F0% three trail bytes follow
    \else%
      \ifnum`#2>"DF %
        ((`#2-"E0% two trail bytes follow
      \else%
        (`#2-"C0% one trail byte follows
      \fi%
    \fi%
    \UTFviii@dec@trail#3\relax%
  \else%
    `#2% single byte: its value is the result
  \fi%
}%
% Fold one trail byte into the running value, then recurse over the rest.
% #1 is the next trail byte, #2 (delimited by \relax) the remaining ones.
\gdef\UTFviii@dec@trail#1#2\relax{%
% close one pending "(", multiply what was accumulated by 64 and add
% the value of this byte minus "80
)*64+(`#1-"80)%
% if #2 is empty, \ifx compares \relax with the \else token itself; that
% is false and everything up to \fi is skipped, which ends the recursion.
% if #2 is not empty the test is false as well, but now the \else branch
% is still there and recurses on the remaining bytes.
\ifx\relax#2\else\UTFviii@dec@trail#2\relax\fi%
}%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment