Skip to content
Snippets Groups Projects
unicodedomino_kernel_fixup_f4_and_checkseq.def 2.39 KiB
% -*- mode: tex -*-
%-
% See unicodedomino.sty for copyright and licence terms. Furthermore
% this file is dual-licenced under the LPPL version 1.3c or later.
%-
% Fix check for illegal sequences to fail overlong encoded sequences
% as well as codepoints outside of the Unicode range [0;10FFFF]. Add
% "F4 to the list of permitted lead octets.

%: not forwarded yet, need to do so now (plus unicodedomino_kernel_better_decode.def which is its user)
% new check for illegal sequences
% \UTFviii@checkseq<prefix>:<octets>\empty
%
% Expandable validity check for one octet sequence.
%  #1  everything in front of the first colon; not used by the check
%      (presumably the stringified csname prefix -- confirm against
%      the caller)
%  #2  the lead octet, as a character token
%  #3  all remaining octets up to the terminating \empty (may be empty)
% Expands to nothing if the sequence is acceptable and to a single `1'
% if it is illegal.
%
% A sequence consisting of one octet only is decided right here, so a
% lone lead octet never reaches the helper macros: they take at least
% one undelimited argument and would otherwise swallow the \empty
% delimiter and scan on into the surrounding conditional.
%
% NOTE(review): lead octets E1..EF accept any second octet 80..BF, so
% ED A0..BF (the UTF-16 surrogate range) passes -- confirm this is
% intended.
\gdef\UTFviii@checkseq#1:#2#3\empty{%
 \ifx\empty#3\empty%
  % exactly one octet: acceptable only if it is ASCII
  \ifnum`#2<"80 %
  \else%
   1%
  \fi%
 \else%
  % two or more octets
  \ifnum`#2<"C2 %
   % ASCII followed by more octets, a trail octet in lead position,
   % or one of the overlong leads C0/C1
   1%
  \else%
   \ifnum`#2<"E0 %
    % C2-DF: one 80-BF
    \UTFviii@check@one#3\empty%
   \else%
    \ifnum`#2<"E1 %
     % E0: A0-BF + one 80-BF (lower values would be overlong)
     \UTFviii@check@two"A0.#3\empty%
    \else%
     \ifnum`#2<"F0 %
      % E1-EF: two 80-BF
      \UTFviii@check@two"80.#3\empty%
     \else%
      \ifnum`#2<"F1 %
       % F0: 90-BF + two 80-BF (lower values would be overlong)
       \UTFviii@check@three"90."BF.#3\empty%
      \else%
       \ifnum`#2<"F4 %
        % F1-F3: three 80-BF
        \UTFviii@check@three"80."BF.#3\empty%
       \else%
        \ifnum`#2<"F5 %
         % F4: 80-8F + two 80-BF (higher values exceed 10FFFF)
         \UTFviii@check@three"80."8F.#3\empty%
        \else%
         % F5-FF are never valid lead octets
         1%
        \fi%
       \fi%
      \fi%
     \fi%
    \fi%
   \fi%
  \fi%
 \fi%
}%
% \UTFviii@check@one<octets>\empty
%
% Expandable helper for the final trail octet of a sequence.
%  #1  the octet to test, as a character token
%  #2  whatever follows it up to \empty; must be empty
% Expands to nothing if exactly one octet in the range 80..BF was
% passed and to `1' otherwise.  At least one octet has to be present.
\gdef\UTFviii@check@one#1#2\empty{%
 \ifx\empty#2\empty%
  % exactly one octet: reject it when it lies outside 80..BF
  \ifnum"BF<`#1 %
   1%
  \else%
   \ifnum"80>`#1 %
    1%
   \fi%
  \fi%
 \else%
  % surplus octets behind the expected last one
  1%
 \fi%
}%
% \UTFviii@check@two<min>.<octets>\empty
%
% Expandable helper for the last two trail octets of a sequence.
%  #1  lowest value permitted for the first of them, as a TeX number
%      (the callers in this file pass "80 or "A0)
%  #2  first trail octet, as a character token
%  #3  the rest up to \empty; handed on to \UTFviii@check@one
% Expands to `1' if only one octet was passed or if #2 lies outside
% #1..BF; otherwise to whatever \UTFviii@check@one makes of #3.
% At least one octet has to be present.
\gdef\UTFviii@check@two#1.#2#3\empty{%
 \ifx\empty#3\empty%
  % sequence ends one octet early
  1%
 \else%
  \ifnum"BF<`#2 %
   1%
  \else%
   \ifnum#1>`#2 %
    1%
   \else%
    % #2 is within bounds; exactly one more 80-BF must follow
    \UTFviii@check@one#3\empty%
   \fi%
  \fi%
 \fi%
}%
% \UTFviii@check@three<min>.<max>.<octets>\empty
%
% Expandable helper for the three trail octets of a four-octet sequence.
%  #1  lowest value permitted for the first trail octet, as a TeX number
%  #2  highest value permitted for the first trail octet, likewise
%  #3  first trail octet, as a character token
%  #4  the rest up to \empty; handed on to \UTFviii@check@two with a
%      lower bound of "80
% Expands to `1' if only one octet was passed or if #3 lies outside
% #1..#2; otherwise to whatever \UTFviii@check@two makes of #4.
% At least one octet has to be present.
\gdef\UTFviii@check@three#1.#2.#3#4\empty{%
 \ifx\empty#4\empty%
  % nothing behind the first trail octet
  1%
 \else%
  \ifnum#2<`#3 %
   1%
  \else%
   \ifnum#1>`#3 %
    1%
   \else%
    % #3 is within bounds; two more 80-BF must follow
    \UTFviii@check@two"80.#4\empty%
   \fi%
  \fi%
 \fi%
}%

%: fixed upstream
% bugfix: disallow too large definitions
% Wrap \parse@XML@charref: raise an inputenc error when \count@ holds a
% value above "10FFFF, then hand over to the previous definition in
% either case.
%
% The previous definition is saved globally because the wrapper itself
% is installed with \gdef; a merely local copy would vanish at the end
% of an enclosing group while the wrapper, which calls it, stays.
\global\let\unicodedomino@parse@XML@charref\parse@XML@charref%
\gdef\parse@XML@charref{%
 \ifnum\count@>"10FFFF\relax%
  % \PackageError takes three arguments; without an explicit help
  % text it would grab the \fi below as its third one
  \PackageError{inputenc}{%
   Cannot define Unicode char value\space%
   \UTFviii@hexnumber\count@\space%
   (too large)%
  }\@ehc%
 \fi%
 \unicodedomino@parse@XML@charref%
}%

%: also merged
% bugfix: add "F4 to the list of permitted lead octets
% (needs the above fix)
% Globally define the active character with code "F4 to expand to
% \UTFviii@four@octets followed by that same character as a
% non-active (stringified) token.
%
% A tilde serves as placeholder: it is made active, its \lccode is
% pointed at "F4, and \lowercase turns both tildes below into character
% "F4 while keeping their category codes.  The group confines the
% \catcode and \lccode changes; the definition itself is global.
\begingroup%
\catcode`\~=13 %
\lccode`\~="F4 %
\lowercase{%
 \xdef~{\noexpand\UTFviii@four@octets\string~}%
}%
\endgroup%