From 2fd646085cc54b8fec1f1799b337371c2c2f9a44 Mon Sep 17 00:00:00 2001
From: mirabilos <m@mirbsd.org>
Date: Sun, 5 Aug 2018 20:41:36 +0200
Subject: [PATCH] move the kernel fixup stuff into a separate file

---
 unicodedomino.sty              | 118 +--------------------------
 unicodedomino_kernel_fixup.def | 142 +++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+), 116 deletions(-)
 create mode 100644 unicodedomino_kernel_fixup.def

diff --git a/unicodedomino.sty b/unicodedomino.sty
index 278c29b..0c7d255 100644
--- a/unicodedomino.sty
+++ b/unicodedomino.sty
@@ -43,25 +43,8 @@
   {You will need Debian stretch or newer}%
 \fi%
 
-% patch up bug in utf8.def that forbade 0xF4 lead byte
-\begingroup%
-\catcode`\~13
-\uccode`\~"F4
-\def\UTFviii@tmp{\xdef~{\noexpand\UTFviii@four@octets\string~}}%
-\uppercase\expandafter{\UTFviii@tmp}%
-\endgroup%
-
-% now fixup to disallow too large definitions
-\let\unicodedomino@parse@XML@charref\parse@XML@charref%
-\gdef\parse@XML@charref{%
- \ifnum\count@>"10FFFF\relax%
-  \PackageError{inputenc}{%
-   Cannot define Unicode char value\space%
-   \unicodedomino@shex\the\count@\relax\space (too large)%
-  }%
- \fi%
- \unicodedomino@parse@XML@charref%
-}%
+% pull in the kernel fixup code, which lives in its own file
+\input{unicodedomino_kernel_fixup.def}%
 
 % nicer printing of codepoint hex numbers, not strictly necessary
 \def\unicodedomino@codepoint#1{%
@@ -87,14 +70,6 @@
  \UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}%
 }%
 
-% and arbitrary numbers
-\def\unicodedomino@shex#1\relax{%
- \ifnum#1>15 %
-  \expandafter\unicodedomino@shex\expandafter\the\numexpr(#1-8)/16\relax%
- \fi%
- \UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}%
-}%
-
 % override to beautify the output, not strictly necessary but requested
 \gdef\UTFviii@splitcsname#1:#2\relax{%
  #2 (\expandafter\unicodedomino@codepoint\expandafter{%
@@ -223,95 +198,6 @@
  \fi%
 }%
 
-% input validation, including security-relevant checks
-\def\UTFviii@checkseq#1:#2#3\empty{%
- \ifnum`#2<"80 %
-  \ifx\empty#3\empty%
-  \else%
-   1%
-  \fi%
- \else%
-  \ifnum`#2<"C2 %
-   1%
-  \else%
-   \ifnum`#2<"E0 %
-    % one 80-BF
-    \UTFviii@check@one#3\empty%
-   \else%
-    \ifnum`#2<"E1 %
-     % A0-BF + one 80-BF
-     \UTFviii@check@two"A0.#3\empty%
-    \else%
-     \ifnum`#2<"F0 %
-      % two 80-BF
-      \UTFviii@check@two"80.#3\empty%
-     \else%
-      \ifnum`#2<"F1 %
-       % 90-BF + two 80-BF
-       \UTFviii@check@three"90."BF.#3\empty%
-      \else%
-       \ifnum`#2<"F4 %
-        % three 80-BF
-        \UTFviii@check@three"80."BF.#3\empty%
-       \else%
-        \ifnum`#2<"F5 %
-         % 80-8F + two 80-BF
-         \UTFviii@check@three"80."8F.#3\empty%
-        \else%
-         1%
-        \fi%
-       \fi%
-      \fi%
-     \fi%
-    \fi%
-   \fi%
-  \fi%
- \fi%
-}%
-\def\UTFviii@check@one#1#2\empty{%
- \ifx\empty#2\empty%
-  \ifnum`#1<"80 %
-   1%
-  \else%
-   \ifnum`#1>"BF %
-    1%
-   \fi%
-  \fi%
- \else%
-  1%
- \fi%
-}%
-\def\UTFviii@check@two#1.#2#3\empty{%
- \ifx\empty#3\empty%
-  1%
- \else%
-  \ifnum`#2<#1 %
-   1%
-  \else%
-   \ifnum`#2>"BF %
-    1%
-   \else%
-    \UTFviii@check@one#3\empty%
-   \fi%
-  \fi%
- \fi%
-}%
-\def\UTFviii@check@three#1.#2.#3#4\empty{%
- \ifx\empty#4\empty%
-  1%
- \else%
-  \ifnum`#3<#1 %
-   1%
-  \else%
-   \ifnum`#3>#2 %
-    1%
-   \else%
-    \UTFviii@check@two"80.#4\empty%
-   \fi%
-  \fi%
- \fi%
-}%
-
 % clean up after ourselves
 \makeatother%
 \endinput
diff --git a/unicodedomino_kernel_fixup.def b/unicodedomino_kernel_fixup.def
new file mode 100644
index 0000000..c0c4f60
--- /dev/null
+++ b/unicodedomino_kernel_fixup.def
@@ -0,0 +1,142 @@
+% -*- mode: tex -*-
+%-
+% Copyright © 2018
+%	mirabilos <m@mirbsd.org>
+%
+% Provided that these terms and disclaimer and all copyright notices
+% are retained or reproduced in an accompanying document, permission
+% is granted to deal in this work without restriction, including un‐
+% limited rights to use, publicly perform, distribute, sell, modify,
+% merge, give away, or sublicence.
+%
+% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
+% the utmost extent permitted by applicable law, neither express nor
+% implied; without malicious intent or gross negligence. In no event
+% may a licensor, author or contributor be held liable for indirect,
+% direct, other damage, loss, or other issues arising in any way out
+% of dealing in the work, even if advised of the possibility of such
+% damage or existence of a defect, except proven that it results out
+% of said person’s immediate fault when using the work as intended.
+%
+% This file is dual-licenced under the LPPL version 1.3c or later.
+%-
+% Fix check for illegal sequences to fail overlong encoded sequences
+% as well as codepoints outside of the Unicode range [0;10FFFF]. Add
+% 0xF4 to the list of permitted lead octets.
+
+% run the fixup code only if \UTFviii@check@three is still undefined {{{ begin
+\ifx\UTFviii@check@three\@undefined% the fixup below defines that macro
+
+% new check for illegal sequences: expands to 1 if rejected, to nothing if accepted
+\def\UTFviii@checkseq#1:#2#3\empty{% #1: text before the colon (unused), #2: lead octet, #3: the rest
+ \ifnum`#2<"80 %
+  \ifx\empty#3\empty% 00-7F is accepted only when nothing follows it
+  \else%
+   1%
+  \fi%
+ \else%
+  \ifnum`#2<"C2 % 80-C1 is rejected as a lead octet
+   1%
+  \else%
+   \ifnum`#2<"E0 %
+    % C2-DF: one 80-BF
+    \UTFviii@check@one#3\empty%
+   \else%
+    \ifnum`#2<"E1 %
+     % E0: A0-BF + one 80-BF
+     \UTFviii@check@two"A0.#3\empty%
+    \else%
+     \ifnum`#2<"F0 %
+      % E1-EF: two 80-BF (NOTE(review): ED A0-BF, the surrogates, passes too)
+      \UTFviii@check@two"80.#3\empty%
+     \else%
+      \ifnum`#2<"F1 %
+       % F0: 90-BF + two 80-BF
+       \UTFviii@check@three"90."BF.#3\empty%
+      \else%
+       \ifnum`#2<"F4 %
+        % F1-F3: three 80-BF
+        \UTFviii@check@three"80."BF.#3\empty%
+       \else%
+        \ifnum`#2<"F5 %
+         % F4: 80-8F + two 80-BF
+         \UTFviii@check@three"80."8F.#3\empty%
+        \else%
+         1% F5 and above is rejected as a lead octet
+        \fi%
+       \fi%
+      \fi%
+     \fi%
+    \fi%
+   \fi%
+  \fi%
+ \fi%
+}%
+\def\UTFviii@check@one#1#2\empty{% #1: last octet, #2: leftover; gives 1 unless #1 is 80-BF and #2 is empty
+ \ifx\empty#2\empty%
+  \ifnum`#1<"80 %
+   1%
+  \else%
+   \ifnum`#1>"BF %
+    1%
+   \fi%
+  \fi%
+ \else%
+  1% something follows the octet that should have been the last one
+ \fi%
+}%
+\def\UTFviii@check@two#1.#2#3\empty{% #1: lowest value allowed for octet #2, #3: what follows #2
+ \ifx\empty#3\empty%
+  1% nothing is left for the final octet
+ \else%
+  \ifnum`#2<#1 %
+   1%
+  \else%
+   \ifnum`#2>"BF %
+    1%
+   \else%
+    \UTFviii@check@one#3\empty% #2 is in range, now check the final octet
+   \fi%
+  \fi%
+ \fi%
+}%
+\def\UTFviii@check@three#1.#2.#3#4\empty{% #1/#2: lowest/highest value allowed for octet #3, #4: what follows #3
+ \ifx\empty#4\empty%
+  1% nothing is left for the remaining two octets
+ \else%
+  \ifnum`#3<#1 %
+   1%
+  \else%
+   \ifnum`#3>#2 %
+    1%
+   \else%
+    \UTFviii@check@two"80.#4\empty% #3 is in range, the other two must be 80-BF
+   \fi%
+  \fi%
+ \fi%
+}%
+
+% bugfix: raise an error for values above "10FFFF before the saved parser runs
+\let\unicodedomino@parse@XML@charref\parse@XML@charref% keep the previous definition
+\gdef\parse@XML@charref{% the value to be checked is taken from \count@
+ \ifnum\count@>"10FFFF\relax%
+  \PackageError{inputenc}{%
+   Cannot define Unicode char value\space%
+   \UTFviii@hexnumber\count@\space%
+   (too large)%
+  }\@ehc% third (help) argument is mandatory; without it the \fi below is grabbed
+ \fi%
+ \unicodedomino@parse@XML@charref% in every case, hand over to the saved definition
+}%
+
+% bugfix: add 0xF4 to the list of permitted lead octets by globally defining
+% the active character 0xF4 as a four-octet starter (needs the above fix)
+\begingroup% keeps the \catcode and \uccode changes local
+\catcode`\~13
+\uccode`\~"F4
+\def\UTFviii@tmp{\xdef~{\noexpand\UTFviii@four@octets\string~}}%
+\uppercase\expandafter{\UTFviii@tmp}% \uppercase turns each active ~ above into an active 0xF4
+\endgroup%
+
+% run the fixup code only if necessary }}} end
+\fi% closes \ifx\UTFviii@check@three\@undefined from the top of this file
-- 
GitLab