From dff7cc430763df9a0682bc83e4bb7982dd141eec Mon Sep 17 00:00:00 2001
From: mirabilos <m@mirbsd.org>
Date: Tue, 12 Nov 2019 19:10:20 +0100
Subject: [PATCH] split unicodedomino_kernel_fixup_f4_and_checkseq.def:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• rename the part already merged upstream (10FFFF upper bound in
  \parse@XML@charref and the "F4 lead octet fix)
  ⇒ unicodedomino_kernel_fixup_f4.def
• move the part not merged upstream (better \UTFviii@checkseq) together
  with the remaining PR to unicodedomino_kernel_better_decode.def
• reorder inclusion; for now, include fixup_f4 unconditionally
---
 unicodedomino.sty                      |  4 +-
 unicodedomino_kernel_better_decode.def | 94 ++++++++++++++++++++++++-
 unicodedomino_kernel_fixup_f4.def      | 95 +-------------------------
 3 files changed, 96 insertions(+), 97 deletions(-)

diff --git a/unicodedomino.sty b/unicodedomino.sty
index 82aebe1..ad98073 100644
--- a/unicodedomino.sty
+++ b/unicodedomino.sty
@@ -41,12 +41,10 @@
 \input{unicodedomino_compat.def}%
 
 % pull code from other files
-\ifx\UTFviii@check@three\@undefined%
- \input{unicodedomino_kernel_fixup_f4_and_checkseq.def}%
-\fi%
 \ifx\UTFviii@decode\@undefined%
  \input{unicodedomino_kernel_better_decode.def}%
 \fi%
+\input{unicodedomino_kernel_fixup_f4.def}%
 \input{unicodedomino_kernel_cosmetics.def}%
 
 % retrieve the last octet
diff --git a/unicodedomino_kernel_better_decode.def b/unicodedomino_kernel_better_decode.def
index 9412650..3287ab4 100644
--- a/unicodedomino_kernel_better_decode.def
+++ b/unicodedomino_kernel_better_decode.def
@@ -3,7 +3,99 @@
 % See unicodedomino.sty for copyright and licence terms. Furthermore
 % this file is dual-licenced under the LPPL version 1.3c or later.
 %-
-% Improved Unicode decoding using the fixed-up checkseq code.
+% Fix check for illegal sequences to fail overlong encoded sequences
+% as well as codepoints outside of the Unicode range [0;10FFFF]. Use
+% fixed-up check code to improve Unicode decoding.
+
+%: https://github.com/latex3/latex2e/pull/83
+% new check for illegal sequences; expands to 1 if ill-formed, else to nothing
+\gdef\UTFviii@checkseq#1:#2#3\empty{% #1: unused text before the colon, #2: lead octet, #3: remaining octets
+ \ifnum`#2<"80 % lead 00-7F: single octet, nothing may follow
+  \ifx\empty#3\empty% true when #3 is empty
+  \else%
+   1%
+  \fi%
+ \else%
+  \ifnum`#2<"C2 % lead 80-C1: rejected outright
+   1%
+  \else%
+   \ifnum`#2<"E0 %
+    % lead C2-DF: one 80-BF
+    \UTFviii@check@one#3\empty%
+   \else%
+    \ifnum`#2<"E1 %
+     % lead E0: A0-BF + one 80-BF
+     \UTFviii@check@two"A0.#3\empty%
+    \else%
+     \ifnum`#2<"F0 %
+      % lead E1-EF: two 80-BF
+      \UTFviii@check@two"80.#3\empty%
+     \else%
+      \ifnum`#2<"F1 %
+       % lead F0: 90-BF + two 80-BF
+       \UTFviii@check@three"90."BF.#3\empty%
+      \else%
+       \ifnum`#2<"F4 %
+        % lead F1-F3: three 80-BF
+        \UTFviii@check@three"80."BF.#3\empty%
+       \else%
+        \ifnum`#2<"F5 %
+         % lead F4: 80-8F + two 80-BF
+         \UTFviii@check@three"80."8F.#3\empty%
+        \else%
+         1% lead F5 or above: rejected outright
+        \fi%
+       \fi%
+      \fi%
+     \fi%
+    \fi%
+   \fi%
+  \fi%
+ \fi%
+}%
+\gdef\UTFviii@check@one#1#2\empty{% #1: final octet, #2: anything after it; expands to 1 on failure, else to nothing
+ \ifx\empty#2\empty% true when nothing follows #1
+  \ifnum`#1<"80 % below the 80-BF range
+   1%
+  \else%
+   \ifnum`#1>"BF % above the 80-BF range
+    1%
+   \fi%
+  \fi%
+ \else%
+  1% extra material after the final octet
+ \fi%
+}%
+\gdef\UTFviii@check@two#1.#2#3\empty{% #1: lower bound for #2, #2: octet to test, #3: remaining octets; expands to 1 on failure
+ \ifx\empty#3\empty% true when nothing follows #2
+  1% one more octet is required after #2
+ \else%
+  \ifnum`#2<#1 % below the caller-supplied lower bound
+   1%
+  \else%
+   \ifnum`#2>"BF % above the fixed upper bound BF
+    1%
+   \else%
+    \UTFviii@check@one#3\empty% #2 accepted, check the last octet
+   \fi%
+  \fi%
+ \fi%
+}%
+\gdef\UTFviii@check@three#1.#2.#3#4\empty{% #1/#2: lower/upper bound for #3, #3: octet to test, #4: remaining octets; expands to 1 on failure
+ \ifx\empty#4\empty% true when nothing follows #3
+  1% further octets are required after #3
+ \else%
+  \ifnum`#3<#1 % below the caller-supplied lower bound
+   1%
+  \else%
+   \ifnum`#3>#2 % above the caller-supplied upper bound
+    1%
+   \else%
+    \UTFviii@check@two"80.#4\empty% #3 accepted, remaining octets are checked against 80-BF
+   \fi%
+  \fi%
+ \fi%
+}%
 
 %: https://github.com/latex3/latex2e/pull/83
 % override stock function, calling safer decode below
diff --git a/unicodedomino_kernel_fixup_f4.def b/unicodedomino_kernel_fixup_f4.def
index f535b4b..bc028c1 100644
--- a/unicodedomino_kernel_fixup_f4.def
+++ b/unicodedomino_kernel_fixup_f4.def
@@ -3,99 +3,8 @@
 % See unicodedomino.sty for copyright and licence terms. Furthermore
 % this file is dual-licenced under the LPPL version 1.3c or later.
 %-
-% Fix check for illegal sequences to fail overlong encoded sequences
-% as well as codepoints outside of the Unicode range [0;10FFFF]. Add
-% "F4 to the list of permitted lead octets.
-
-%: https://github.com/latex3/latex2e/pull/83
-% new check for illegal sequences
-\gdef\UTFviii@checkseq#1:#2#3\empty{%
- \ifnum`#2<"80 %
-  \ifx\empty#3\empty%
-  \else%
-   1%
-  \fi%
- \else%
-  \ifnum`#2<"C2 %
-   1%
-  \else%
-   \ifnum`#2<"E0 %
-    % one 80-BF
-    \UTFviii@check@one#3\empty%
-   \else%
-    \ifnum`#2<"E1 %
-     % A0-BF + one 80-BF
-     \UTFviii@check@two"A0.#3\empty%
-    \else%
-     \ifnum`#2<"F0 %
-      % two 80-BF
-      \UTFviii@check@two"80.#3\empty%
-     \else%
-      \ifnum`#2<"F1 %
-       % 90-BF + two 80-BF
-       \UTFviii@check@three"90."BF.#3\empty%
-      \else%
-       \ifnum`#2<"F4 %
-        % three 80-BF
-        \UTFviii@check@three"80."BF.#3\empty%
-       \else%
-        \ifnum`#2<"F5 %
-         % 80-8F + two 80-BF
-         \UTFviii@check@three"80."8F.#3\empty%
-        \else%
-         1%
-        \fi%
-       \fi%
-      \fi%
-     \fi%
-    \fi%
-   \fi%
-  \fi%
- \fi%
-}%
-\gdef\UTFviii@check@one#1#2\empty{%
- \ifx\empty#2\empty%
-  \ifnum`#1<"80 %
-   1%
-  \else%
-   \ifnum`#1>"BF %
-    1%
-   \fi%
-  \fi%
- \else%
-  1%
- \fi%
-}%
-\gdef\UTFviii@check@two#1.#2#3\empty{%
- \ifx\empty#3\empty%
-  1%
- \else%
-  \ifnum`#2<#1 %
-   1%
-  \else%
-   \ifnum`#2>"BF %
-    1%
-   \else%
-    \UTFviii@check@one#3\empty%
-   \fi%
-  \fi%
- \fi%
-}%
-\gdef\UTFviii@check@three#1.#2.#3#4\empty{%
- \ifx\empty#4\empty%
-  1%
- \else%
-  \ifnum`#3<#1 %
-   1%
-  \else%
-   \ifnum`#3>#2 %
-    1%
-   \else%
-    \UTFviii@check@two"80.#4\empty%
-   \fi%
-  \fi%
- \fi%
-}%
+% Disallow codepoints outside of the Unicode range [0;10FFFF]; allow
+% "F4 as lead octet.
 
 %: fixed upstream
 % bugfix: disallow too large definitions
-- 
GitLab