diff --git a/unicodedomino.sty b/unicodedomino.sty index 14678689ec5d8df284aaed41bb3be1b5c69d26f8..89aee1e580007aeebf2e047be8c15ef7b75f1619 100644 --- a/unicodedomino.sty +++ b/unicodedomino.sty @@ -28,7 +28,7 @@ % makes all UTF-8 available in listings. \NeedsTeXFormat{LaTeX2e}% -\ProvidesPackage{unicodedomino}[2018/08/06 1.7 Domino for unknown codepoints]% +\ProvidesPackage{unicodedomino}[2018/10/06 1.9 Domino for unknown codepoints]% \makeatletter% \ifx\numexpr\@undefined% @@ -38,6 +38,9 @@ \PackageError{unicodedomino}{This package requires UTF-8 input encoding}% \fi% +% pull compatibility code for users of older base/utf8ienc.dtx +\input{unicodedomino_compat.def}% + % pull code from other files \ifx\UTFviii@check@three\@undefined% \input{unicodedomino_kernel_fixup_f4_and_checkseq.def}% @@ -46,7 +49,6 @@ \input{unicodedomino_kernel_better_decode.def}% \fi% \input{unicodedomino_kernel_cosmetics.def}% -\input{unicodedomino_compat.def}% % retrieve the last octet \def\unicodedomino@last#1{% @@ -94,6 +96,22 @@ \expandafter\UTFviii@defined\csname u8:#1\endcsname% }% +% split an invalid byte sequence for error output +\gdef\unicodedomino@splitseq#1:#2\relax{% + \unicodedomino@hexseq#2\relax% +}% +\gdef\unicodedomino@hexseq#1#2\relax{% + % display first octet + \space "\unicodedomino@hexbyte{`#1}% + % recursively handle remaining octets + \ifx\relax#2\relax\else\unicodedomino@hexseq#2\relax\fi% +}% +% format a number as two-digit hex +\gdef\unicodedomino@hexbyte#1{% + \ifnum#1<16 0\fi% + \UTFviii@hexnumber{#1}% +}% + % main handler \def\UTFviii@defined#1{% \ifx#1\relax% @@ -110,7 +128,7 @@ \else% % invalid multibyte character \PackageError{inputenc}{Invalid UTF-8 byte sequence:% - \expandafter\UTFviii@splitseq\string#1\relax}% + \expandafter\unicodedomino@splitseq\string#1\relax}% \UTFviii@invalid@help \fi% \else% diff --git a/unicodedomino_compat.def b/unicodedomino_compat.def index 59038af3db265ed375fd887647b9ab6b44e92cc3..bedde489ce3c6f57027a6c4af82541677e6ad6e6 100644 --- a/unicodedomino_compat.def +++ b/unicodedomino_compat.def @@ -33,3 +33,12 @@ Do ensure the source document is saved in UTF-8 encoding% }% \fi% + +% added in v1.2e 2018/09/30(?) +\ifx\UTFviii@hexcodepoint\@undefined% + % override message to format the codepoint correctly + \gdef\UTFviii@splitcsname#1:#2\relax{% + #2 (\expandafter\UTFviii@hexcodepoint\expandafter{% + \the\numexpr\decode@UTFviii#2\relax})% + }% +\fi% diff --git a/unicodedomino_kernel_better_decode.def b/unicodedomino_kernel_better_decode.def index 90d64147fb2d3df8ca538ffd201c3030c580f4a9..9412650d200cdf6463d9296821bd5dff6e1e8581 100644 --- a/unicodedomino_kernel_better_decode.def +++ b/unicodedomino_kernel_better_decode.def @@ -5,11 +5,13 @@ %- % Improved Unicode decoding using the fixed-up checkseq code. +%: https://github.com/latex3/latex2e/pull/83 % override stock function, calling safer decode below \gdef\decode@UTFviii#1\relax{% \the\numexpr(\UTFviii@decode0:#1\relax)% }% +%: https://github.com/latex3/latex2e/pull/83 % safer decode, returns 0x1FFFFF for illegal sequences \gdef\UTFviii@decode#1\relax{% \if\relax\expandafter\UTFviii@checkseq\string#1\empty\relax% @@ -19,6 +21,7 @@ \fi% }% +%: https://github.com/latex3/latex2e/pull/83 \gdef\UTFviii@dec@lead#1:#2#3\relax{% % we know #2 is in 00..7F, C2..F4 \ifnum`#2<"80 % @@ -37,6 +40,7 @@ \fi% }% +%: https://github.com/latex3/latex2e/pull/83 \gdef\UTFviii@dec@trail#1#2\relax{% )*64+(`#1-"80)% \ifx\relax#2\else\UTFviii@dec@trail#2\relax\fi% diff --git a/unicodedomino_kernel_cosmetics.def b/unicodedomino_kernel_cosmetics.def index 0ed6bdd025c82645bbb1a9ef244eb7e8d4c55abb..ee2a96f1b5a6e4e2050f9261951a52b680ec1089 100644 --- a/unicodedomino_kernel_cosmetics.def +++ b/unicodedomino_kernel_cosmetics.def @@ -3,54 +3,25 @@ % See unicodedomino.sty for copyright and licence terms. Furthermore % this file is dual-licenced under the LPPL version 1.3c or later. %- -% Improve error and warning formatting. Fully submitted upstream. - -%: https://github.com/latex3/latex2e/pull/62 -\ifx\UTFviii@hexbyte\@undefined% - % format a number as two-digit hex - \gdef\UTFviii@hexbyte#1{% - \ifnum#1<16 0\fi% - \UTFviii@hexnumber{#1}% - }% +% Improve error and warning formatting. +%: merged, but ifx yet to see +\ifx\UTFviii@check@three\@undefined% % override message to give the byte in hex \def\UTFviii@invalid@err#1{% - \PackageError{inputenc}{Invalid UTF-8 byte "\UTFviii@hexbyte{`#1}}% + \PackageError{inputenc}{Invalid UTF-8 byte "\UTFviii@hexnumber{`#1}}% \UTFviii@invalid@help% }% \fi% -%: https://github.com/latex3/latex2e/pull/63 -\ifx\UTFviii@hexcodepoint\@undefined% - % format a number as Unicode codepoint hex - \gdef\UTFviii@hexcodepoint#1{% - \ifnum#1<16 U+000% - \else\ifnum#1<256 U+00% - \else\ifnum#1<4096 U+0% - \else\ifnum#1<65536 U+% - \else\ifnum#1<1048576 U-000% - \else U-00% - \fi\fi\fi\fi\fi% - \UTFviii@hexnumber{#1}% - }% - - % override message to format the codepoint correctly - \gdef\UTFviii@splitcsname#1:#2\relax{% - #2 (\expandafter\UTFviii@hexcodepoint\expandafter{% - \the\numexpr\decode@UTFviii#2\relax})% - }% -\fi% - -%: https://github.com/latex3/latex2e/pull/62 -% split an invalid byte sequence for output -\ifx\UTFviii@splitseq\@undefined% - \gdef\UTFviii@splitseq#1:#2\relax{% - \UTFviii@hexseq#2\relax% - }% - \gdef\UTFviii@hexseq#1#2\relax{% - % display first octet - \space "\UTFviii@hexbyte{`#1}% - % recursively handle remaining octets - \ifx\relax#2\relax\else\UTFviii@hexseq#2\relax\fi% - }% -\fi% +% override to format a hex Unicode codepoint correctly +\gdef\UTFviii@hexcodepoint#1{% + \ifnum#1<16 U+000% + \else\ifnum#1<256 U+00% + \else\ifnum#1<4096 U+0% + \else\ifnum#1<65536 U+% + \else\ifnum#1<1048576 U-000% + \else U-00% + \fi\fi\fi\fi\fi% + \UTFviii@hexnumber{#1}% +}% diff --git a/unicodedomino_kernel_fixup_f4_and_checkseq.def b/unicodedomino_kernel_fixup_f4_and_checkseq.def index b8833ffc2400f4c1b19493ba2e7d45a367057525..f535b4b50ea0a897049b46531c6b0615a3c56c90 100644 --- a/unicodedomino_kernel_fixup_f4_and_checkseq.def +++ b/unicodedomino_kernel_fixup_f4_and_checkseq.def @@ -7,7 +7,7 @@ % as well as codepoints outside of the Unicode range [0;10FFFF]. Add % "F4 to the list of permitted lead octets. -%: not forwarded yet, will only do so once PR#60 (see below) is in +%: https://github.com/latex3/latex2e/pull/83 % new check for illegal sequences \gdef\UTFviii@checkseq#1:#2#3\empty{% \ifnum`#2<"80 % @@ -97,7 +97,7 @@ \fi% }% -%: https://github.com/latex3/latex2e/pull/60 +%: fixed upstream % bugfix: disallow too large definitions \let\unicodedomino@parse@XML@charref\parse@XML@charref% \gdef\parse@XML@charref{% @@ -111,7 +111,7 @@ \unicodedomino@parse@XML@charref% }% -%: https://github.com/latex3/latex2e/pull/60 +%: also merged % bugfix: add "F4 to the list of permitted lead octets % (needs the above fix) \begingroup%