Unified diff updating the unicodedomino package from 1.8 to 1.10.

NOTE(review): this patch was restored from a whitespace-collapsed copy.
Line breaks and blank lines were rebuilt so that every hunk matches the
line counts in its @@ header. Leading indentation, the blank after "%"
in the copyright lines (may have been a tab) and the position of the
blank context line in the test.tex hunk are reconstructed -- verify them
against the target tree, or apply with whitespace tolerance (patch -l).

diff --git a/test.tex b/test.tex
index 16a3f4f950a3c30eca56c01b8af1e25ad8d39294..34056c135a807945170b82dd2212c67121333f78 100644
--- a/test.tex
+++ b/test.tex
@@ -1,3 +1,5 @@
+% demo file, not complex enough for copyright
+
 \documentclass{article}%
 \usepackage[utf8]{inputenc}%
 
diff --git a/unicodedomino.sty b/unicodedomino.sty
index 777c8adc7d5d2c0ecece0a5ae2b53de16e22480d..4c15f3d1e6ce49dde8ee00df4e69fdc17358db8f 100644
--- a/unicodedomino.sty
+++ b/unicodedomino.sty
@@ -2,9 +2,9 @@
 %-
 % Copyright © 2018
 % mirabilos <m@mirbsd.org>
-% Copyright © 2017
+% Copyright © 2017, 2018
 % mirabilos <t.glaser@tarent.de>
-% with contributions by (among others)
+% with contributions via chat by (among others)
 % David Carlisle <http://tex.stackexchange.com/users/1090>
 %
 % Provided that these terms and disclaimer and all copyright notices
@@ -28,7 +28,7 @@
 % makes all UTF-8 available in listings.
 
 \NeedsTeXFormat{LaTeX2e}%
-\ProvidesPackage{unicodedomino}[2019/05/14 1.8 Domino for unknown codepoints]%
+\ProvidesPackage{unicodedomino}[2019/05/14 1.10 Domino for unknown codepoints]%
 
 \ifx\numexpr\@undefined%
   \PackageError{unicodedomino}{This package requires numexpr}%
@@ -37,6 +37,9 @@
   \PackageError{unicodedomino}{This package requires UTF-8 input encoding}%
 \fi%
 
+% pull compatibility code for users of older base/utf8ienc.dtx
+\input{unicodedomino_compat.def}%
+
 % pull code from other files
 \ifx\UTFviii@check@three\@undefined%
   \input{unicodedomino_kernel_fixup_f4_and_checkseq.def}%
@@ -45,7 +48,6 @@
   \input{unicodedomino_kernel_better_decode.def}%
 \fi%
 \input{unicodedomino_kernel_cosmetics.def}%
-\input{unicodedomino_compat.def}%
 
 % retrieve the last octet
 \def\unicodedomino@last#1{%
@@ -93,6 +95,22 @@
   \expandafter\UTFviii@defined\csname u8:#1\endcsname%
 }%
 
+% split an invalid byte sequence for error output
+\gdef\unicodedomino@splitseq#1:#2\relax{%
+  \unicodedomino@hexseq#2\relax%
+}%
+\gdef\unicodedomino@hexseq#1#2\relax{%
+  % display first octet
+  \space "\unicodedomino@hexbyte{`#1}%
+  % recursively handle remaining octets
+  \ifx\relax#2\relax\else\unicodedomino@hexseq#2\relax\fi%
+}%
+% format a number as two-digit hex
+\gdef\unicodedomino@hexbyte#1{%
+  \ifnum#1<16 0\fi%
+  \UTFviii@hexnumber{#1}%
+}%
+
 % main handler
 \def\UTFviii@defined#1{%
   \ifx#1\relax%
@@ -109,7 +127,7 @@
     \else%
       % invalid multibyte character
       \PackageError{inputenc}{Invalid UTF-8 byte sequence:%
-        \expandafter\UTFviii@splitseq\string#1\relax}%
+        \expandafter\unicodedomino@splitseq\string#1\relax}%
         \UTFviii@invalid@help
     \fi%
   \else%
diff --git a/unicodedomino_compat.def b/unicodedomino_compat.def
index 12e62ef939891c5f164098fd38f860d7c8c27909..bedde489ce3c6f57027a6c4af82541677e6ad6e6 100644
--- a/unicodedomino_compat.def
+++ b/unicodedomino_compat.def
@@ -1,22 +1,7 @@
 % -*- mode: tex -*-
 %-
-% Copyright © 2018
-% mirabilos <m@mirbsd.org>
-%
-% Provided that these terms and disclaimer and all copyright notices
-% are retained or reproduced in an accompanying document, permission
-% is granted to deal in this work without restriction, including un‐
-% limited rights to use, publicly perform, distribute, sell, modify,
-% merge, give away, or sublicence.
-%
-% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
-% the utmost extent permitted by applicable law, neither express nor
-% implied; without malicious intent or gross negligence. In no event
-% may a licensor, author or contributor be held liable for indirect,
-% direct, other damage, loss, or other issues arising in any way out
-% of dealing in the work, even if advised of the possibility of such
-% damage or existence of a defect, except proven that it results out
-% of said person’s immediate fault when using the work as intended.
+% See unicodedomino.sty for copyright and licence terms. Furthermore
+% this file is dual-licenced under the LPPL version 1.3c or later.
 %-
 % Compatibility code with older utf8.def versions
 
@@ -48,3 +33,12 @@
     Do ensure the source document is saved in UTF-8 encoding%
   }%
 \fi%
+
+% added in v1.2e 2018/09/30(?)
+\ifx\UTFviii@hexcodepoint\@undefined%
+  % override message to format the codepoint correctly
+  \gdef\UTFviii@splitcsname#1:#2\relax{%
+    #2 (\expandafter\UTFviii@hexcodepoint\expandafter{%
+      \the\numexpr\decode@UTFviii#2\relax})%
+  }%
+\fi%
diff --git a/unicodedomino_kernel_better_decode.def b/unicodedomino_kernel_better_decode.def
index aa0a72323275cf614d97700591fe9e03aadc8bae..9412650d200cdf6463d9296821bd5dff6e1e8581 100644
--- a/unicodedomino_kernel_better_decode.def
+++ b/unicodedomino_kernel_better_decode.def
@@ -1,32 +1,17 @@
 % -*- mode: tex -*-
 %-
-% Copyright © 2018
-% mirabilos <t.glaser@tarent.de>
-%
-% Provided that these terms and disclaimer and all copyright notices
-% are retained or reproduced in an accompanying document, permission
-% is granted to deal in this work without restriction, including un‐
-% limited rights to use, publicly perform, distribute, sell, modify,
-% merge, give away, or sublicence.
-%
-% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
-% the utmost extent permitted by applicable law, neither express nor
-% implied; without malicious intent or gross negligence. In no event
-% may a licensor, author or contributor be held liable for indirect,
-% direct, other damage, loss, or other issues arising in any way out
-% of dealing in the work, even if advised of the possibility of such
-% damage or existence of a defect, except proven that it results out
-% of said person’s immediate fault when using the work as intended.
-%
-% This file is dual-licenced under the LPPL version 1.3c or later.
+% See unicodedomino.sty for copyright and licence terms. Furthermore
+% this file is dual-licenced under the LPPL version 1.3c or later.
 %-
 % Improved Unicode decoding using the fixed-up checkseq code.
 
+%: https://github.com/latex3/latex2e/pull/83
 % override stock function, calling safer decode below
 \gdef\decode@UTFviii#1\relax{%
   \the\numexpr(\UTFviii@decode0:#1\relax)%
 }%
 
+%: https://github.com/latex3/latex2e/pull/83
 % safer decode, returns 0x1FFFFF for illegal sequences
 \gdef\UTFviii@decode#1\relax{%
   \if\relax\expandafter\UTFviii@checkseq\string#1\empty\relax%
@@ -36,6 +21,7 @@
   \fi%
 }%
 
+%: https://github.com/latex3/latex2e/pull/83
 \gdef\UTFviii@dec@lead#1:#2#3\relax{%
   % we know #2 is in 00..7F, C2..F4
   \ifnum`#2<"80 %
@@ -54,6 +40,7 @@
   \fi%
 }%
 
+%: https://github.com/latex3/latex2e/pull/83
 \gdef\UTFviii@dec@trail#1#2\relax{%
   )*64+(`#1-"80)%
   \ifx\relax#2\else\UTFviii@dec@trail#2\relax\fi%
diff --git a/unicodedomino_kernel_cosmetics.def b/unicodedomino_kernel_cosmetics.def
index 23de9eee4e0dd8684de007cf9d3757c212ee7bc5..ee2a96f1b5a6e4e2050f9261951a52b680ec1089 100644
--- a/unicodedomino_kernel_cosmetics.def
+++ b/unicodedomino_kernel_cosmetics.def
@@ -1,73 +1,27 @@
 % -*- mode: tex -*-
 %-
-% Copyright © 2017, 2018
-% mirabilos <m@mirbsd.org>
-%
-% Provided that these terms and disclaimer and all copyright notices
-% are retained or reproduced in an accompanying document, permission
-% is granted to deal in this work without restriction, including un‐
-% limited rights to use, publicly perform, distribute, sell, modify,
-% merge, give away, or sublicence.
-%
-% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
-% the utmost extent permitted by applicable law, neither express nor
-% implied; without malicious intent or gross negligence. In no event
-% may a licensor, author or contributor be held liable for indirect,
-% direct, other damage, loss, or other issues arising in any way out
-% of dealing in the work, even if advised of the possibility of such
-% damage or existence of a defect, except proven that it results out
-% of said person’s immediate fault when using the work as intended.
-%
-% This file is dual-licenced under the LPPL version 1.3c or later.
+% See unicodedomino.sty for copyright and licence terms. Furthermore
+% this file is dual-licenced under the LPPL version 1.3c or later.
 %-
-% Improve error and warning formatting. Fully submitted upstream.
-
-%: https://github.com/latex3/latex2e/pull/62
-\ifx\UTFviii@hexbyte\@undefined%
-  % format a number as two-digit hex
-  \gdef\UTFviii@hexbyte#1{%
-    \ifnum#1<16 0\fi%
-    \UTFviii@hexnumber{#1}%
-  }%
+% Improve error and warning formatting.
+%: merged, but ifx yet to see
+\ifx\UTFviii@check@three\@undefined%
 
   % override message to give the byte in hex
   \def\UTFviii@invalid@err#1{%
-    \PackageError{inputenc}{Invalid UTF-8 byte "\UTFviii@hexbyte{`#1}}%
+    \PackageError{inputenc}{Invalid UTF-8 byte "\UTFviii@hexnumber{`#1}}%
     \UTFviii@invalid@help%
   }%
 \fi%
 
-%: https://github.com/latex3/latex2e/pull/63
-\ifx\UTFviii@hexcodepoint\@undefined%
-  % format a number as Unicode codepoint hex
-  \gdef\UTFviii@hexcodepoint#1{%
-    \ifnum#1<16 U+000%
-    \else\ifnum#1<256 U+00%
-    \else\ifnum#1<4096 U+0%
-    \else\ifnum#1<65536 U+%
-    \else\ifnum#1<1048576 U-000%
-    \else U-00%
-    \fi\fi\fi\fi\fi%
-    \UTFviii@hexnumber{#1}%
-  }%
-
-  % override message to format the codepoint correctly
-  \gdef\UTFviii@splitcsname#1:#2\relax{%
-    #2 (\expandafter\UTFviii@hexcodepoint\expandafter{%
-      \the\numexpr\decode@UTFviii#2\relax})%
-  }%
-\fi%
-
-%: https://github.com/latex3/latex2e/pull/62
-% split an invalid byte sequence for output
-\ifx\UTFviii@splitseq\@undefined%
-  \gdef\UTFviii@splitseq#1:#2\relax{%
-    \UTFviii@hexseq#2\relax%
-  }%
-  \gdef\UTFviii@hexseq#1#2\relax{%
-    % display first octet
-    \space "\UTFviii@hexbyte{`#1}%
-    % recursively handle remaining octets
-    \ifx\relax#2\relax\else\UTFviii@hexseq#2\relax\fi%
-  }%
-\fi%
+% override to format a hex Unicode codepoint correctly
+\gdef\UTFviii@hexcodepoint#1{%
+  \ifnum#1<16 U+000%
+  \else\ifnum#1<256 U+00%
+  \else\ifnum#1<4096 U+0%
+  \else\ifnum#1<65536 U+%
+  \else\ifnum#1<1048576 U-000%
+  \else U-00%
+  \fi\fi\fi\fi\fi%
+  \UTFviii@hexnumber{#1}%
+}%
diff --git a/unicodedomino_kernel_fixup_f4_and_checkseq.def b/unicodedomino_kernel_fixup_f4_and_checkseq.def
index 545deae4d197bd3378961af7f87cff78fca662c4..f535b4b50ea0a897049b46531c6b0615a3c56c90 100644
--- a/unicodedomino_kernel_fixup_f4_and_checkseq.def
+++ b/unicodedomino_kernel_fixup_f4_and_checkseq.def
@@ -1,30 +1,13 @@
 % -*- mode: tex -*-
 %-
-% Copyright © 2018
-% mirabilos <m@mirbsd.org>
-%
-% Provided that these terms and disclaimer and all copyright notices
-% are retained or reproduced in an accompanying document, permission
-% is granted to deal in this work without restriction, including un‐
-% limited rights to use, publicly perform, distribute, sell, modify,
-% merge, give away, or sublicence.
-%
-% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
-% the utmost extent permitted by applicable law, neither express nor
-% implied; without malicious intent or gross negligence. In no event
-% may a licensor, author or contributor be held liable for indirect,
-% direct, other damage, loss, or other issues arising in any way out
-% of dealing in the work, even if advised of the possibility of such
-% damage or existence of a defect, except proven that it results out
-% of said person’s immediate fault when using the work as intended.
-%
-% This file is dual-licenced under the LPPL version 1.3c or later.
+% See unicodedomino.sty for copyright and licence terms. Furthermore
+% this file is dual-licenced under the LPPL version 1.3c or later.
 %-
 % Fix check for illegal sequences to fail overlong encoded sequences
 % as well as codepoints outside of the Unicode range [0;10FFFF]. Add
 % "F4 to the list of permitted lead octets.
 
-%: not forwarded yet, will only do so once PR#60 (see below) is in
+%: https://github.com/latex3/latex2e/pull/83
 % new check for illegal sequences
 \gdef\UTFviii@checkseq#1:#2#3\empty{%
   \ifnum`#2<"80 %
@@ -114,7 +97,7 @@
   \fi%
 }%
 
-%: https://github.com/latex3/latex2e/pull/60
+%: fixed upstream
 % bugfix: disallow too large definitions
 \let\unicodedomino@parse@XML@charref\parse@XML@charref%
 \gdef\parse@XML@charref{%
@@ -128,7 +111,7 @@
   \unicodedomino@parse@XML@charref%
 }%
 
-%: https://github.com/latex3/latex2e/pull/60
+%: also merged
 % bugfix: add "F4 to the list of permitted lead octets
 % (needs the above fix)
 \begingroup%