diff --git a/unicodedomino.sty b/unicodedomino.sty index 0c7d25571678ecb3f094efb2d843fcacf42d5fc0..d9aa05ef8b6daf190ff72bdd9d1449be0d885542 100644 --- a/unicodedomino.sty +++ b/unicodedomino.sty @@ -40,102 +40,15 @@ \ifx\decode@UTFviii\@undefined% \PackageError{unicodedomino}% {Your utf8.def is too old, consider updating it}% - {You will need Debian stretch or newer}% + {You will need v1.1o 2015/08/28, Debian stretch or newer}% \fi% % pull code from other files \input{unicodedomino_kernel_fixup.def}% +\input{unicodedomino_kernel_cosmetics.def}% +\input{unicodedomino_compat.def}% -% nicer printing of codepoint hex numbers, not strictly necessary -\def\unicodedomino@codepoint#1{% - \ifnum#1>1048575% - U-00% - \else\ifnum#1>65535% - U-000% - \else\ifnum#1>4095% - U+% - \else\ifnum#1>255% - U+0% - \else\ifnum#1>15% - U+00% - \else% - U+000% - \fi\fi\fi\fi\fi% - \expandafter\UTFviii@hexnumber\expandafter{#1}% -}% - -% same for bytes -\def\unicodedomino@bytehex#1{% - \expandafter\UTFviii@hexdigit\expandafter{\the\numexpr(#1-8)/16\relax}% - \UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}% -}% - -% override to beautify the output, not strictly necessary but requested -\gdef\UTFviii@splitcsname#1:#2\relax{% - #2 (\expandafter\unicodedomino@codepoint\expandafter{% - \the\numexpr\decode@UTFviii#2\relax})% -}% -\def\UTFviii@invalid@err#1{% - \PackageError{inputenc}{% - Invalid UTF-8 byte 0x\unicodedomino@bytehex{\number`#1}% - }\UTFviii@invalid@help% -}% - -% for invalid encoding output -\gdef\unicodedomino@splith@x#1#2\relax{% - \space 0x\UTFviii@hexnumber{`#1}% - \ifx\relax#2\relax\else\unicodedomino@splith@x#2\relax\fi% -}% -\gdef\unicodedomino@splithex#1:#2\relax{% - \unicodedomino@splith@x#2\relax% -}% - -% render the actual domino piece -\def\unicodedomino@box#1#2{% - \begingroup% - \fboxsep=.1em% - \fboxrule=.4pt% - \texttt{\fbox{\makebox[0pt][l]{\textsuperscript{#1}}\textsubscript{#2}}}% - \endgroup% -}% - -% expand one hex nybble 
-\def\unicodedomino@hex@ne#1{% - \ifnum#1>15 % - \expandafter\unicodedomino@hex@ne\expandafter{\the\numexpr(#1-8)/16\relax}% - \fi% - \UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}.% -}% -% expand all hex nybbles, zero-padded -\def\unicodedomino@hex@ll#1{% - \ifnum#1<1048576 0.\fi% - \ifnum#1<65536 0.\fi% - \ifnum#1<4096 0.\fi% - \ifnum#1<256 0.\fi% - \ifnum#1<16 0.\fi% - \expandafter\unicodedomino@hex@ne\expandafter{#1}% -}% -% call the appropriate box function -\def\unicodedomino@hex@do#1.#2.#3.#4.#5.#6.{% - \ifnum"#1#2=0% - \unicodedomino@box{#3#4}{#5#6}% - \else% - \unicodedomino@box{#1#2#3}{#4#5#6}% - \fi% -}% -% split nybbles and pass on -\protected\def\unicodedomino@hex#1{% - \edef\unicodedomino@tmp{\expandafter\unicodedomino@hex@ll\expandafter{#1}}% - \expandafter\unicodedomino@hex@do\unicodedomino@tmp\relax% -}% -% split, decode and pass on -\def\unicodedomino@decode#1:#2\relax{% - \expandafter\unicodedomino@hex\expandafter{% - \the\numexpr\decode@UTFviii#2\relax% - }% -}% - -% retrieve the last octet (lstlistings compatibility) +% retrieve the last octet \def\unicodedomino@last#1{% \expandafter\unicodedomino@l@st#1\empty\empty\empty% }% @@ -147,12 +60,7 @@ \fi% }% -% handle trimmed octets -\def\unicodedomino@octets#1\empty{% - \expandafter\UTFviii@defined\csname u8:#1\endcsname% -}% - -% collect octet tokens, trim them, pass on to handler +% collect octet tokens, trim them and pass them on to the handler \long\def\UTFviii@two@octets#1#2{% \edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}}% \expandafter\unicodedomino@octets\unicodedomino@tmp\empty% @@ -166,38 +74,79 @@ \expandafter\unicodedomino@octets\unicodedomino@tmp\empty% }% +% handle trimmed octets: build the u8:<octets> control sequence and pass it to the main handler +\def\unicodedomino@octets#1\empty{% + \expandafter\UTFviii@defined\csname u8:#1\endcsname% +}% + % main handler \def\UTFviii@defined#1{% \ifx#1\relax% \if\relax\expandafter\UTFviii@checkseq\string#1\empty\relax% - % unknown char + % unknown character but valid codepoint 
\ifx\protect\@typeset@protect% - % not protected + % not protected, replace by domino \PackageWarning{inputenc}{% - Unicode\space char\space\expandafter\UTFviii@splitcsname\string#1\relax% - \space not\space set\space up\space for\space use\MessageBreak with\space% - LaTeX, replacing% + Unicode character \expandafter\UTFviii@splitcsname\string#1\relax\space% + not set up for use\MessageBreak with LaTeX, replacing% }% - % note: same warning as in \UTFviii@undefined@err in utf8.def v1.2c, - % except adding “, replacing” at the end and not being an error - \expandafter\unicodedomino@decode\string#1\relax% + % note: warning is the same as in utf8ienc.dtx v1.2d except + % with the string “, replacing” added at the end + \expandafter\unicodedomino@domino\string#1\relax% \else% % protected, just write the original character \expandafter\@gobblefour\string#1% \fi% \else% - % invalid encoding - \PackageError{inputenc}{% - Invalid\space UTF-8\space byte\space sequence:% - \expandafter\unicodedomino@splithex\string#1\relax% - }{Do ensure the source document is saved in UTF-8 encoding}% + \PackageError{inputenc}{Invalid UTF-8 byte sequence:% + \expandafter\UTFviii@splitseq\string#1\relax}% + \UTFviii@invalid@help \fi% \else% - % known char, expand + % known character, expand + %XXX perhaps do not expand it when protected? 
\expandafter#1% \fi% }% +% decode the octets into a codepoint number, expand it to six hex nybbles and pass them on +\def\unicodedomino@domino#1:#2\relax{% + \edef\unicodedomino@tmp{% + \expandafter\unicodedomino@domino@hex\expandafter{% + \the\numexpr\decode@UTFviii#2\relax% + }% + }% + \expandafter\unicodedomino@domino@switch\unicodedomino@tmp% +}% + +% convert to six hex nybbles, zero-padded on the left +\def\unicodedomino@domino@hex#1{% + \ifnum#1<1048576 0\fi% + \ifnum#1<65536 0\fi% + \ifnum#1<4096 0\fi% + \ifnum#1<256 0\fi% + \ifnum#1<16 0\fi% + \UTFviii@hexnumber{#1}% +}% + +% output a BMP block (2+2 nybbles) if the top two nybbles are zero, else an astral planes block (3+3) +\def\unicodedomino@domino@switch#1#2#3#4#5#6{% + \ifnum"#1#2=0% + \unicodedomino@box{#3#4}{#5#6}% + \else% + \unicodedomino@box{#1#2#3}{#4#5#6}% + \fi% +}% + +% render the actual domino piece +\def\unicodedomino@box#1#2{% + \begingroup% + \fboxsep=.1em% + \fboxrule=.4pt% + \texttt{\fbox{\makebox[0pt][l]{\textsuperscript{#1}}\textsubscript{#2}}}% + \endgroup% +}% + % clean up after ourselves \makeatother% \endinput diff --git a/unicodedomino_compat.def b/unicodedomino_compat.def new file mode 100644 index 0000000000000000000000000000000000000000..1beff1822af6182268e72ef00ad05b7aa62aafe9 --- /dev/null +++ b/unicodedomino_compat.def @@ -0,0 +1,27 @@ +% -*- mode: tex -*- +%- +% Copyright © 2018 +% mirabilos <m@mirbsd.org> +% +% Provided that these terms and disclaimer and all copyright notices +% are retained or reproduced in an accompanying document, permission +% is granted to deal in this work without restriction, including un‐ +% limited rights to use, publicly perform, distribute, sell, modify, +% merge, give away, or sublicence. +% +% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to +% the utmost extent permitted by applicable law, neither express nor +% implied; without malicious intent or gross negligence. 
In no event +% may a licensor, author or contributor be held liable for indirect, +% direct, other damage, loss, or other issues arising in any way out +% of dealing in the work, even if advised of the possibility of such +% damage or existence of a defect, except proven that it results out +% of said person’s immediate fault when using the work as intended. +%- +% Compatibility code: provide \UTFviii@invalid@help when the loaded utf8.def does not define it + +\ifx\UTFviii@invalid@help\@undefined% + \def\UTFviii@invalid@help{% + Do ensure the source document is saved in UTF-8 encoding% + }% +\fi% diff --git a/unicodedomino_kernel_cosmetics.def b/unicodedomino_kernel_cosmetics.def new file mode 100644 index 0000000000000000000000000000000000000000..5928c0658372b9702ee8a43f233bc6bb88c64e0b --- /dev/null +++ b/unicodedomino_kernel_cosmetics.def @@ -0,0 +1,70 @@ +% -*- mode: tex -*- +%- +% Copyright © 2017, 2018 +% mirabilos <m@mirbsd.org> +% +% Provided that these terms and disclaimer and all copyright notices +% are retained or reproduced in an accompanying document, permission +% is granted to deal in this work without restriction, including un‐ +% limited rights to use, publicly perform, distribute, sell, modify, +% merge, give away, or sublicence. +% +% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to +% the utmost extent permitted by applicable law, neither express nor +% implied; without malicious intent or gross negligence. In no event +% may a licensor, author or contributor be held liable for indirect, +% direct, other damage, loss, or other issues arising in any way out +% of dealing in the work, even if advised of the possibility of such +% damage or existence of a defect, except proven that it results out +% of said person’s immediate fault when using the work as intended. +% +% This file is dual-licenced under the LPPL version 1.3c or later. +%- +% Improve error and warning formatting. 
+ +% format a number as hex, zero-padded to at least two digits (only defined if missing) +\ifx\UTFviii@hexbyte\@undefined% + \def\UTFviii@hexbyte#1{% + \ifnum#1<16 0\fi% + \UTFviii@hexnumber{#1}% + }% +\fi% + +% format a number as Unicode codepoint: U+ with four hex digits below 65536, else U- with eight +\ifx\UTFviii@hexcodepoint\@undefined% + \def\UTFviii@hexcodepoint#1{% + \ifnum#1<16 U+000% + \else\ifnum#1<256 U+00% + \else\ifnum#1<4096 U+0% + \else\ifnum#1<65536 U+% + \else\ifnum#1<1048576 U-000% + \else U-00% + \fi\fi\fi\fi\fi% + \UTFviii@hexnumber{#1}% + }% +\fi% + +% split an invalid byte sequence for output: drop everything up to the colon, print each octet as " 0xNN" +\ifx\UTFviii@splitseq\@undefined% + \gdef\UTFviii@splitseq#1:#2\relax{% + \UTFviii@hexseq#2\relax% + }% + \gdef\UTFviii@hexseq#1#2\relax{% + % display first octet, preceded by a space + \space 0x\UTFviii@hexbyte{`#1}% + % recursively handle remaining octets, stop once none are left + \ifx\relax#2\relax\else\UTFviii@hexseq#2\relax\fi% + }% +\fi% + +% override message to give the offending byte as two-digit hex +\def\UTFviii@invalid@err#1{% + \PackageError{inputenc}{Invalid UTF-8 byte 0x\UTFviii@hexbyte{`#1}}% + \UTFviii@invalid@help% +}% + +% override message to print the octets followed by the formatted codepoint in parentheses +\gdef\UTFviii@splitcsname#1:#2\relax{% + #2 (\expandafter\UTFviii@hexcodepoint\expandafter{% + \the\numexpr\decode@UTFviii#2\relax})% +}%