Skip to content
Snippets Groups Projects
Verified Commit 3c292a25 authored by mirabilos's avatar mirabilos Committed by mirabilos
Browse files

split by separation of concerns

parent 2fd64608
No related branches found
No related tags found
No related merge requests found
......@@ -40,102 +40,15 @@
% Refuse to continue when the loaded utf8.def does not provide
% \decode@UTFviii, which the macros of this package rely on.
\ifx\decode@UTFviii\@undefined%
  % \PackageError takes exactly three arguments (package, message, help);
  % only one help text is given so that no stray brace group is left
  % behind to be typeset
  \PackageError{unicodedomino}%
    {Your utf8.def is too old, consider updating it}%
    {You will need v1.1o 2015/08/28, Debian stretch or newer}%
\fi%
% pull code from other files
% (the fixup file's contents are not visible here; presumably kernel
% fixes, judging by the name -- TODO confirm)
\input{unicodedomino_kernel_fixup.def}%
% nicer formatting of the inputenc error and warning messages
\input{unicodedomino_kernel_cosmetics.def}%
% fallback definitions for older utf8.def versions
\input{unicodedomino_compat.def}%
% Print codepoint #1 (a decimal number) with a prefix and zero padding:
% "U+" padded to four hex digits below 65536, "U-" padded to eight hex
% digits from there on.  Cosmetic only.  Just the prefix and padding
% are chosen here; the digits come from \UTFviii@hexnumber.
\def\unicodedomino@codepoint#1{%
  \ifnum#1<16 U+000%
  \else\ifnum#1<256 U+00%
  \else\ifnum#1<4096 U+0%
  \else\ifnum#1<65536 U+%
  \else\ifnum#1<1048576 U-000%
  \else U-00%
  \fi\fi\fi\fi\fi%
  \expandafter\UTFviii@hexnumber\expandafter{#1}%
}%
% same for bytes: print byte value #1 as exactly two hex digits.
% The high nybble used to be computed as (#1-8)/16.  \numexpr rounds
% ties away from zero, so for #1=0 this gave -1, for which the kernel's
% \ifcase-based \UTFviii@hexdigit prints nothing ("0" instead of "00").
% Padding explicitly and leaving the digits to \UTFviii@hexnumber avoids
% that special case; all other values print exactly as before.
\def\unicodedomino@bytehex#1{%
  % one leading zero for single-digit values
  \ifnum#1<16 0\fi%
  \UTFviii@hexnumber{#1}%
}%
% override to beautify the output, not strictly necessary but requested
% Called with the \string form of a \u8:<octets> control sequence followed
% by \relax: #1 (everything before the colon) is dropped, #2 (the octets)
% is printed as-is, followed in parentheses by the codepoint obtained
% from \decode@UTFviii and formatted by \unicodedomino@codepoint.
\gdef\UTFviii@splitcsname#1:#2\relax{%
% the \expandafter pair makes \the\numexpr produce the plain number
% before \unicodedomino@codepoint reads its argument
#2 (\expandafter\unicodedomino@codepoint\expandafter{%
\the\numexpr\decode@UTFviii#2\relax})%
}%
% Report a single invalid byte #1 as an inputenc error, showing its
% value in hex; the help text is whatever \UTFviii@invalid@help holds.
\def\UTFviii@invalid@err#1{%
\PackageError{inputenc}{%
% `#1 is the character code of the offending byte
Invalid UTF-8 byte 0x\unicodedomino@bytehex{\number`#1}%
}\UTFviii@invalid@help%
}%
% for invalid encoding output
% Prints " 0x<hex>" for the first token #1, then calls itself on the
% remaining tokens #2 until nothing is left before the \relax delimiter.
% No zero padding is applied here, so a value below 16 presumably shows
% up as a single hex digit.
\gdef\unicodedomino@splith@x#1#2\relax{%
\space 0x\UTFviii@hexnumber{`#1}%
% an empty #2 makes \ifx compare \relax with \relax and ends the recursion
\ifx\relax#2\relax\else\unicodedomino@splith@x#2\relax\fi%
}%
% Entry point for the hex dump of an invalid sequence: drops #1 (the
% part up to the colon) and hands the octets #2 to
% \unicodedomino@splith@x.
\gdef\unicodedomino@splithex#1:#2\relax{%
\unicodedomino@splith@x#2\relax%
}%
% Typeset one domino piece: a framed monospace box with #1 set as
% superscript and #2 set as subscript, both starting at the left edge.
\def\unicodedomino@box#1#2{%
  \begingroup%
    % tight frame; both settings stay local to this group
    \setlength{\fboxsep}{.1em}%
    \setlength{\fboxrule}{.4pt}%
    \texttt{\fbox{%
      % zero-width, left-aligned box: the superscript takes no horizontal
      % space, so the subscript is placed directly underneath it
      \makebox[0pt][l]{\textsuperscript{#1}}%
      \textsubscript{#2}%
    }}%
  \endgroup%
}%
% expand one hex nybble
% Recursive: for #1 above 15 the higher nybbles are emitted first by
% calling itself on the quotient, then the lowest nybble of #1 follows.
% Each digit is terminated by a dot.
\def\unicodedomino@hex@ne#1{%
\ifnum#1>15 %
% (#1-8)/16: the offset of 8 compensates for \numexpr rounding to
% nearest, so the result is the integer quotient of #1 by 16
\expandafter\unicodedomino@hex@ne\expandafter{\the\numexpr(#1-8)/16\relax}%
\fi%
% remainder of #1 modulo 16; the subtraction is skipped for #1=0
\UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}.%
}%
% Produce the dot-terminated nybble list for #1, padded to six entries:
% one "0." is emitted for every leading hex digit that #1 lacks, then
% \unicodedomino@hex@ne supplies the significant digits.
\def\unicodedomino@hex@ll#1{%
  \ifnum#1<16 0.\fi%
  \ifnum#1<256 0.\fi%
  \ifnum#1<4096 0.\fi%
  \ifnum#1<65536 0.\fi%
  \ifnum#1<1048576 0.\fi%
  \expandafter\unicodedomino@hex@ne\expandafter{#1}%
}%
% call the appropriate box function
% Takes six dot-terminated hex nybbles #1..#6.  When the two leading
% nybbles are zero, a 2+2 digit domino is drawn from #3..#6, otherwise
% a 3+3 digit domino from all six.
\def\unicodedomino@hex@do#1.#2.#3.#4.#5.#6.{%
  % the space after 0 ends the numeric constant; without it TeX keeps
  % expanding while looking for more digits and would already expand
  % \unicodedomino@box before the test is decided
  \ifnum"#1#2=0 %
    \unicodedomino@box{#3#4}{#5#6}%
  \else%
    \unicodedomino@box{#1#2#3}{#4#5#6}%
  \fi%
}%
% split nybbles and pass on
% #1 is the codepoint as a number.  \protected, so this macro is not
% expanded inside \edef and similar expansion-only contexts.
\protected\def\unicodedomino@hex#1{%
% fully expand the padded nybble list into \unicodedomino@tmp ...
\edef\unicodedomino@tmp{\expandafter\unicodedomino@hex@ll\expandafter{#1}}%
% ... and feed it to the dot-delimited parameters of \unicodedomino@hex@do
\expandafter\unicodedomino@hex@do\unicodedomino@tmp\relax%
}%
% split, decode and pass on
% Called with the \string form of a \u8:<octets> control sequence: #1
% (the part before the colon) is dropped, the octets #2 are decoded with
% \decode@UTFviii and the resulting number is given to \unicodedomino@hex.
\def\unicodedomino@decode#1:#2\relax{%
% \the\numexpr is expanded first so that a plain number is passed on
\expandafter\unicodedomino@hex\expandafter{%
\the\numexpr\decode@UTFviii#2\relax%
}%
}%
% retrieve the last octet (lstlistings compatibility)
% #1 is expanded once and handed to \unicodedomino@l@st together with
% three \empty tokens; how the helper uses them is not fully visible
% here.
\def\unicodedomino@last#1{%
\expandafter\unicodedomino@l@st#1\empty\empty\empty%
}%
......@@ -147,12 +60,7 @@
\fi%
}%
% handle trimmed octets
% Everything up to the \empty delimiter (#1) is turned into the control
% sequence \u8:<octets>, which is then passed to \UTFviii@defined.
\def\unicodedomino@octets#1\empty{%
\expandafter\UTFviii@defined\csname u8:#1\endcsname%
}%
% collect octet tokens, trim them, pass on to handler
% collect octet tokens, trim them and pass them on to handler
\long\def\UTFviii@two@octets#1#2{%
\edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}}%
\expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
......@@ -166,38 +74,79 @@
\expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
}%
% handle trimmed octets
% Everything up to the \empty delimiter (#1) is turned into the control
% sequence \u8:<octets>, which is then passed to \UTFviii@defined.
\def\unicodedomino@octets#1\empty{%
\expandafter\UTFviii@defined\csname u8:#1\endcsname%
}%
% main handler
% #1 is the control sequence \u8:<octets> built by \unicodedomino@octets.
%  - #1 defined: it holds the character's definition, so it is executed;
%  - #1 undefined, sequence accepted by \UTFviii@checkseq: a warning is
%    issued and a domino is typeset, or, while \protect differs from
%    \@typeset@protect, the original octets are written back out;
%  - #1 undefined, sequence rejected: an inputenc error is raised.
% Exactly one warning text, one replacement call and one error call are
% kept here; the superseded variants (\unicodedomino@decode,
% \unicodedomino@splithex) are no longer invoked.
\def\UTFviii@defined#1{%
  \ifx#1\relax%
    \if\relax\expandafter\UTFviii@checkseq\string#1\empty\relax%
      % unknown character but valid codepoint
      \ifx\protect\@typeset@protect%
        % not protected, replace by domino
        \PackageWarning{inputenc}{%
          Unicode character \expandafter\UTFviii@splitcsname\string#1\relax\space%
          not set up for use\MessageBreak with LaTeX, replacing%
        }%
        % note: warning is the same as in utf8ienc.dtx v1.2d except
        % with the string “, replacing” added at the end
        \expandafter\unicodedomino@domino\string#1\relax%
      \else%
        % protected, just write the original character: the first four
        % characters of \string#1 (backslash, u, 8, colon) are dropped
        \expandafter\@gobblefour\string#1%
      \fi%
    \else%
      % invalid encoding
      \PackageError{inputenc}{Invalid UTF-8 byte sequence:%
        \expandafter\UTFviii@splitseq\string#1\relax}%
        \UTFviii@invalid@help%
    \fi%
  \else%
    % known character, expand
    %XXX perhaps do not expand it when protected?
    \expandafter#1%
  \fi%
}%
% prepare for outputting the domino block
% Called with the \string form of a \u8:<octets> control sequence: #1
% (the part before the colon) is dropped and the octets #2 are decoded
% into a codepoint number.
\def\unicodedomino@domino#1:#2\relax{%
% expand the zero-padded hex digits of the codepoint into
% \unicodedomino@tmp
\edef\unicodedomino@tmp{%
\expandafter\unicodedomino@domino@hex\expandafter{%
\the\numexpr\decode@UTFviii#2\relax%
}%
}%
% the digits become the six arguments of \unicodedomino@domino@switch
\expandafter\unicodedomino@domino@switch\unicodedomino@tmp%
}%
% Write number #1 as six hex digits: one "0" is prepended for every
% leading digit that #1 lacks, the remaining digits are produced by
% \UTFviii@hexnumber.
\def\unicodedomino@domino@hex#1{%
  \ifnum#1<16 0\fi%
  \ifnum#1<256 0\fi%
  \ifnum#1<4096 0\fi%
  \ifnum#1<65536 0\fi%
  \ifnum#1<1048576 0\fi%
  \UTFviii@hexnumber{#1}%
}%
% decide whether to output a BMP or astral planes block
% Takes six hex digits #1..#6.  When the two leading digits are zero,
% a 2+2 digit domino is drawn from #3..#6, otherwise a 3+3 digit domino
% from all six.
\def\unicodedomino@domino@switch#1#2#3#4#5#6{%
  % the space after 0 ends the numeric constant; without it TeX keeps
  % expanding while looking for more digits and would already expand
  % \unicodedomino@box before the test is decided
  \ifnum"#1#2=0 %
    \unicodedomino@box{#3#4}{#5#6}%
  \else%
    \unicodedomino@box{#1#2#3}{#4#5#6}%
  \fi%
}%
% Typeset one domino piece: a framed monospace box with #1 set as
% superscript and #2 set as subscript, both starting at the left edge.
\def\unicodedomino@box#1#2{%
  \begingroup%
    % tight frame; both settings stay local to this group
    \setlength{\fboxsep}{.1em}%
    \setlength{\fboxrule}{.4pt}%
    \texttt{\fbox{%
      % zero-width, left-aligned box: the superscript takes no horizontal
      % space, so the subscript is placed directly underneath it
      \makebox[0pt][l]{\textsuperscript{#1}}%
      \textsubscript{#2}%
    }}%
  \endgroup%
}%
% clean up after ourselves
\makeatother%
\endinput
% -*- mode: tex -*-
%-
% Copyright © 2018
% mirabilos <m@mirbsd.org>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%-
% Compatibility code with older utf8.def versions
% Provide a help text for invalid-encoding errors only when
% \UTFviii@invalid@help is not defined yet; an existing definition is
% left untouched.
\ifx\UTFviii@invalid@help\@undefined%
\def\UTFviii@invalid@help{%
Do ensure the source document is saved in UTF-8 encoding%
}%
\fi%
% -*- mode: tex -*-
%-
% Copyright © 2017, 2018
% mirabilos <m@mirbsd.org>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%
% This file is dual-licenced under the LPPL version 1.3c or later.
%-
% Improve error and warning formatting.
% format a number as two-digit hex
% Defined only if no \UTFviii@hexbyte exists yet.  Values below 16 get
% one leading zero; the digits themselves come from \UTFviii@hexnumber.
\ifx\UTFviii@hexbyte\@undefined%
\def\UTFviii@hexbyte#1{%
\ifnum#1<16 0\fi%
\UTFviii@hexnumber{#1}%
}%
\fi%
% format a number as Unicode codepoint hex
% Defined only if no \UTFviii@hexcodepoint exists yet.  Below 65536 the
% output is "U+" padded to four hex digits, from 65536 on it is "U-"
% padded to eight hex digits.  Only the prefix and padding are chosen
% here; the digits come from \UTFviii@hexnumber.
\ifx\UTFviii@hexcodepoint\@undefined%
\def\UTFviii@hexcodepoint#1{%
\ifnum#1<16 U+000%
\else\ifnum#1<256 U+00%
\else\ifnum#1<4096 U+0%
\else\ifnum#1<65536 U+%
\else\ifnum#1<1048576 U-000%
\else U-00%
\fi\fi\fi\fi\fi%
\UTFviii@hexnumber{#1}%
}%
\fi%
% split an invalid byte sequence for output
% Both macros are defined only if no \UTFviii@splitseq exists yet.
\ifx\UTFviii@splitseq\@undefined%
% entry point: drops #1 (the part up to the colon) and passes the
% octets #2 to \UTFviii@hexseq
\gdef\UTFviii@splitseq#1:#2\relax{%
\UTFviii@hexseq#2\relax%
}%
% prints " 0x<two hex digits>" for each token up to the \relax delimiter
\gdef\UTFviii@hexseq#1#2\relax{%
% display first octet
\space 0x\UTFviii@hexbyte{`#1}%
% recursively handle remaining octets
% (an empty #2 makes \ifx compare \relax with \relax and ends the recursion)
\ifx\relax#2\relax\else\UTFviii@hexseq#2\relax\fi%
}%
\fi%
% override message to give the byte in hex
% #1 is the offending byte; `#1 yields its character code, which
% \UTFviii@hexbyte prints as two hex digits.  The help text is whatever
% \UTFviii@invalid@help holds.
\def\UTFviii@invalid@err#1{%
\PackageError{inputenc}{Invalid UTF-8 byte 0x\UTFviii@hexbyte{`#1}}%
\UTFviii@invalid@help%
}%
% override message to format the codepoint correctly
% Takes text of the form <prefix>:<octets> followed by \relax: #1 (the
% part before the colon) is dropped, #2 (the octets) is printed as-is,
% followed in parentheses by the codepoint obtained from \decode@UTFviii
% and formatted by \UTFviii@hexcodepoint.
\gdef\UTFviii@splitcsname#1:#2\relax{%
% the \expandafter pair makes \the\numexpr produce the plain number
% before \UTFviii@hexcodepoint reads its argument
#2 (\expandafter\UTFviii@hexcodepoint\expandafter{%
\the\numexpr\decode@UTFviii#2\relax})%
}%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment