Skip to content
Snippets Groups Projects
Commit dde3b0f1 authored by mirabilos's avatar mirabilos Committed by mirabilos
Browse files

Merge branch 'master' of github.com:mirabilos/tex-unicodedomino

parents 8e887025 26d0352a
No related branches found
No related tags found
No related merge requests found
......@@ -28,7 +28,7 @@
% makes all UTF-8 available in listings.
\NeedsTeXFormat{LaTeX2e}%
\ProvidesPackage{unicodedomino}[2018/08/05 1.3 Domino for unknown codepoints]%
\ProvidesPackage{unicodedomino}[2018/08/05 1.4 Domino for unknown codepoints]%
\makeatletter%
\ifx\numexpr\@undefined%
......@@ -40,127 +40,17 @@
\ifx\decode@UTFviii\@undefined%
\PackageError{unicodedomino}%
{Your utf8.def is too old, consider updating it}%
{You will need Debian stretch or newer}%
{You will need v1.1o 2015/08/28, Debian stretch or newer}%
\fi%
% Patch up a bug in utf8.def that forbade the 0xF4 lead byte: globally
% define the active character "F4 so that it expands to
% \UTFviii@four@octets followed by the (stringified) byte itself.
\begingroup%
% make ~ an active character so that it can stand in for "F4
\catcode`\~13
% \uppercase (below) will then turn every ~ into the character "F4
\uccode`\~"F4
% build the definition with ~ as placeholder; \xdef makes it global
\def\UTFviii@tmp{\xdef~{\noexpand\UTFviii@four@octets\string~}}%
% expand \UTFviii@tmp first, then let \uppercase swap ~ for "F4
\uppercase\expandafter{\UTFviii@tmp}%
% closing the group undoes the local \catcode and \uccode changes
\endgroup%
% Now fix up \parse@XML@charref to complain about too large definitions:
% the original is saved under a private name, and the replacement raises
% an inputenc error when \count@ exceeds "10FFFF before it hands over to
% the saved original (which is still called after the error).
\let\unicodedomino@parse@XML@charref\parse@XML@charref%
\gdef\parse@XML@charref{%
\ifnum\count@>"10FFFF\relax%
% \PackageError takes three arguments (package, message, help); the help
% argument used to be missing, so the \fi below was swallowed as help
% text. \@ehd is the kernel's generic "you're in trouble" help message.
\PackageError{inputenc}{%
Cannot define Unicode char value\space%
\unicodedomino@shex\the\count@\relax\space (too large)%
}\@ehd%
\fi%
\unicodedomino@parse@XML@charref%
}%
% Nicer printing of code point hex numbers (not strictly necessary):
% choose the prefix and zero padding from the magnitude of #1, then
% append the hex digits produced by \UTFviii@hexnumber.
\def\unicodedomino@codepoint#1{%
  \ifnum#1>1048575 U-00%
  \else\ifnum#1>65535 U-000%
  \else\ifnum#1>4095 U+%
  \else\ifnum#1>255 U+0%
  \else\ifnum#1>15 U+00%
  \else U+000%
  \fi\fi\fi\fi\fi%
  \expandafter\UTFviii@hexnumber\expandafter{#1}%
}%
% Same for bytes: print #1 as two hex digits via \UTFviii@hexdigit.
\def\unicodedomino@bytehex#1{%
% first digit: the quotient (#1-8)/16, evaluated before it is passed on
\expandafter\UTFviii@hexdigit\expandafter{\the\numexpr(#1-8)/16\relax}%
% second digit: #1 minus sixteen times that quotient; the subtraction is
% only applied when #1 is greater than zero
\UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}%
}%
% And the same for arbitrary numbers: print #1 (delimited by \relax) in
% hex, without any padding.
\def\unicodedomino@shex#1\relax{%
% more than one digit: recurse on the quotient (#1-8)/16 first, so that
% the more significant digits are output before the last one
\ifnum#1>15 %
\expandafter\unicodedomino@shex\expandafter\the\numexpr(#1-8)/16\relax%
\fi%
% least significant digit: #1 minus sixteen times the quotient
\UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}%
}%
% Cosmetic override (not strictly necessary, but requested): print the
% part after the colon as is, followed by its decoded code point in
% parentheses, formatted by \unicodedomino@codepoint.
\gdef\UTFviii@splitcsname#1:#2\relax{%
  #2 (%
  \expandafter\unicodedomino@codepoint\expandafter{%
    \the\numexpr\decode@UTFviii#2\relax%
  })%
}%
% Raise an inputenc error for the invalid byte #1, showing its value in
% hex and using \UTFviii@invalid@help as the help text.
\def\UTFviii@invalid@err#1{%
  \PackageError{inputenc}%
    {Invalid UTF-8 byte 0x\unicodedomino@bytehex{\number`#1}}%
    \UTFviii@invalid@help%
}%
% Output helpers for invalid encodings.
% Print the first octet as a space plus 0x and its hex value, then
% recurse over whatever is left before the delimiting \relax.
\gdef\unicodedomino@splith@x#1#2\relax{%
  \space 0x\UTFviii@hexnumber{`#1}%
  \ifx\relax#2\relax%
  \else%
    \unicodedomino@splith@x#2\relax%
  \fi%
}%
% Drop everything up to the first colon and print the octets after it.
\gdef\unicodedomino@splithex#1:#2\relax{%
  \unicodedomino@splith@x#2\relax%
}%
% Render the actual domino piece: a framed typewriter box that holds #1
% as superscript (in a zero-width, left-aligned box) followed by #2 as
% subscript. Frame settings are kept local by the group.
\def\unicodedomino@box#1#2{%
  \begingroup%
    \fboxsep=.1em%
    \fboxrule=.4pt%
    \texttt{%
      \fbox{%
        \makebox[0pt][l]{\textsuperscript{#1}}%
        \textsubscript{#2}%
      }%
    }%
  \endgroup%
}%
% Expand #1 into hex nybbles, each one followed by a dot: values above
% 15 recurse on the quotient first, then the last nybble is emitted.
\def\unicodedomino@hex@ne#1{%
  \ifnum#1>15 %
    \expandafter\unicodedomino@hex@ne\expandafter{%
      \the\numexpr(#1-8)/16\relax%
    }%
  \fi%
  \UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}.%
}%
% Expand all hex nybbles of #1, zero-padded: one "0." is emitted for
% every threshold #1 stays below, then the real nybbles follow.
\def\unicodedomino@hex@ll#1{%
  \ifnum#1<16 0.\fi%
  \ifnum#1<256 0.\fi%
  \ifnum#1<4096 0.\fi%
  \ifnum#1<65536 0.\fi%
  \ifnum#1<1048576 0.\fi%
  \expandafter\unicodedomino@hex@ne\expandafter{#1}%
}%
% Call the box macro with the right split of the six dot-terminated
% nybbles: two over two when the first two nybbles are zero, otherwise
% three over three.
\def\unicodedomino@hex@do#1.#2.#3.#4.#5.#6.{%
  \ifnum"#1#2=0%
    \unicodedomino@box{#3#4}{#5#6}%
  \else%
    \unicodedomino@box{#1#2#3}{#4#5#6}%
  \fi%
}%
% Split the number #1 into nybbles (fully expanded into
% \unicodedomino@tmp) and pass them on to \unicodedomino@hex@do.
\protected\def\unicodedomino@hex#1{%
  \edef\unicodedomino@tmp{%
    \expandafter\unicodedomino@hex@ll\expandafter{#1}%
  }%
  \expandafter\unicodedomino@hex@do\unicodedomino@tmp\relax%
}%
% Split at the colon, decode the octets after it with \decode@UTFviii
% and pass the resulting number on to \unicodedomino@hex.
\def\unicodedomino@decode#1:#2\relax{%
  \expandafter\unicodedomino@hex\expandafter%
    {\the\numexpr\decode@UTFviii#2\relax}%
}%
% Pull in the code that lives in separate files. The kernel fixup is
% only read while \UTFviii@check@three is still undefined; the other two
% files are always read.
\ifx\@undefined\UTFviii@check@three%
  \input{unicodedomino_kernel_fixup_f4_and_checkseq.def}%
\fi%
\input{unicodedomino_kernel_cosmetics.def}%
\input{unicodedomino_compat.def}%
% retrieve the last octet (lstlistings compatibility)
% retrieve the last octet
% expands the first token of #1 once, then hands the result, padded
% with three \empty tokens, to \unicodedomino@l@st
\def\unicodedomino@last#1{%
  \expandafter\unicodedomino@l@st%
    #1\empty\empty\empty%
}%
......@@ -172,12 +62,7 @@
\fi%
}%
% Handle the trimmed octets: build the control sequence named u8:#1 and
% pass it to \UTFviii@defined.
\def\unicodedomino@octets#1\empty{%
  \expandafter\UTFviii@defined%
    \csname u8:#1\endcsname%
}%
% collect octet tokens, trim them, pass on to handler
% collect octet tokens, trim them and pass them on to handler
\long\def\UTFviii@two@octets#1#2{%
\edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}}%
\expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
......@@ -191,171 +76,78 @@
\expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
}%
% Handle the trimmed octets: build the control sequence named u8:#1 and
% pass it to \UTFviii@defined.
\def\unicodedomino@octets#1\empty{%
  \expandafter\UTFviii@defined%
    \csname u8:#1\endcsname%
}%
% main handler
\def\UTFviii@defined#1{%
\ifx#1\relax%
\if\relax\expandafter\UTFviii@chkseq\string#1\empty\relax%
% unknown char
\ifx\protect\@typeset@protect%
% not protected
\ifx\protect\@typeset@protect%
% not protected
\ifx#1\relax%
% unknown character
\if\relax\expandafter\UTFviii@checkseq\string#1\empty\relax%
% valid codepoint / multibyte sequence
\PackageWarning{inputenc}{%
Unicode\space char\space\expandafter\UTFviii@splitcsname\string#1\relax%
\space not\space set\space up\space for\space use\MessageBreak with\space%
LaTeX, replacing%
Unicode character \expandafter\UTFviii@splitcsname\string#1\relax\space%
not set up for use\MessageBreak with LaTeX, replacing%
}%
% note: same warning as in \UTFviii@undefined@err in utf8.def v1.2c,
% except adding “, replacing” at the end and not being an error
\expandafter\unicodedomino@decode\string#1\relax%
% note: warning is the same as in utf8ienc.dtx v1.2d except
% with the string “, replacing” added at the end
\expandafter\unicodedomino@domino\string#1\relax%
\else%
% protected, just write the original character
\expandafter\@gobblefour\string#1%
% invalid multibyte character
\PackageError{inputenc}{Invalid UTF-8 byte sequence:%
\expandafter\UTFviii@splitseq\string#1\relax}%
\UTFviii@invalid@help
\fi%
\else%
% invalid encoding
\PackageError{inputenc}{%
Invalid\space UTF-8\space byte\space sequence:%
\expandafter\unicodedomino@splithex\string#1\relax%
}{Do ensure the source document is saved in UTF-8 encoding}%
% known character, expand
\expandafter#1%
\fi%
\else%
% known char, expand
\expandafter#1%
% protected, just write the original multibyte character
\expandafter\@gobblefour\string#1%
\fi%
}%
% Input validation, including security-relevant checks.
% #2 is the lead octet, #3 holds the octets after it. A 1 is emitted
% for a lone octet below "80 that has followers, for lead octets from
% "80 up to "C1 and for lead octets of "F5 and above; every other lead
% octet delegates the check of its trailing octets to a helper macro.
\def\UTFviii@chkseq#1:#2#3\empty{%
  \ifnum`#2<"80 %
    \ifx\empty#3\empty\else 1\fi%
  \else\ifnum`#2<"C2 %
    1%
  \else\ifnum`#2<"E0 %
    % one 80-BF
    \UTFviii@chksq@onetrail#3\empty%
  \else\ifnum`#2<"E1 %
    % A0-BF + one 80-BF
    \UTFviii@chksq@a@trail#3\empty%
  \else\ifnum`#2<"F0 %
    % two 80-BF
    \UTFviii@chksq@twotrail#3\empty%
  \else\ifnum`#2<"F1 %
    % 90-BF + two 80-BF
    \UTFviii@chksq@ninetytrails#3\empty%
  \else\ifnum`#2<"F4 %
    % three 80-BF
    \UTFviii@chksq@threetrail#3\empty%
  \else\ifnum`#2<"F5 %
    % 80-8F + two 80-BF
    \UTFviii@chksq@belowninetytrails#3\empty%
  \else%
    1%
  \fi\fi\fi\fi\fi\fi\fi\fi%
}%
% Exactly one trailing octet in 80-BF expected: emits a 1 when more
% octets follow #1 or when #1 lies outside that range, nothing otherwise.
\def\UTFviii@chksq@onetrail#1#2\empty{%
  \ifx\empty#2\empty%
    \ifnum`#1<"80 %
      1%
    \else%
      \ifnum`#1<"C0 \else 1\fi%
    \fi%
  \else%
    1%
  \fi%
}%
% One octet in A0-BF followed by one more trailing octet expected:
% emits a 1 when nothing follows #1 or when #1 is out of range, else
% the remainder is checked by \UTFviii@chksq@onetrail.
\def\UTFviii@chksq@a@trail#1#2\empty{%
  \ifx\empty#2\empty%
    1%
  \else\ifnum`#1<"A0 %
    1%
  \else\ifnum`#1<"C0 %
    \UTFviii@chksq@onetrail#2\empty%
  \else%
    1%
  \fi\fi\fi%
}%
\def\UTFviii@chksq@twotrail#1#2\empty{%
\ifx\empty#2\empty%
1%
\else%
\ifnum`#1<"80 %
1%
\else%
\ifnum`#1<"C0 %
\UTFviii@chksq@onetrail#2\empty%
\else%
1%
\fi%
\fi%
\fi%
% Prepare for outputting the domino block: decode the octets after the
% colon, expand the result through \unicodedomino@domino@hex into
% \unicodedomino@tmp and feed that to \unicodedomino@domino@switch.
\def\unicodedomino@domino#1:#2\relax{%
  \edef\unicodedomino@tmp{%
    \expandafter\unicodedomino@domino@hex\expandafter%
      {\the\numexpr\decode@UTFviii#2\relax}%
  }%
  \expandafter\unicodedomino@domino@switch\unicodedomino@tmp%
}%
\def\UTFviii@chksq@ninetytrails#1#2\empty{%
\ifx\empty#2\empty%
1%
\else%
\ifnum`#1<"90 %
1%
\else%
\ifnum`#1<"C0 %
\UTFviii@chksq@twotrail#2\empty%
\else%
1%
\fi%
\fi%
\fi%
% Convert #1 to six nybbles: one 0 is emitted for every threshold #1
% stays below, then \UTFviii@hexnumber supplies the remaining digits.
\def\unicodedomino@domino@hex#1{%
  \ifnum#1<16 0\fi%
  \ifnum#1<256 0\fi%
  \ifnum#1<4096 0\fi%
  \ifnum#1<65536 0\fi%
  \ifnum#1<1048576 0\fi%
  \UTFviii@hexnumber{#1}%
}%
\def\UTFviii@chksq@threetrail#1#2\empty{%
\ifx\empty#2\empty%
1%
% decide whether to output a BMP or astral planes block
\def\unicodedomino@domino@switch#1#2#3#4#5#6{%
\ifnum"#1#2=0%
\unicodedomino@box{#3#4}{#5#6}%
\else%
\ifnum`#1<"80 %
1%
\else%
\ifnum`#1<"C0 %
\UTFviii@chksq@twotrail#2\empty%
\else%
1%
\fi%
\fi%
\unicodedomino@box{#1#2#3}{#4#5#6}%
\fi%
}%
\def\UTFviii@chksq@belowninetytrails#1#2\empty{%
\ifx\empty#2\empty%
1%
\else%
\ifnum`#1<"80 %
1%
\else%
\ifnum`#1<"90 %
\UTFviii@chksq@twotrail#2\empty%
\else%
1%
\fi%
\fi%
\fi%
% Render the actual domino piece: a framed typewriter box that holds #1
% as superscript (in a zero-width, left-aligned box) followed by #2 as
% subscript. Frame settings are kept local by the group.
\def\unicodedomino@box#1#2{%
  \begingroup%
    \fboxsep=.1em%
    \fboxrule=.4pt%
    \texttt{%
      \fbox{%
        \makebox[0pt][l]{\textsuperscript{#1}}%
        \textsubscript{#2}%
      }%
    }%
  \endgroup%
}%
% clean up after ourselves
......
% -*- mode: tex -*-
%-
% Copyright © 2018
% mirabilos <m@mirbsd.org>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%-
% Compatibility code with older utf8.def versions
% added in v1.2a 2018/03/24
% (only supplies \UTFviii@invalid@help when it is not defined yet)
\ifx\@undefined\UTFviii@invalid@help%
  \def\UTFviii@invalid@help{%
    Do ensure the source document is saved in UTF-8 encoding%
  }%
\fi%
% -*- mode: tex -*-
%-
% Copyright © 2017, 2018
% mirabilos <m@mirbsd.org>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%
% This file is dual-licenced under the LPPL version 1.3c or later.
%-
% Improve error and warning formatting. Fully submitted upstream.
%: https://github.com/latex3/latex2e/pull/62
\ifx\@undefined\UTFviii@hexbyte%
  % two-digit hex: values below 16 get a leading zero
  \def\UTFviii@hexbyte#1{%
    \ifnum#1<16 0\fi%
    \UTFviii@hexnumber{#1}%
  }%
  % replacement error message that shows the offending byte in hex
  \def\UTFviii@invalid@err#1{%
    \PackageError{inputenc}%
      {Invalid UTF-8 byte "\UTFviii@hexbyte{`#1}}%
      \UTFviii@invalid@help%
  }%
\fi%
%: https://github.com/latex3/latex2e/pull/63
\ifx\@undefined\UTFviii@hexcodepoint%
  % format a number as Unicode code point: the prefix and zero padding
  % depend on the magnitude of #1, the digits come from \UTFviii@hexnumber
  \def\UTFviii@hexcodepoint#1{%
    \ifnum#1>1048575 U-00%
    \else\ifnum#1>65535 U-000%
    \else\ifnum#1>4095 U+%
    \else\ifnum#1>255 U+0%
    \else\ifnum#1>15 U+00%
    \else U+000%
    \fi\fi\fi\fi\fi%
    \UTFviii@hexnumber{#1}%
  }%
  % override so that the message shows the part after the colon followed
  % by its decoded code point in parentheses
  \gdef\UTFviii@splitcsname#1:#2\relax{%
    #2 (%
    \expandafter\UTFviii@hexcodepoint\expandafter{%
      \the\numexpr\decode@UTFviii#2\relax%
    })%
  }%
\fi%
%: https://github.com/latex3/latex2e/pull/62
% split an invalid byte sequence for output
\ifx\@undefined\UTFviii@splitseq%
  % drop everything up to the first colon, print the octets after it
  \gdef\UTFviii@splitseq#1:#2\relax{%
    \UTFviii@hexseq#2\relax%
  }%
  \gdef\UTFviii@hexseq#1#2\relax{%
    % first octet: a space, a double quote and the two-digit hex value
    \space "\UTFviii@hexbyte{`#1}%
    % anything left before the delimiting \relax is handled recursively
    \ifx\relax#2\relax%
    \else%
      \UTFviii@hexseq#2\relax%
    \fi%
  }%
\fi%
% -*- mode: tex -*-
%-
% Copyright © 2018
% mirabilos <m@mirbsd.org>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%
% This file is dual-licenced under the LPPL version 1.3c or later.
%-
% Fix check for illegal sequences to fail overlong encoded sequences
% as well as codepoints outside of the Unicode range [0;10FFFF]. Add
% "F4 to the list of permitted lead octets.
%: not forwarded yet, will only do so once PR#60 (see below) is in
% New check for illegal sequences.
% #2 is the lead octet, #3 holds the octets after it. A 1 is emitted
% for a lone octet below "80 that has followers, for lead octets from
% "80 up to "C1 and for lead octets of "F5 and above; every other lead
% octet delegates its trailing octets to \UTFviii@check@one/two/three,
% passing the permitted bounds for the first trailing octet.
\def\UTFviii@checkseq#1:#2#3\empty{%
  \ifnum`#2<"80 %
    \ifx\empty#3\empty\else 1\fi%
  \else\ifnum`#2<"C2 %
    1%
  \else\ifnum`#2<"E0 %
    % one 80-BF
    \UTFviii@check@one#3\empty%
  \else\ifnum`#2<"E1 %
    % A0-BF + one 80-BF
    \UTFviii@check@two"A0.#3\empty%
  \else\ifnum`#2<"F0 %
    % two 80-BF
    \UTFviii@check@two"80.#3\empty%
  \else\ifnum`#2<"F1 %
    % 90-BF + two 80-BF
    \UTFviii@check@three"90."BF.#3\empty%
  \else\ifnum`#2<"F4 %
    % three 80-BF
    \UTFviii@check@three"80."BF.#3\empty%
  \else\ifnum`#2<"F5 %
    % 80-8F + two 80-BF
    \UTFviii@check@three"80."8F.#3\empty%
  \else%
    1%
  \fi\fi\fi\fi\fi\fi\fi\fi%
}%
% Exactly one trailing octet in 80-BF expected: emits a 1 when more
% octets follow #1 or when #1 lies outside that range, nothing otherwise.
\def\UTFviii@check@one#1#2\empty{%
  \ifx\empty#2\empty%
    \ifnum`#1<"80 %
      1%
    \else\ifnum`#1>"BF %
      1%
    \fi\fi%
  \else%
    1%
  \fi%
}%
% Two trailing octets expected; #1 is the lower bound for the first of
% them (#2), whose upper bound is fixed at "BF. Emits a 1 when nothing
% follows #2 or when #2 is out of range, else the remainder is checked
% by \UTFviii@check@one.
\def\UTFviii@check@two#1.#2#3\empty{%
  \ifx\empty#3\empty%
    1%
  \else\ifnum`#2<#1 %
    1%
  \else\ifnum`#2>"BF %
    1%
  \else%
    \UTFviii@check@one#3\empty%
  \fi\fi\fi%
}%
% Three trailing octets expected; #1 and #2 are the lower and upper
% bound for the first of them (#3). Emits a 1 when nothing follows #3
% or when #3 is out of bounds, else the remainder is checked by
% \UTFviii@check@two with a lower bound of "80.
\def\UTFviii@check@three#1.#2.#3#4\empty{%
  \ifx\empty#4\empty%
    1%
  \else\ifnum`#3<#1 %
    1%
  \else\ifnum`#3>#2 %
    1%
  \else%
    \UTFviii@check@two"80.#4\empty%
  \fi\fi\fi%
}%
%: https://github.com/latex3/latex2e/pull/60
% bugfix: complain about too large definitions
% The original \parse@XML@charref is saved under a private name; the
% replacement raises an inputenc error when \count@ exceeds "10FFFF and
% then hands over to the saved original (which is still called after
% the error).
\let\unicodedomino@parse@XML@charref\parse@XML@charref%
\gdef\parse@XML@charref{%
\ifnum\count@>"10FFFF\relax%
% \PackageError takes three arguments (package, message, help); the help
% argument used to be missing, so the \fi below was swallowed as help
% text. \@ehd is the kernel's generic "you're in trouble" help message.
\PackageError{inputenc}{%
Cannot define Unicode char value\space%
\UTFviii@hexnumber\count@\space%
(too large)%
}\@ehd%
\fi%
\unicodedomino@parse@XML@charref%
}%
%: https://github.com/latex3/latex2e/pull/60
% bugfix: add "F4 to the list of permitted lead octets
% (needs the above fix)
% The active character "F4 is globally defined to expand to
% \UTFviii@four@octets followed by the (stringified) byte itself.
\begingroup%
% make ~ an active character so that it can stand in for "F4
\catcode`\~13
% \uppercase (below) will then turn every ~ into the character "F4
\uccode`\~"F4
% build the definition with ~ as placeholder; \xdef makes it global
\def\UTFviii@tmp{\xdef~{\noexpand\UTFviii@four@octets\string~}}%
% expand \UTFviii@tmp first, then let \uppercase swap ~ for "F4
\uppercase\expandafter{\UTFviii@tmp}%
% closing the group undoes the local \catcode and \uccode changes
\endgroup%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment