% -*- mode: tex -*-
%-
% Copyright © 2018
%	mirabilos <m@mirbsd.org>
% Copyright © 2017
%	mirabilos <t.glaser@tarent.de>
% with contributions by (among others)
%	David Carlisle <http://tex.stackexchange.com/users/1090>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%-
% Change UTF-8 input encoding to not error out, but substitute, upon
% encountering characters not set up with newunicodechar. Also fixes
% decoding to error out on overlong encoded characters, and (likely)
% makes all UTF-8 available in listings.

% Package identification: require the LaTeX2e format and declare the
% package name together with its release date, version and description.
\NeedsTeXFormat{LaTeX2e}%
\ProvidesPackage{unicodedomino}[2018/08/05 1.2 Domino for unknown codepoints]%
% the internal macro names below contain @
\makeatletter%

% Prerequisite checks: raise an error when \numexpr or the UTF-8 decoder
% hook \UTFviii@defined is not available at load time.
% \PackageError takes three arguments (package name, message, help text).
% The help text has to be supplied: with only two arguments the macro
% would grab the closing \fi of the conditional as its third argument
% whenever the error actually fires.
% The texts deliberately contain no control sequences, because they are
% expanded while the message is printed.
\ifx\numexpr\@undefined%
 \PackageError{unicodedomino}{This package requires numexpr}{%
  Run LaTeX on an engine that has the e-TeX extensions enabled.%
 }%
\fi%
\ifx\UTFviii@defined\@undefined%
 \PackageError{unicodedomino}{This package requires UTF-8 input encoding}{%
  Load the inputenc package with the utf8 option before this package.%
 }%
\fi%

% nicer printing of codepoint hex numbers, not strictly necessary
% #1 is the codepoint as a number. Expands (fully expandable, so usable
% inside warning messages) to
%   U+XXXX      for codepoints up to 65535, zero-padded to four digits
%   U-XXXXXXXX  for larger codepoints, zero-padded to eight digits
% \UTFviii@hexnumber (not defined in this file) supplies the significant
% hex digits only, so all padding is computed here. The four-digit form
% also pads values below 256, and the eight-digit form emits one zero
% less once the value needs six hex digits.
\def\unicodedomino@codepoint#1{%
 \ifnum#1>65535 %
  U-00\ifnum#1<1048576 0\fi%
 \else%
  U+\ifnum#1<4096 0\fi\ifnum#1<256 0\fi\ifnum#1<16 0\fi%
 \fi%
 \expandafter\UTFviii@hexnumber\expandafter{#1}%
}%

% override to beautify the output, not strictly necessary but requested
% Argument shape: <prefix>:<octets>\relax; used below on the \string of a
% u8:<octets> control sequence name. The prefix #1 is dropped. Expands to
% the octets #2 themselves, a space, and the formatted codepoint in
% parentheses. The codepoint is obtained by evaluating \decode@UTFviii
% (not defined in this file) on the octets inside \numexpr.
% Defined globally with \gdef.
\gdef\UTFviii@splitcsname#1:#2\relax{%
 #2 (\expandafter\unicodedomino@codepoint\expandafter{%
  \the\numexpr\decode@UTFviii#2\relax})%
}%

% for invalid encoding output
% \unicodedomino@splith@x<chars>\relax expands to " 0x<hex>" for each
% character in turn: #1 is the first character, #2 the remaining ones.
% The character code of #1 is formatted by \UTFviii@hexnumber (not
% defined in this file).
\gdef\unicodedomino@splith@x#1#2\relax{%
 \space 0x\UTFviii@hexnumber{`#1}%
 % recurse while characters remain; the test is true only for empty #2
 \ifx\relax#2\relax\else\unicodedomino@splith@x#2\relax\fi%
}%
% Entry point: argument shape <prefix>:<octets>\relax. Drops the prefix #1
% up to the first colon and dumps the octets #2 in hex.
\gdef\unicodedomino@splithex#1:#2\relax{%
 \unicodedomino@splith@x#2\relax%
}%

% Typeset one domino piece: a framed monospace box holding #1 as a
% superscript and #2 as a subscript. The superscript sits in a zero-width,
% left-aligned box, so both halves start at the same horizontal position.
\def\unicodedomino@box#1#2{%
 \begingroup%
 % frame geometry, local to this group
 \setlength{\fboxrule}{.4pt}%
 \setlength{\fboxsep}{.1em}%
 \texttt{\fbox{\makebox[0pt][l]{\textsuperscript{#1}}\textsubscript{#2}}}%
 \endgroup%
}%

% expand one hex nybble
% Expands to the hexadecimal digits of the number #1, most significant
% digit first, each digit followed by a period. No padding is done here.
% \UTFviii@hexdigit is not defined in this file.
\def\unicodedomino@hex@ne#1{%
 % more than one digit: first emit the digits of the quotient; \numexpr
 % division rounds, so 8 is subtracted to obtain the truncated quotient
 \ifnum#1>15 %
  \expandafter\unicodedomino@hex@ne\expandafter{\the\numexpr(#1-8)/16\relax}%
 \fi%
 % lowest digit: #1 minus 16 times the quotient; the subtraction is
 % skipped when #1 is zero
 \UTFviii@hexdigit{\numexpr#1\ifnum#1>0-((#1-8)/16)*16\fi\relax}.%
}%
% expand all hex nybbles, zero-padded
% Expands to the hex digits of #1, each followed by a period, left-padded
% with "0." fields to exactly six digits. Six is what the largest
% codepoint ("10FFFF) needs, and \unicodedomino@hex@do always reads six
% period-delimited fields. Padding to a fixed width matters: a field
% count that does not match the delimited parameters makes TeX either
% swallow following document text up to the next period or leave stray
% digits behind.
\def\unicodedomino@hex@ll#1{%
 \ifnum#1<1048576 0.\fi%
 \ifnum#1<65536 0.\fi%
 \ifnum#1<4096 0.\fi%
 \ifnum#1<256 0.\fi%
 \ifnum#1<16 0.\fi%
 \expandafter\unicodedomino@hex@ne\expandafter{#1}%
}%
% call the appropriate box function
% #1..#6 are the six hex digits, most significant first.
%  - top two digits zero (codepoint below "10000): two over two digits
%  - otherwise: three over three digits
% The top two digits are read as one hexadecimal number, so the letters
% A-F are valid there as well (a plain decimal \ifnum would choke on them).
\def\unicodedomino@hex@do#1.#2.#3.#4.#5.#6.{%
 \ifnum"#1#2=0 %
  \unicodedomino@box{#3#4}{#5#6}%
 \else%
  \unicodedomino@box{#1#2#3}{#4#5#6}%
 \fi%
}%
% split nybbles and pass on
% #1 is the codepoint as a number. The digit list is built by full
% expansion into a package-private scratch macro, so no user-level macro
% name is overwritten. \protected keeps this macro from being expanded
% in expansion-only contexts, where the \edef could not be executed.
\protected\def\unicodedomino@hex#1{%
 \edef\unicodedomino@nybbles{%
  \expandafter\unicodedomino@hex@ll\expandafter{#1}%
 }%
 \expandafter\unicodedomino@hex@do\unicodedomino@nybbles\relax%
}%
% split, decode and pass on
% Argument shape: <prefix>:<octets>\relax; used below on the \string of a
% u8:<octets> control sequence name. The prefix #1 is dropped. The octets
% #2 are handed to \decode@UTFviii (not defined in this file) inside
% \numexpr; the evaluated number is passed to \unicodedomino@hex.
\def\unicodedomino@decode#1:#2\relax{%
 \expandafter\unicodedomino@hex\expandafter{%
  \the\numexpr\decode@UTFviii#2\relax%
 }%
}%

% retrieve the last octet (lstlistings compatibility)
% The first token of #1 is expanded once (the callers in this file pass
% \string<token>), and the resulting tokens are walked by
% \unicodedomino@l@st, which expands to the last one of them.
\def\unicodedomino@last#1{%
 % three \empty sentinels follow the list; the ones left over after the
 % walk expand to nothing
 \expandafter\unicodedomino@l@st#1\empty\empty\empty%
}%
% #1 is the current token, #2 the one after it, #3 everything up to the
% next \empty.
\def\unicodedomino@l@st#1#2#3\empty{%
 \ifx\empty#2%
  % #2 is already a sentinel, so #1 was the last real token
  #1%
 \else%
  % drop #1 and continue with the rest, supplying fresh sentinels
  \unicodedomino@l@st#2#3\empty\empty%
 \fi%
}%

% handle trimmed octets
% #1 (delimited by \empty) is the cleaned-up octet sequence of one
% character. Builds the control sequence named u8:<octets> and passes that
% single token to \UTFviii@defined.
\def\unicodedomino@octets#1\empty{%
 \expandafter\UTFviii@defined\csname u8:#1\endcsname%
}%

% collect octet tokens, trim them, pass on to handler
% Replacements for the collectors of two-, three- and four-octet
% sequences. #1 is the lead octet, #2..#4 are the tokens following it.
% Each following token is converted with \string and reduced to its last
% character by \unicodedomino@last; the resulting octet string is handed
% to \unicodedomino@octets.
% The string is assembled in a package-private scratch macro, so that a
% user-level macro is not overwritten on every multi-octet character.
% The lead octet also goes through \string: for an inert character token
% this changes nothing, and it keeps an active character from being
% expanded inside the \edef.
\long\def\UTFviii@two@octets#1#2{%
 \edef\unicodedomino@tmp{%
  \string#1%
  \unicodedomino@last{\string#2}%
 }%
 \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
}%
\long\def\UTFviii@three@octets#1#2#3{%
 \edef\unicodedomino@tmp{%
  \string#1%
  \unicodedomino@last{\string#2}%
  \unicodedomino@last{\string#3}%
 }%
 \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
}%
\long\def\UTFviii@four@octets#1#2#3#4{%
 \edef\unicodedomino@tmp{%
  \string#1%
  \unicodedomino@last{\string#2}%
  \unicodedomino@last{\string#3}%
  \unicodedomino@last{\string#4}%
 }%
 \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
}%

% main handler
% Replacement for \UTFviii@defined. #1 is the single control sequence
% token that \unicodedomino@octets builds via \csname u8:<octets>\endcsname.
%  - #1 has no definition (it compares equal to \relax): validate the
%    octets; for an acceptable sequence either warn and typeset a domino
%    or pass the octets through, for an unacceptable one raise an error.
%  - #1 is defined: hand over to that definition.
\def\UTFviii@defined#1{%
 \ifx#1\relax%
  % \UTFviii@chkseq expands to nothing for an acceptable sequence and to
  % a 1 otherwise, so this compares \relax with \relax (true) or with 1
  \if\relax\expandafter\UTFviii@chkseq\string#1\empty\relax%
   % unknown char
   \ifx\protect\@typeset@protect%
    % not protected
    \PackageWarning{inputenc}{%
     Unicode\space char\space\expandafter\UTFviii@splitcsname\string#1\relax%
     \space not\space set\space up\space for\space use\MessageBreak with\space%
     LaTeX, replacing%
    }%
    % note: same warning as in \UTFviii@undefined@err in utf8.def v1.2c,
    % except adding “, replacing” at the end and not being an error
    \expandafter\unicodedomino@decode\string#1\relax%
   \else%
    % protected, just write the original character
    % (\@gobblefour removes the four characters in front of the octets in
    % the \string output: escape character, u, 8 and the colon; this
    % assumes a printable \escapechar)
    \expandafter\@gobblefour\string#1%
   \fi%
  \else%
   % invalid encoding
   \PackageError{inputenc}{%
    Invalid\space UTF-8\space byte\space sequence:%
    \expandafter\unicodedomino@splithex\string#1\relax%
   }{Do ensure the source document is saved in UTF-8 encoding.}%
  \fi%
 \else%
  % known char, expand
  % (the \expandafter closes the conditional before #1 runs)
  \expandafter#1%
 \fi%
}%

% input validation, including security-relevant checks
% Argument shape: <prefix>:<octets>\empty; used on the \string of a
% u8:<octets> control sequence name. #1 is the discarded prefix up to the
% colon, #2 the lead octet, #3 the remaining octets.
% Expands to nothing when the sequence is accepted and to a single 1 when
% it is rejected (fully expandable, so it can sit inside an \if test).
% Accepted forms, by lead octet:
%   00-7F  no further octets
%   C2-DF  one trail octet 80-BF
%   E0     A0-BF, then one 80-BF
%   E1-EF  two 80-BF
%   F0     90-BF, then two 80-BF
%   F1-F3  three 80-BF
%   F4     80-8F, then two 80-BF
% Lead octets 80-C1 and F5-FF are always rejected.
% NOTE(review): lead octet ED takes the generic E1-EF branch, so the
% sequences ED A0-BF xx (surrogates U+D800-U+DFFF) are accepted here;
% confirm whether that is intended.
\def\UTFviii@chkseq#1:#2#3\empty{%
 \ifnum`#2<"80 %
  \ifx\empty#3\empty%
  \else%
   1%
  \fi%
 \else%
  \ifnum`#2<"C2 %
   1%
  \else%
   \ifnum`#2<"E0 %
    % one 80-BF
    \UTFviii@chksq@onetrail#3\empty%
   \else%
    \ifnum`#2<"E1 %
     % A0-BF + one 80-BF
     \UTFviii@chksq@a@trail#3\empty%
    \else%
     \ifnum`#2<"F0 %
      % two 80-BF
      \UTFviii@chksq@twotrail#3\empty%
     \else%
      \ifnum`#2<"F1 %
       % 90-BF + two 80-BF
       \UTFviii@chksq@ninetytrails#3\empty%
      \else%
       \ifnum`#2<"F4 %
        % three 80-BF
        \UTFviii@chksq@threetrail#3\empty%
       \else%
        \ifnum`#2<"F5 %
         % 80-8F + two 80-BF
         \UTFviii@chksq@belowninetytrails#3\empty%
        \else%
         1%
        \fi%
       \fi%
      \fi%
     \fi%
    \fi%
   \fi%
  \fi%
 \fi%
}%
% Check the final trail octet. #1 is the octet, #2 whatever follows it
% before the \empty delimiter. Expands to nothing when #1 is the only
% octet left and lies in 80-BF, and to 1 in every other case.
\def\UTFviii@chksq@onetrail#1#2\empty{%
 \ifx\empty#2\empty%
  \ifnum`#1>"7F %
   \ifnum`#1>"BF %
    1%
   \fi%
  \else%
   1%
  \fi%
 \else%
  % surplus octets after the last expected one
  1%
 \fi%
}%
% Check an octet restricted to A0-BF that must be followed by one more
% trail octet. #1 is the octet, #2 the rest before the \empty delimiter.
% Expands to 1 when nothing follows or #1 is out of range; otherwise
% the verdict comes from \UTFviii@chksq@onetrail applied to the rest.
\def\UTFviii@chksq@a@trail#1#2\empty{%
 \ifx\empty#2\empty%
  % sequence ends too early
  1%
 \else%
  \ifnum`#1>"9F %
   \ifnum`#1>"BF %
    1%
   \else%
    \UTFviii@chksq@onetrail#2\empty%
   \fi%
  \else%
   1%
  \fi%
 \fi%
}%
% Check the first of two remaining trail octets. #1 is the octet, #2 the
% rest before the \empty delimiter. Expands to 1 when nothing follows or
% #1 is outside 80-BF; otherwise the verdict comes from
% \UTFviii@chksq@onetrail applied to the rest.
\def\UTFviii@chksq@twotrail#1#2\empty{%
 \ifx\empty#2\empty%
  % sequence ends too early
  1%
 \else%
  \ifnum`#1>"7F %
   \ifnum`#1>"BF %
    1%
   \else%
    \UTFviii@chksq@onetrail#2\empty%
   \fi%
  \else%
   1%
  \fi%
 \fi%
}%
% Check an octet restricted to 90-BF that must be followed by two more
% trail octets. #1 is the octet, #2 the rest before the \empty delimiter.
% Expands to 1 when nothing follows or #1 is out of range; otherwise
% the verdict comes from \UTFviii@chksq@twotrail applied to the rest.
\def\UTFviii@chksq@ninetytrails#1#2\empty{%
 \ifx\empty#2\empty%
  % sequence ends too early
  1%
 \else%
  \ifnum`#1>"8F %
   \ifnum`#1>"BF %
    1%
   \else%
    \UTFviii@chksq@twotrail#2\empty%
   \fi%
  \else%
   1%
  \fi%
 \fi%
}%
% Check the first of three remaining trail octets. #1 is the octet, #2
% the rest before the \empty delimiter. Expands to 1 when nothing follows
% or #1 is outside 80-BF; otherwise the verdict comes from
% \UTFviii@chksq@twotrail applied to the rest.
\def\UTFviii@chksq@threetrail#1#2\empty{%
 \ifx\empty#2\empty%
  % sequence ends too early
  1%
 \else%
  \ifnum`#1>"7F %
   \ifnum`#1>"BF %
    1%
   \else%
    \UTFviii@chksq@twotrail#2\empty%
   \fi%
  \else%
   1%
  \fi%
 \fi%
}%
% Check an octet restricted to 80-8F that must be followed by two more
% trail octets. #1 is the octet, #2 the rest before the \empty delimiter.
% Expands to 1 when nothing follows or #1 is out of range; otherwise
% the verdict comes from \UTFviii@chksq@twotrail applied to the rest.
\def\UTFviii@chksq@belowninetytrails#1#2\empty{%
 \ifx\empty#2\empty%
  % sequence ends too early
  1%
 \else%
  \ifnum`#1>"7F %
   \ifnum`#1>"8F %
    1%
   \else%
    \UTFviii@chksq@twotrail#2\empty%
   \fi%
  \else%
   1%
  \fi%
 \fi%
}%

% clean up after ourselves
% undo the \makeatletter from the top of the file, then stop reading
\makeatother%
\endinput