% -*- mode: tex -*-
%-
% Copyright © 2018
%	mirabilos <m@mirbsd.org>
% Copyright © 2017, 2018, 2019
%	mirabilos <t.glaser@tarent.de>
% with contributions via chat by (among others)
%	David Carlisle <http://tex.stackexchange.com/users/1090>
%
% Provided that these terms and disclaimer and all copyright notices
% are retained or reproduced in an accompanying document, permission
% is granted to deal in this work without restriction, including un‐
% limited rights to use, publicly perform, distribute, sell, modify,
% merge, give away, or sublicence.
%
% This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
% the utmost extent permitted by applicable law, neither express nor
% implied; without malicious intent or gross negligence. In no event
% may a licensor, author or contributor be held liable for indirect,
% direct, other damage, loss, or other issues arising in any way out
% of dealing in the work, even if advised of the possibility of such
% damage or existence of a defect, except proven that it results out
% of said person’s immediate fault when using the work as intended.
%-
% Change UTF-8 input encoding to not error out, but substitute, upon
% encountering characters not set up with newunicodechar. Also fixes
% decoding to error out on overlong encoded characters, and (likely)
% makes all UTF-8 available in listings.

% package identification: demand the LaTeX2e format and announce this
% package (name, release date, version and description) to the kernel
\NeedsTeXFormat{LaTeX2e}%
\ProvidesPackage{unicodedomino}[2019/11/12 1.11~ Domino for unknown codepoints]% in Tₑχ/LᴬTᴇΧ

% check prerequisites at load time: the \numexpr primitive and the
% \UTFviii@defined hook of the UTF-8 input encoding must both exist
% note: \PackageError takes three arguments (package, message, help);
% the help text must be given explicitly, otherwise the \fi following
% the call would be grabbed as the third argument
\ifx\numexpr\@undefined%
 \PackageError{unicodedomino}{This package requires numexpr}{%
  Use a TeX engine that provides the e-TeX extensions.%
 }%
\fi%
\ifx\UTFviii@defined\@undefined%
 \PackageError{unicodedomino}{This package requires UTF-8 input encoding}{%
  Make sure the utf8 input encoding is active before loading this package.%
 }%
\fi%

% pull compatibility code for users of older base/utf8ienc.dtx
\input{unicodedomino_compat.def}%

% pull code from other files
% note: the replacement decoder is only read when \UTFviii@decode is
% still undefined at this point; the cosmetics file is always read
\ifx\UTFviii@decode\@undefined%
 \input{unicodedomino_kernel_better_decode.def}%
\fi%
\input{unicodedomino_kernel_cosmetics.def}%

% retrieve the last octet
% \unicodedomino@last{<tokens>}: expands the first token of the
% argument once (the callers in this file pass \string<token>) and
% hands the result to the helper below, followed by three \empty
% tokens that serve as end markers; works by expansion only
\def\unicodedomino@last#1{%
 \expandafter\unicodedomino@l@st#1\empty\empty\empty%
}%
% helper: #1 is the current token, #2 the token after it and #3
% whatever follows up to the next \empty marker
\def\unicodedomino@l@st#1#2#3\empty{%
 \ifx\empty#2%
  % nothing but a marker follows, so #1 is the last token: emit it
  #1%
 \else%
  % drop #1 and recurse on the remainder, restoring the markers
  \unicodedomino@l@st#2#3\empty\empty%
 \fi%
}%

% collect octet tokens, trim them and pass them on to handler
% Which macros get overridden depends on whether the macro
% \UTFviii@two@octets@combine exists. In every variant #1 is used
% as is, each further argument is reduced to the last token of its
% \string form by \unicodedomino@last, and the collected result is
% passed, terminated by \empty, to \unicodedomino@octets.
% note: the collecting is done with \edef, i.e. by an assignment
\ifx\UTFviii@two@octets@combine\@undefined%
 % v1.2h 2019/07/09, or older
 % here the \protect state is checked by these macros themselves
 \long\def\UTFviii@two@octets#1#2{%
  \ifx\protect\@typeset@protect%
   \edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}}%
   \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
  \else%
   % protected, just write the original multibyte character
   \string#1\string#2%
  \fi%
 }%
 \long\def\UTFviii@three@octets#1#2#3{%
  \ifx\protect\@typeset@protect%
   \edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}\unicodedomino@last{\string#3}}%
   \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
  \else%
   % protected, just write the original multibyte character
   \string#1\string#2\string#3%
  \fi%
 }%
 \long\def\UTFviii@four@octets#1#2#3#4{%
  \ifx\protect\@typeset@protect%
   \edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}\unicodedomino@last{\string#3}\unicodedomino@last{\string#4}}%
   \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
  \else%
   % protected, just write the original multibyte character
   \string#1\string#2\string#3\string#4%
  \fi%
 }%
\else%
 % v1.2i 2019/07/09, or newer
 % only the ...@combine macros are replaced here and they contain
 % no \protect test of their own
 \long\def\UTFviii@two@octets@combine#1#2{%
   \edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}}%
   \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
 }%
 \long\def\UTFviii@three@octets@combine#1#2#3{%
   \edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}\unicodedomino@last{\string#3}}%
   \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
 }%
 \long\def\UTFviii@four@octets@combine#1#2#3#4{%
   \edef\unicodedomino@tmp{#1\unicodedomino@last{\string#2}\unicodedomino@last{\string#3}\unicodedomino@last{\string#4}}%
   \expandafter\unicodedomino@octets\unicodedomino@tmp\empty%
 }%
\fi%

% handle trimmed octets
% #1 (everything up to the terminating \empty) holds the octets of
% one character; the control sequence named u8:<octets> is built
% from them first and then passed as a single token to the main
% handler \UTFviii@defined
\def\unicodedomino@octets#1\empty{%
 \expandafter\UTFviii@defined\csname u8:#1\endcsname%
}%

% split an invalid byte sequence for error output
% the input is the \string form of a u8:<octets> control sequence
% followed by \relax; #1 (everything up to the first colon) is
% dropped and the octets in #2 are passed on for printing
\gdef\unicodedomino@splitseq#1:#2\relax{%
 \unicodedomino@hexseq#2\relax%
}%
% print the octets one by one, each preceded by a space and a double
% quote: #1 is the first octet, #2 the remaining ones (may be empty)
\gdef\unicodedomino@hexseq#1#2\relax{%
 % display first octet
 \space "\unicodedomino@hexbyte{`#1}%
 % recursively handle remaining octets
 \ifx\relax#2\relax\else\unicodedomino@hexseq#2\relax\fi%
}%
% format a number as two-digit hex
% #1 is a number in any notation TeX accepts (expected to be an octet
% value); a 0 is put in front of values below 16 before the number
% itself is printed by \UTFviii@hexnumber
\gdef\unicodedomino@hexbyte#1{%
 \ifnum#1<16 0\fi%
 \UTFviii@hexnumber{#1}%
}%

% main handler
% #1 is the u8:<octets> control sequence of the character just read.
% If it has a definition it is simply executed; if not, the octets
% are checked and either a warning is issued and the domino
% replacement typeset, or an error about an invalid sequence raised.
\def\UTFviii@defined#1{%
 % a name freshly built with \csname and never defined equals \relax
 \ifx#1\relax%
  % unknown character
  % NOTE(review): \UTFviii@checkseq is defined outside this file
  % section; presumably it expands to nothing for a well-formed
  % sequence, making this \if compare \relax with \relax -- confirm
  \if\relax\expandafter\UTFviii@checkseq\string#1\empty\relax%
   % valid codepoint / multibyte sequence
   \PackageWarning{inputenc}{%
    Unicode character \expandafter\UTFviii@splitcsname\string#1\relax\space%
    not set up for use\MessageBreak with LaTeX, replacing%
   }%
   % note: warning is the same as in utf8ienc.dtx v1.2d except
   % with the string “, replacing” added at the end
   % typeset the replacement box showing the codepoint
   \expandafter\unicodedomino@domino\string#1\relax%
  \else%
   % invalid multibyte character
   % the message lists the offending octets in hex; the help text
   % (third argument) is the single token on the following line
   \PackageError{inputenc}{Invalid UTF-8 byte sequence:%
    \expandafter\unicodedomino@splitseq\string#1\relax}%
    \UTFviii@invalid@help
  \fi%
 \else%
  % known character, expand
  % the \expandafter removes the \fi before #1 is executed
  \expandafter#1%
 \fi%
}%

% prepare for outputting the domino block
% the input is the \string form of the u8:<octets> name followed by
% \relax; #1 (everything up to the colon) is ignored, #2 are the
% octets. They are converted to a number via \decode@UTFviii, the
% number is formatted by \unicodedomino@domino@hex, and the resulting
% digits become the arguments of \unicodedomino@domino@switch.
\def\unicodedomino@domino#1:#2\relax{%
 % the \edef performs the whole conversion by expansion; the
 % \expandafter pair makes \the\numexpr deliver its digits before
 % \unicodedomino@domino@hex reads its argument
 \edef\unicodedomino@tmp{%
  \expandafter\unicodedomino@domino@hex\expandafter{%
   \the\numexpr\decode@UTFviii#2\relax%
  }%
 }%
 % unpack the stored digits in front of the switch macro
 \expandafter\unicodedomino@domino@switch\unicodedomino@tmp%
}%

% convert to six nybbles
% #1 is a decimal number; one 0 is emitted for every hex digit by
% which it falls short of six, then \UTFviii@hexnumber prints the
% number itself. The bounds are 16^5, 16^4, 16^3, 16^2 and 16; the
% tests are nested because a number that is not below one bound
% cannot be below any of the smaller ones either.
\def\unicodedomino@domino@hex#1{%
 \ifnum#1<1048576 0%
  \ifnum#1<65536 0%
   \ifnum#1<4096 0%
    \ifnum#1<256 0%
     \ifnum#1<16 0\fi%
    \fi%
   \fi%
  \fi%
 \fi%
 \UTFviii@hexnumber{#1}%
}%

% decide whether to output a BMP or astral planes block
% #1 to #6 are the six hex nybbles of the codepoint, most significant
% first. If the two leading nybbles are zero, the box is filled with
% two digits over two; otherwise with three digits over three.
\def\unicodedomino@domino@switch#1#2#3#4#5#6{%
 % the blank after the 0 terminates the number; without it TeX
 % keeps looking for more digits and expands \unicodedomino@box
 % before the comparison has been decided
 \ifnum"#1#2=0 %
  \unicodedomino@box{#3#4}{#5#6}%
 \else%
  \unicodedomino@box{#1#2#3}{#4#5#6}%
 \fi%
}%

% render the actual domino piece
% #1 is set as superscript and #2 as subscript, both in typewriter
% type and enclosed in a frame
\def\unicodedomino@box#1#2{%
 \begingroup%
 % frame geometry; the group keeps these settings local
 \setlength{\fboxrule}{.4pt}%
 \setlength{\fboxsep}{.1em}%
 \texttt{%
  \fbox{%
   % zero-width, left-aligned box: the superscript takes up no
   % space, so the subscript starts at the same position
   \makebox[0pt][l]{\textsuperscript{#1}}%
   \textsubscript{#2}%
  }%
 }%
 \endgroup%
}%

\endinput