1. 25 Jul, 2022 1 commit
  2. 31 May, 2022 3 commits
  3. 20 Jan, 2022 1 commit
    • mirabilos's avatar
      strip PDF page groups from checked-in PDFs · a2bf5133
      mirabilos authored and mirabilos's avatar mirabilos committed
      see commit 636879066fa0e31f129ca3c1054953a07633041c in verein.git
      for the rationale; the method used was…
      
      $ git find -gitfiles \*.pdf -print0 | xargs -0r mksh ~/pdfstrippagegroup.ksh
      
      … with the following script:
      $ cat ~/pdfstrippagegroup.ksh
      #!/bin/mksh
      
      # Run every command below in the plain C locale: LANGUAGE is dropped
      # and LC_ALL is forced to C for this script and its children.
      unset LANGUAGE
      export LC_ALL=C
      
      # die MESSAGE...
      # Report a fatal error on standard error and terminate the script with
      # exit status 1. The message is prefixed with "E:", the basename of
      # the script ($0 with its directory part stripped) and the value of
      # the global $dst, which the main loop sets to the file currently
      # being processed.
      die() {
      	print -ru2 -- "E: ${0##*/}: $dst: $*"
      	exit 1
      }
      
      # Main loop: rewrite every PDF named on the command line in place.
      # Per file: PDF -> QDF -> filtered QDF -> fix-qdf -> compressed PDF,
      # which then replaces the original. Any failing step aborts the whole
      # run through die().
      for dst in "$@"; do
      	# All temporary files share the "$dst~.tmp" prefix so that the
      	# single glob in the final rm removes whatever is left of them.
      	tpf=$dst\~.tmp
      	tq1=$tpf.1.qdf
      	tq2=$tpf.2.qdf
      	tq3=$tpf.3.qdf
      	tmp=$tpf.pdf
      	# Step 1: write a QDF version of the input with uncompressed
      	# streams and content normalisation switched off.
      	qpdf --stream-data=uncompress --normalize-content=n --qdf \
      	    "$dst" "$tq1" || die 'qpdf error converting to QDF'
      	# remove PDF page groups (required)
      	# also remove Producer info (size improvement)
      	#
      	# Step 2: line-based filter over the QDF text. $s is the state:
      	#   0  outside a dictionary
      	#   1  inside a dictionary opened by a line that is exactly "<<"
      	#   2  collecting a multi-line "  /Group <<" ... "  >>" entry
      	#   3  a /Group entry has been captured in $gt and withheld
      	#   4  after the "trailer <<" line
      	# A /Group entry is never printed where it was found. When the
      	# enclosing dictionary closes (a line that is exactly ">>") the
      	# captured text is printed just before that line, unless the
      	# dictionary contained the line "  /Type /Page" (seen in state
      	# 1 or 3, i.e. before or after the /Group entry); for such
      	# dictionaries the entry is dropped. In state 4, lines starting
      	# with "  /Info " are dropped. All other lines pass through.
      	perl -e '
      		use strict;
      		use warnings;
      
      		my $s = "0";
      		my $ispage = 0;
      		my $gt = "<ERR>";
      		while (my $line = <>) {
      			my $skip = 0;
      			chomp(my $l = $line);
      			if ($l eq "<<") {
      				$ispage = 0;
      				$s = 1 if $s eq 0;
      			} elsif ($l eq ">>") {
      				if ($s eq 3) {
      					# only omit page groups
      					print $gt unless $ispage;
      				}
      				$s = 0;
      			} elsif ($l eq "  /Type /Page") {
      				if (($s == 1) || ($s == 3)) {
      					$ispage = 1;
      				}
      			} elsif (($s == 1) && ($l eq "  /Group <<")) {
      				$gt = $line;
      				$s = 2;
      				$skip = 1;
      			} elsif (($s == 1) && ($l =~ qr`^  /Group `)) {
      				$gt = $line;
      				$s = 3;
      				$skip = 1;
      			} elsif ($s == 2) {
      				$gt .= $line;
      				$s = 3 if $l eq "  >>";
      				$skip = 1;
      			} elsif ($l eq "trailer <<") {
      				$s = 4;
      			} elsif ($s == 4) {
      				# size optimisation hack
      				# remove CreationDate, Producer, etc.
      				$skip = 1 if $l =~ qr`^  /Info `;
      			}
      			print $line unless $skip;
      		}
      	' <"$tq1" >"$tq2" || die 'error during perl QDF filtering'
      	# Step 3: pass the edited QDF through fix-qdf (presumably to
      	# repair the offsets and lengths invalidated by the edit; see
      	# the qpdf manual).
      	fix-qdf <"$tq2" >"$tq3" || die 'error during fix-qdf'
      	# Step 4: convert back to a compressed PDF. The first attempt
      	# uses --recompress-flate and --compression-level=9; if it fails
      	# a second attempt runs without these two options. "$tq1" is no
      	# longer needed as QDF and is reused as a log: the first attempt
      	# overwrites it with its stderr (including the set -x trace), the
      	# second appends. The log is only shown if both attempts fail.
      	# bullseye first then buster which lacks options
      	if ! (set -x; qpdf --stream-data=compress --recompress-flate \
      	    --compression-level=9 --normalize-content=n \
      	    --object-streams=disable --deterministic-id \
      	    "$tq3" "$tmp") 2>"$tq1" && ! (set -x; qpdf \
      	    --stream-data=compress --normalize-content=n \
      	    --object-streams=disable --deterministic-id \
      	    "$tq3" "$tmp") 2>>"$tq1"; then
      		cat "$tq1" >&2
      		die 'qpdf error converting from QDF'
      	fi
      	# Step 5: replace the original with the result, then remove the
      	# remaining temporary files that share the "$tpf" prefix.
      	mv "$tmp" "$dst" || die 'could not create target file'
      	rm "$tpf"* || die 'error cleaning up tmp QDF files'
      	print -ru2 -- "I: fixed up $dst"
      done
      # Only reached when every file was processed without die() firing.
      print -ru2 -- "I: done"
      a2bf5133
  4. 20 Nov, 2021 2 commits
  5. 18 Nov, 2021 3 commits
  6. 17 Nov, 2021 1 commit
  7. 20 Oct, 2021 1 commit
  8. 15 Oct, 2021 1 commit
  9. 01 Jul, 2021 2 commits
  10. 14 Apr, 2021 1 commit
  11. 28 Jan, 2021 1 commit
  12. 20 Nov, 2020 1 commit
  13. 31 Oct, 2020 2 commits
  14. 20 Jun, 2020 1 commit
  15. 14 Jun, 2020 2 commits
  16. 13 Jun, 2020 4 commits
  17. 13 May, 2020 1 commit
  18. 22 Dec, 2019 5 commits
  19. 21 Dec, 2019 1 commit
  20. 14 Dec, 2019 1 commit
  21. 14 Nov, 2019 3 commits
  22. 13 Nov, 2019 2 commits