Skip to content
Snippets Groups Projects
  1. Jun 11, 2024
  2. Jan 07, 2024
  3. Jun 10, 2023
  4. Jan 28, 2023
  5. Jul 25, 2022
  6. May 31, 2022
  7. Jan 20, 2022
    • mirabilos's avatar
      strip PDF page groups from checked-in PDFs · a2bf5133
      mirabilos authored and mirabilos's avatar mirabilos committed
      see commit 636879066fa0e31f129ca3c1054953a07633041c in verein.git
      for the rationale; the method used was…
      
      $ git find -gitfiles \*.pdf -print0 | xargs -0r mksh ~/pdfstrippagegroup.ksh
      
      … with the following script:
      $ cat ~/pdfstrippagegroup.ksh
      #!/bin/mksh
      
      unset LANGUAGE
      export LC_ALL=C
      
      # Report a fatal error on stderr and terminate the whole script.
      # The message is prefixed with "E:", the basename of $0 and the current
      # value of $dst (set by the main loop to the file being processed);
      # all arguments are joined into the message text. Exits with status 1.
      die() {
      	print -ru2 -- "E: ${0##*/}: $dst: $*"
      	exit 1
      }
      
      # Main loop: rewrite every PDF named on the command line in place.
      # Per file: convert to QDF, filter the QDF text with perl, run fix-qdf
      # on the result, convert back to a compressed PDF and replace the
      # original. Any failing step aborts the whole script via die.
      for dst in "$@"; do
      	# all temporary files share the prefix "$dst~.tmp" so that the
      	# cleanup at the end of the iteration can remove them with one glob
      	tpf=$dst\~.tmp
      	tq1=$tpf.1.qdf
      	tq2=$tpf.2.qdf
      	tq3=$tpf.3.qdf
      	tmp=$tpf.pdf
      	# step 1: uncompressed QDF form of the input, written to $tq1
      	qpdf --stream-data=uncompress --normalize-content=n --qdf \
      	    "$dst" "$tq1" || die 'qpdf error converting to QDF'
      	# remove PDF page groups (required)
      	# also remove Producer info (size improvement)
      	#
      	# step 2: line-based filter from $tq1 to $tq2. It matches on exact
      	# line text (e.g. a bare "<<", or keys indented by two spaces), so it
      	# presumably relies on the layout qpdf --qdf produces (TODO confirm).
      	# The variable $s holds the parser state:
      	#   0  not inside a dictionary
      	#   1  inside a dictionary that was opened by a bare "<<" line
      	#   2  inside a multi-line "/Group <<" value that is being withheld
      	#   3  a /Group entry of the current dictionary is fully withheld
      	#   4  inside the "trailer <<" dictionary
      	# A /Group entry is never printed where it occurs; its text is kept
      	# in $gt. When the closing ">>" of the dictionary is reached, the
      	# saved text is printed just before it unless a "  /Type /Page" line
      	# was seen in the same dictionary (in state 1 or 3, so before or after
      	# the /Group entry). Inside the trailer, lines starting with
      	# "  /Info " are dropped. All other lines pass through unchanged.
      	perl -e '
      		use strict;
      		use warnings;
      
      		my $s = "0";
      		my $ispage = 0;
      		my $gt = "<ERR>";
      		while (my $line = <>) {
      			my $skip = 0;
      			chomp(my $l = $line);
      			if ($l eq "<<") {
      				$ispage = 0;
      				$s = 1 if $s eq 0;
      			} elsif ($l eq ">>") {
      				if ($s eq 3) {
      					# only omit page groups
      					print $gt unless $ispage;
      				}
      				$s = 0;
      			} elsif ($l eq "  /Type /Page") {
      				if (($s == 1) || ($s == 3)) {
      					$ispage = 1;
      				}
      			} elsif (($s == 1) && ($l eq "  /Group <<")) {
      				$gt = $line;
      				$s = 2;
      				$skip = 1;
      			} elsif (($s == 1) && ($l =~ qr`^  /Group `)) {
      				$gt = $line;
      				$s = 3;
      				$skip = 1;
      			} elsif ($s == 2) {
      				$gt .= $line;
      				$s = 3 if $l eq "  >>";
      				$skip = 1;
      			} elsif ($l eq "trailer <<") {
      				$s = 4;
      			} elsif ($s == 4) {
      				# size optimisation hack
      				# remove CreationDate, Producer, etc.
      				$skip = 1 if $l =~ qr`^  /Info `;
      			}
      			print $line unless $skip;
      		}
      	' <"$tq1" >"$tq2" || die 'error during perl QDF filtering'
      	# step 3: fix-qdf from $tq2 to $tq3; presumably repairs the QDF
      	# bookkeeping after the textual edit (see the qpdf manual; TODO confirm)
      	fix-qdf <"$tq2" >"$tq3" || die 'error during fix-qdf'
      	# step 4: convert back to a compressed PDF in $tmp. Two qpdf command
      	# lines are tried in order; the second omits --recompress-flate and
      	# --compression-level=9. Each attempt runs in a subshell with set -x,
      	# and its stderr (trace included) is collected in $tq1, which is
      	# reused as a log file here since the first QDF has already been
      	# consumed. The log is only shown if both attempts fail.
      	# bullseye first then buster which lacks options
      	if ! (set -x; qpdf --stream-data=compress --recompress-flate \
      	    --compression-level=9 --normalize-content=n \
      	    --object-streams=disable --deterministic-id \
      	    "$tq3" "$tmp") 2>"$tq1" && ! (set -x; qpdf \
      	    --stream-data=compress --normalize-content=n \
      	    --object-streams=disable --deterministic-id \
      	    "$tq3" "$tmp") 2>>"$tq1"; then
      		cat "$tq1" >&2
      		die 'qpdf error converting from QDF'
      	fi
      	# step 5: replace the original, then remove every remaining file
      	# whose name starts with the temporary prefix
      	mv "$tmp" "$dst" || die 'could not create target file'
      	rm "$tpf"* || die 'error cleaning up tmp QDF files'
      	print -ru2 -- "I: fixed up $dst"
      done
      # reached only if every file was processed without die being called
      print -ru2 -- "I: done"
      a2bf5133
  8. Nov 20, 2021
  9. Nov 18, 2021
  10. Nov 17, 2021
  11. Oct 20, 2021
  12. Oct 15, 2021
  13. Jul 01, 2021
  14. Apr 14, 2021
  15. Jan 28, 2021
  16. Nov 20, 2020
  17. Oct 31, 2020
  18. Jun 20, 2020
  19. Jun 14, 2020
  20. Jun 13, 2020
  21. May 13, 2020
  22. Dec 22, 2019
Loading