about summary refs log tree commit homepage
path: root/lib/PublicInbox/GzipFilter.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/GzipFilter.pm')
-rw-r--r--lib/PublicInbox/GzipFilter.pm112
1 files changed, 60 insertions, 52 deletions
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index e37f1f76..8b630f25 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 #
 # In public-inbox <=1.5.0, public-inbox-httpd favored "getline"
@@ -18,6 +18,7 @@ use Compress::Raw::Zlib qw(Z_OK);
 use PublicInbox::CompressNoop;
 use PublicInbox::Eml;
 use PublicInbox::GitAsyncCat;
+use Carp qw(carp);
 
 our @EXPORT_OK = qw(gzf_maybe);
 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
@@ -92,30 +93,24 @@ sub gone { # what: search/over/mm
         undef;
 }
 
-# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
+# for GetlineResponse (via Qspawn) when NOT using $env->{'pi-httpd.async'}
 # Also used for ->getline callbacks
-sub translate ($$) {
-        my $self = $_[0]; # $_[1] => input
+sub translate {
+        my $self = shift; # $_[1] => input
 
         # allocate the zlib context lazily here, instead of in ->new.
         # Deflate contexts are memory-intensive and this object may
         # be sitting in the Qspawn limiter queue for a while.
-        my $gz = $self->{gz} //= gzip_or_die();
-        my $zbuf = delete($self->{zbuf});
-        if (defined $_[1]) { # my $buf = $_[1];
-                my $err = $gz->deflate($_[1], $zbuf);
-                die "gzip->deflate: $err" if $err != Z_OK;
-                return $zbuf if length($zbuf) >= 8192;
-
-                $self->{zbuf} = $zbuf;
-                '';
+        $self->{gz} //= gzip_or_die();
+        if (defined $_[0]) { # my $buf = $_[1];
+                zmore($self, @_);
+                length($self->{zbuf}) >= 8192 ? delete($self->{zbuf}) : '';
         } else { # undef == EOF
-                my $err = $gz->flush($zbuf);
-                die "gzip->flush: $err" if $err != Z_OK;
-                $zbuf;
+                $self->zflush;
         }
 }
 
+# returns PublicInbox::HTTP::{Chunked,Identity}
 sub http_out ($) {
         my ($self) = @_;
         $self->{http_out} // do {
@@ -128,73 +123,86 @@ sub http_out ($) {
         };
 }
 
+# returns undef if HTTP client disconnected, may return 0
+# because ->translate can return ''
 sub write {
-        # my $ret = bytes::length($_[1]); # XXX does anybody care?
-        http_out($_[0])->write(translate($_[0], $_[1]));
+        my $self = shift;
+        http_out($self)->write($self->translate(@_));
+}
+
+sub zfh {
+        $_[0]->{zfh} // do {
+                open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or
+                        die "open: $!";
+                $_[0]->{zfh}
+        };
 }
 
 # similar to ->translate; use this when we're sure we know we have
 # more data to buffer after this
 sub zmore {
-        my $self = $_[0]; # $_[1] => input
+        my $self = shift;
+        my $zfh = delete $self->{zfh};
+        if (@_ > 1 || $zfh) {
+                print { $zfh // zfh($self) } @_;
+                @_ = (delete $self->{pbuf});
+                delete $self->{zfh};
+        };
         http_out($self);
-        my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
-        die "gzip->deflate: $err" if $err != Z_OK;
-        undef;
+        my $err;
+        ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or
+                die "gzip->deflate: $err";
 }
 
 # flushes and returns the final bit of gzipped data
-sub zflush ($;$) {
-        my $self = $_[0]; # $_[1] => final input (optional)
-        my $zbuf = delete $self->{zbuf};
-        my $gz = delete $self->{gz};
+sub zflush ($;@) {
+        my $self = shift; # $_[1..Inf] => final input (optional)
+        zmore($self, @_) if scalar(@_) || $self->{zfh};
+        # not a bug, recursing on DS->write failure
+        my $gz = delete $self->{gz} // return '';
         my $err;
-        if (defined $_[1]) {
-                $err = $gz->deflate($_[1], $zbuf);
-                die "gzip->deflate: $err" if $err != Z_OK;
-        }
-        $err = $gz->flush($zbuf);
-        die "gzip->flush: $err" if $err != Z_OK;
+        my $zbuf = delete $self->{zbuf};
+        ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err";
         $zbuf;
 }
 
 sub close {
         my ($self) = @_;
         my $http_out = http_out($self) // return;
-        $http_out->write(zflush($self));
-        delete($self->{http_out})->close;
+        $http_out->write($self->zflush);
+        (delete($self->{http_out}) // return)->close;
 }
 
-sub bail  {
+sub bail {
         my $self = shift;
-        if (my $env = $self->{env}) {
-                warn @_, "\n";
-                my $http = $env->{'psgix.io'} or return; # client abort
-                eval { $http->close }; # should hit our close
-                warn "E: error in http->close: $@" if $@;
-                eval { $self->close }; # just in case...
-                warn "E: error in self->close: $@" if $@;
-        } else {
-                warn @_, "\n";
-        }
+        carp @_;
+        my $env = $self->{env} or return;
+        my $http = $env->{'psgix.io'} or return; # client abort
+        eval { $http->close }; # should hit our close
+        carp "E: error in http->close: $@" if $@;
+        eval { $self->close }; # just in case...
+        carp "E: error in self->close: $@" if $@;
 }
 
 # this is public-inbox-httpd-specific
 sub async_blob_cb { # git->cat_async callback
         my ($bref, $oid, $type, $size, $self) = @_;
-        my $http = $self->{env}->{'psgix.io'};
+        my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP
         $http->{forward} or return; # client aborted
-        my $smsg = $self->{smsg} or bail($self, 'BUG: no smsg');
-        if (!defined($oid)) {
+        my $smsg = $self->{smsg} or return bail($self, 'BUG: no smsg');
+        $type // return
+                bail($self, "abort: $smsg->{blob} $self->{ibx}->{inboxdir}");
+        if ($type ne 'blob') {
                 # it's possible to have TOCTOU if an admin runs
                 # public-inbox-(edit|purge), just move onto the next message
-                warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n";
+                warn "E: $smsg->{blob} $type in $self->{ibx}->{inboxdir}\n";
                 return $http->next_step($self->can('async_next'));
         }
-        $smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
+        $smsg->{blob} eq $oid or return
+                bail($self, "BUG: $smsg->{blob} != $oid");
         eval { $self->async_eml(PublicInbox::Eml->new($bref)) };
-        bail($self, "E: async_eml: $@") if $@;
-        if ($self->{-low_prio}) {
+        return bail($self, "E: async_eml: $@") if $@;
+        if ($self->{-low_prio}) { # run via PublicInbox::WWW::event_step
                 push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and
                                 PublicInbox::DS::requeue($self->{www});
         } else {