From 2c115a58c613d9ba18acd06f88f5c3bf03e8b8fd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 14 Dec 2016 20:58:00 +0000 Subject: wwwtext: remove outdated comment I originally envisioned wwwtext being more flexible and able to serve arbitrary blobs; but at this point I consider it redundant and public-inbox is not wiki software. --- lib/PublicInbox/WwwText.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index b0f262cd..2e58eeb4 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -1,7 +1,6 @@ # Copyright (C) 2016 all contributors # License: AGPL-3.0+ -# -# serves the /$INBOX/_/* endpoints from :text/* of the git tree + package PublicInbox::WwwText; use strict; use warnings; -- cgit v1.2.3-24-ge0c7 From ab93e9dcb3bf1f439f1afea9e124b88ac830ca7f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 14 Dec 2016 19:28:53 +0000 Subject: t/thread-cycle: no need for Xapian to run this test We don't actually use anything from SearchMsg, just the class name. --- t/thread-cycle.t | 1 - 1 file changed, 1 deletion(-) diff --git a/t/thread-cycle.t b/t/thread-cycle.t index b0844490..9dd2aa3c 100644 --- a/t/thread-cycle.t +++ b/t/thread-cycle.t @@ -3,7 +3,6 @@ use strict; use warnings; use Test::More; -use_ok('PublicInbox::SearchMsg'); use_ok('PublicInbox::SearchThread'); use Email::Simple; my $mt = eval { -- cgit v1.2.3-24-ge0c7 From 464048b28be5063a3151742feaaa170c9d9e3b19 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 14 Dec 2016 23:53:06 +0000 Subject: TODO: note IO::KQueue for the ticket Do not require users to have network access to know what the link refers to. --- TODO | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO b/TODO index d2efcbb6..55720a2c 100644 --- a/TODO +++ b/TODO @@ -47,6 +47,7 @@ all need to be considered for everything we introduce) * portability to FreeBSD (and other Free Software *BSDs) ugh... https://rt.cpan.org/Ticket/Display.html?id=116615 + (IO::KQueue is broken with Danga::Socket) * improve documentation -- cgit v1.2.3-24-ge0c7 From f084e94a4774b95eb45f55fc9f0dfda678522e54 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 17 Dec 2016 04:27:52 +0000 Subject: feed: support publicinbox..feedmax This allows users to customize by using smaller or larger Atom feeds than the default value of 25 entries. --- Documentation/public-inbox-config.pod | 8 ++++++++ lib/PublicInbox/Config.pm | 3 ++- lib/PublicInbox/Feed.pm | 5 +---- lib/PublicInbox/Inbox.pm | 11 +++++++++++ t/config.t | 2 ++ t/feed.t | 13 ++++--------- 6 files changed, 28 insertions(+), 14 deletions(-) diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod index 00376457..cfd6c93d 100644 --- a/Documentation/public-inbox-config.pod +++ b/Documentation/public-inbox-config.pod @@ -120,6 +120,14 @@ addresses or mirrors. Default: none +=item publicinbox..feedmax + +The size of an Atom feed for the inbox. If specified more than +once, only the last value is used. Invalid values (<= 0) will +be treated as the default value. + +Default: 25 + =back =head1 ENVIRONMENT diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 8d66cf8c..6e31df72 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -145,7 +145,8 @@ sub _fill { my $rv = {}; foreach my $k (qw(mainrepo address filter url newsgroup - infourl watch watchheader httpbackendmax)) { + infourl watch watchheader httpbackendmax + feedmax)) { my $v = $self->{"$pfx.$k"}; $rv->{$k} = $v if defined $v; } diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 31d82adb..2a33fd29 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -8,9 +8,6 @@ use warnings; use Email::MIME; use PublicInbox::View; use PublicInbox::WwwAtomStream; -use constant { - MAX_PER_PAGE => 25, # this needs to be tunable -}; # main function sub generate { @@ -114,7 +111,7 @@ sub new_html_footer { sub each_recent_blob { my ($ctx, $cb) = @_; - my $max = $ctx->{max} || MAX_PER_PAGE; + my $max = $ctx->{-inbox}->{feedmax}; my $hex = '[a-f0-9]'; my $addmsg = qr!^:000000 100644 \S+ \S+ A\t(${hex}{2}/${hex}{38})$!; my $delmsg = qr!^:100644 000000 \S+ \S+ D\t(${hex}{2}/${hex}{38})$!; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 8c639082..5503980f 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -29,11 +29,22 @@ sub _weaken_later ($) { $WEAKEN->{"$self"} = $self; } +sub _set_uint ($$$) { + my ($opts, $field, $default) = @_; + my $val = $opts->{$field}; + if (defined $val) { + $val = $val->[-1] if ref($val) eq 'ARRAY'; + $val = undef if $val !~ /\A\d+\z/; + } + $opts->{$field} = $val || $default; +} + sub new { my ($class, $opts) = @_; my $v = $opts->{address} ||= 'public-inbox@example.com'; my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v; $opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost'; + _set_uint($opts, 'feedmax', 25); weaken($opts->{-pi_config}); bless $opts, $class; } diff --git a/t/config.t b/t/config.t index 073d1d03..4bbbc838 100644 --- a/t/config.t +++ b/t/config.t @@ -30,6 +30,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1); 'url' => 'http://example.com/meta', -primary_address => 'meta@public-inbox.org', 'name' => 'meta', + feedmax => 100, -pi_config => $cfg, }, "lookup matches expected output"); @@ -45,6 +46,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1); 'mainrepo' => '/home/pi/test-main.git', 'domain' => 'public-inbox.org', 'name' => 'test', + feedmax => 100, 'url' => 'http://example.com/test', -pi_config => $cfg, }, "lookup matches expected output for test"); diff --git a/t/feed.t b/t/feed.t index 19a9ba09..b60273ed 100644 --- a/t/feed.t +++ b/t/feed.t @@ -46,6 +46,7 @@ my $ibx = PublicInbox::Inbox->new({ name => 'testbox', mainrepo => $git_dir, url => 'http://example.com/test', + feedmax => 3, }); my $git = $ibx->git; my $im = PublicInbox::Import->new($git, $ibx->{name}, 'test@example'); @@ -101,10 +102,7 @@ EOF { # check initial feed { - my $feed = string_feed({ - -inbox => $ibx, - max => 3 - }); + my $feed = string_feed({ -inbox => $ibx }); SKIP: { skip 'XML::Feed missing', 2 unless $have_xml_feed; my $p = XML::Feed->parse(\$feed); @@ -142,10 +140,7 @@ EOF # check spam shows up { - my $spammy_feed = string_feed({ - -inbox => $ibx, - max => 3 - }); + my $spammy_feed = string_feed({ -inbox => $ibx }); SKIP: { skip 'XML::Feed missing', 2 unless $have_xml_feed; my $p = XML::Feed->parse(\$spammy_feed); @@ -167,7 +162,7 @@ EOF # spam no longer shows up { - my $feed = string_feed({ -inbox => $ibx, max => 3 }); + my $feed = string_feed({ -inbox => $ibx }); SKIP: { skip 'XML::Feed missing', 2 unless $have_xml_feed; my $p = XML::Feed->parse(\$feed); -- cgit v1.2.3-24-ge0c7 From 85b2fa7ddd46b8f39ae0ce642eadfe73f39b9746 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 13 Dec 2016 02:33:30 +0000 Subject: atom: implement message threading per RFC 4685 This will allows certain feed readers to render a message thread as described in . Feed readers with knowledge of of RFC 4685 are unknown to us at this time, but perhaps this will encourage future implementations. Existing feed readers I've tested (newsbeuter, feed2imap) seem to ignore these tags gracefully without degradation. --- TODO | 2 -- lib/PublicInbox/WwwAtomStream.pm | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/TODO b/TODO index 55720a2c..b85887ad 100644 --- a/TODO +++ b/TODO @@ -34,8 +34,6 @@ all need to be considered for everything we introduce) the links should point to an anchor tag within the same page, instead; giving the user options. -* implement RFC 4685 (Atom message threading) - * configurable constants (index limits, search results) * handle messages with multiple Message-IDs diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 5720384c..a6817b31 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -9,10 +9,10 @@ use warnings; # FIXME: locale-independence: use POSIX qw(strftime); use Date::Parse qw(strptime); - +use Digest::SHA qw(sha1_hex); use PublicInbox::Address; use PublicInbox::Hval qw(ascii_html); -use PublicInbox::MID qw/mid_clean mid2path mid_escape/; +use PublicInbox::MID qw/mid_clean mid_escape/; # called by PSGI server after getline: sub close {} @@ -72,7 +72,8 @@ sub atom_header { my $mtime = (stat($ibx->{mainrepo}))[9] || time; qq(\n) . - qq{} . + qq() . qq{$title} . qq() . @@ -81,22 +82,33 @@ sub atom_header { feed_updated(gmtime($mtime)); } +sub mid2uuid ($) { + my ($mid) = @_; + utf8::encode($mid); # really screwed up In-Reply-To fields exist + $mid = sha1_hex($mid); + my $h = '[a-f0-9]'; + my (@uuid5) = ($mid =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o); + 'urn:uuid:' . join('-', @uuid5); +} + # returns undef or string sub feed_entry { my ($self, $mime) = @_; my $ctx = $self->{ctx}; my $hdr = $mime->header_obj; my $mid = mid_clean($hdr->header_raw('Message-ID')); - - my $uuid = mid2path($mid); - $uuid =~ tr!/!!d; - my $h = '[a-f0-9]'; - my (@uuid5) = ($uuid =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o); - $uuid = 'urn:uuid:' . join('-', @uuid5); - - $mid = PublicInbox::Hval->new_msgid($mid); - my $href = $ctx->{feed_base_url} . $mid->{href}. '/'; - + my $irt = PublicInbox::View::in_reply_to($hdr); + my $uuid = mid2uuid($mid); + my $base = $ctx->{feed_base_url}; + if (defined $irt) { + my $irt_uuid = mid2uuid($irt); + $irt = mid_escape($irt); + $irt = qq(); + } else { + $irt = ''; + } + my $href = $base . mid_escape($mid) . '/'; my $date = $hdr->header('Date'); my @t = eval { strptime($date) } if defined $date; @t = gmtime(time) unless scalar @t; @@ -124,7 +136,7 @@ sub feed_entry { PublicInbox::View::multipart_text_as_html($mime, $href) . '' . qq!!. - "$uuid"; + "$uuid$irt"; } sub feed_updated { -- cgit v1.2.3-24-ge0c7 From f67eb4d518670f87c97dca67fd2d8ad500ace81e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 14 Dec 2016 21:00:13 +0000 Subject: wwwtext: link to RFC4685 (Atom Threading) This should give this feature some more visibility. --- lib/PublicInbox/WwwText.pm | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index 2e58eeb4..449cb499 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -181,6 +181,10 @@ message threading $WIKI_URL/Atom_(standard) https://tools.ietf.org/html/rfc4287 + Atom Threading Extensions (RFC4685) is supported: + + https://tools.ietf.org/html/rfc4685 + Finally, the gzipped mbox for a thread is available for downloading and importing into your favorite mail client: -- cgit v1.2.3-24-ge0c7 From 7bf8dbcf615cc26f622f5a69192995dfa738a595 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 17 Dec 2016 05:50:30 +0000 Subject: t/config.t: fix feedmax default Oops :x --- t/config.t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/config.t b/t/config.t index 4bbbc838..7271351b 100644 --- a/t/config.t +++ b/t/config.t @@ -30,7 +30,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1); 'url' => 'http://example.com/meta', -primary_address => 'meta@public-inbox.org', 'name' => 'meta', - feedmax => 100, + feedmax => 25, -pi_config => $cfg, }, "lookup matches expected output"); @@ -46,7 +46,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1); 'mainrepo' => '/home/pi/test-main.git', 'domain' => 'public-inbox.org', 'name' => 'test', - feedmax => 100, + feedmax => 25, 'url' => 'http://example.com/test', -pi_config => $cfg, }, "lookup matches expected output for test"); -- cgit v1.2.3-24-ge0c7 From 5f3046879e1aa713e95b46795fc49c9f83923750 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 17 Dec 2016 12:04:10 +0000 Subject: searchmsg: remove locale-dependency for ->date strftime is locale-dependent, which can cause surprising failures for some users. --- lib/PublicInbox/SearchMsg.pm | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 5779d1e2..4c65fbe0 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -7,12 +7,10 @@ package PublicInbox::SearchMsg; use strict; use warnings; use Search::Xapian; -use POSIX qw//; use Date::Parse qw/str2time/; use PublicInbox::MID qw/mid_clean/; use PublicInbox::Address; our $PFX2TERM_RE = undef; -use POSIX qw(strftime); sub new { my ($class, $mime) = @_; @@ -72,13 +70,21 @@ sub subject ($) { __hdr($_[0], 'subject') } sub to ($) { __hdr($_[0], 'to') } sub cc ($) { __hdr($_[0], 'cc') } +# no strftime, that is locale-dependent +my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat); +my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); + sub date ($) { my ($self) = @_; my $date = __hdr($self, 'date'); return $date if defined $date; my $ts = $self->{ts}; return unless defined $ts; - $self->{date} = strftime('%a, %d %b %Y %T +0000', gmtime($ts)); + my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ts); + $self->{date} = "$DoW[$wday], ". + sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000", + $mday, $year+1900, $hour, $min, $sec); + } sub from ($) { -- cgit v1.2.3-24-ge0c7 From 0ccce545505026019fe2fa4ed7da6f329bf91be1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 17 Dec 2016 12:04:11 +0000 Subject: searchmsg: do not memoize {date} field We only generate the ->date once in NNTP, so creating the hash entry is a waste. --- lib/PublicInbox/SearchMsg.pm | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 4c65fbe0..d62f02c8 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -76,13 +76,10 @@ my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); sub date ($) { my ($self) = @_; - my $date = __hdr($self, 'date'); - return $date if defined $date; my $ts = $self->{ts}; return unless defined $ts; my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ts); - $self->{date} = "$DoW[$wday], ". - sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000", + "$DoW[$wday], " . sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000", $mday, $year+1900, $hour, $min, $sec); } -- cgit v1.2.3-24-ge0c7 From 1a75ba282c16f8c15b7891090d0997628d7021dc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 20 Dec 2016 03:03:56 +0000 Subject: tests: add thread-all testing for benchmarking I'll be using this to improve message threading performance. --- MANIFEST | 1 + t/thread-all.t | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 t/thread-all.t diff --git a/MANIFEST b/MANIFEST index 3388b1a1..8f5e487e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -156,6 +156,7 @@ t/qspawn.t t/search.t t/spamcheck_spamc.t t/spawn.t +t/thread-all.t t/thread-cycle.t t/utf8.mbox t/view.t diff --git a/t/thread-all.t b/t/thread-all.t new file mode 100644 index 00000000..8ccf4f8c --- /dev/null +++ b/t/thread-all.t @@ -0,0 +1,38 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +# +# real-world testing of search threading +use strict; +use warnings; +use Test::More; +use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); +my $pi_dir = $ENV{GIANT_PI_DIR}; +plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir; +eval { require PublicInbox::Search; }; +plan skip_all => "Xapian missing for $0" if $@; +my $srch = eval { PublicInbox::Search->new($pi_dir) }; +plan skip_all => "$pi_dir not initialized for $0" if $@; + +require PublicInbox::View; +require PublicInbox::SearchThread; + +my $pfx = PublicInbox::Search::xpfx('thread'); +my $opts = { limit => 1000000, asc => 1 }; +my $t0 = clock_gettime(CLOCK_MONOTONIC); +my $elapsed; + +my $sres = $srch->_do_enquire(undef, $opts); +$elapsed = clock_gettime(CLOCK_MONOTONIC) - $t0; +diag "enquire: $elapsed"; + +$t0 = clock_gettime(CLOCK_MONOTONIC); +my $msgs = PublicInbox::View::load_results($srch, $sres); +$elapsed = clock_gettime(CLOCK_MONOTONIC) - $t0; +diag "load_results $elapsed"; + +$t0 = clock_gettime(CLOCK_MONOTONIC); +PublicInbox::View::thread_results($msgs); +$elapsed = clock_gettime(CLOCK_MONOTONIC) - $t0; +diag "thread_results $elapsed"; + +done_testing(); -- cgit v1.2.3-24-ge0c7 From 478d03688600a4c7b50e205d15d76113e019f3cd Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 20 Dec 2016 03:03:57 +0000 Subject: searchmsg: remove ensure_metadata Instead, only preload the ->mid field for threading, as we only need ->thread and ->path once in Search->get_thread (but we will need the ->mid field repeatedly). This more than doubles View->load_results performance on according to thread-all on an inbox with over 300K messages. --- lib/PublicInbox/Search.pm | 6 ------ lib/PublicInbox/SearchMsg.pm | 39 ++++++++++++--------------------------- lib/PublicInbox/View.pm | 2 +- t/search.t | 2 -- 4 files changed, 13 insertions(+), 36 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 24cb2667..d4f6f77a 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -108,12 +108,6 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix); sub xpfx { $all_pfx{$_[0]} } -our %PFX2TERM_RMAP; -my %meta_pfx = (mid => 1, thread => 1, path => 1); -while (my ($k, $v) = each %all_pfx) { - $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k}; -} - my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail'); sub xdir { diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index d62f02c8..96406c6f 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -10,7 +10,6 @@ use Search::Xapian; use Date::Parse qw/str2time/; use PublicInbox::MID qw/mid_clean/; use PublicInbox::Address; -our $PFX2TERM_RE = undef; sub new { my ($class, $mime) = @_; @@ -121,29 +120,17 @@ sub references { defined $x ? $x : ''; } -sub ensure_metadata { - my ($self) = @_; +sub _get_term_val ($$$) { + my ($self, $pfx, $re) = @_; my $doc = $self->{doc}; my $end = $doc->termlist_end; - - unless (defined $PFX2TERM_RE) { - my $or = join('|', keys %PublicInbox::Search::PFX2TERM_RMAP); - $PFX2TERM_RE = qr/\A($or)/; - } - - while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) { - # ideally we'd move this out of the loop: - my $i = $doc->termlist_begin; - - $i->skip_to($pfx); - if ($i != $end) { - my $val = $i->get_termname; - - if ($val =~ s/$PFX2TERM_RE//o) { - $self->{$field} = $val; - } - } + my $i = $doc->termlist_begin; + $i->skip_to($pfx); + if ($i != $end) { + my $val = $i->get_termname; + $val =~ s/$re// and return $val; } + undef; } sub mid ($;$) { @@ -154,8 +141,8 @@ sub mid ($;$) { } elsif (my $rv = $self->{mid}) { $rv; } else { - $self->ensure_metadata; # needed for ghosts - $self->{mid} ||= $self->_extract_mid; + $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) || + $self->_extract_mid; } } @@ -194,16 +181,14 @@ sub thread_id { my ($self) = @_; my $tid = $self->{thread}; return $tid if defined $tid; - $self->ensure_metadata; - $self->{thread}; + $self->{thread} = _get_term_val($self, 'G', qr/\AG/); # *G*roup } sub path { my ($self) = @_; my $path = $self->{path}; return $path if defined $path; - $self->ensure_metadata; - $self->{path}; + $self->{path} = _get_term_val($self, 'XPATH', qr/\AXPATH/); # path } 1; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index fa47a16a..a50cb642 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -737,7 +737,7 @@ sub indent_for { sub load_results { my ($srch, $sres) = @_; my $msgs = delete $sres->{msgs}; - $srch->retry_reopen(sub { [ map { $_->ensure_metadata; $_ } @$msgs ] }); + $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] }); } sub msg_timestamp { diff --git a/t/search.t b/t/search.t index eed9c9b6..c16811d8 100644 --- a/t/search.t +++ b/t/search.t @@ -109,7 +109,6 @@ sub filter_mids { my $found = $ro->lookup_message(''); ok($found, "message found"); is($root_id, $found->{doc_id}, 'doc_id set correctly'); - $found->ensure_metadata; is($found->mid, 'root@s', 'mid set correctly'); ok(int($found->thread_id) > 0, 'thread_id is an integer'); @@ -290,7 +289,6 @@ sub filter_mids { body => "LOOP!\n")); ok($doc_id > 0, "doc_id defined with circular reference"); my $smsg = $rw->lookup_message('circle@a'); - $smsg->ensure_metadata; is($smsg->references, '', "no references created"); my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); is($s, $msg->subject, 'long subject not rewritten'); -- cgit v1.2.3-24-ge0c7 From 93474f58d361b2ace4d5e51d5be4c220513da8d0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 20 Dec 2016 23:42:35 +0000 Subject: searchthread: update comment about loop prevention It definitely is necessary to prevent looping with the %seen hash. --- lib/PublicInbox/SearchThread.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 601a84b0..fafe7d7b 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -129,7 +129,7 @@ sub add_child { sub has_descendent { my ($self, $child) = @_; - my %seen; # loop prevention XXX may not be necessary + my %seen; # loop prevention while ($child) { return 1 if $self == $child || $seen{$child}++; $child = $child->{parent}; -- cgit v1.2.3-24-ge0c7 From 90b3d23352a0c37680ac266acaf4fef73a781bc9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 20 Dec 2016 23:42:36 +0000 Subject: searchthread: simplify API and remove needless OO This simplifies callers to prevent errors and avoids needless object-orientation in favor of a single procedure call to handle threading and ordering. --- lib/PublicInbox/SearchThread.pm | 37 +++++++++++++------------------------ lib/PublicInbox/SearchView.pm | 25 ++++++++++++------------- lib/PublicInbox/View.pm | 19 ++++++++----------- t/thread-cycle.t | 8 ++++---- 4 files changed, 37 insertions(+), 52 deletions(-) diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index fafe7d7b..2cd066db 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -21,32 +21,28 @@ package PublicInbox::SearchThread; use strict; use warnings; -sub new { - return bless { - messages => $_[1], - id_table => {}, - rootset => [] - }, $_[0]; -} - sub thread { - my $self = shift; - _add_message($self, $_) foreach @{$self->{messages}}; - my $id_table = delete $self->{id_table}; - $self->{rootset} = [ grep { + my ($messages, $ordersub) = @_; + my $id_table = {}; + _add_message($id_table, $_) foreach @$messages; + my $rootset = [ grep { !delete($_->{parent}) && $_->visible } values %$id_table ]; + $id_table = undef; + $rootset = $ordersub->($rootset); + $_->order_children($ordersub) for @$rootset; + $rootset; } sub _get_cont_for_id ($$) { - my ($self, $mid) = @_; - $self->{id_table}{$mid} ||= PublicInbox::SearchThread::Msg->new($mid); + my ($id_table, $mid) = @_; + $id_table->{$mid} ||= PublicInbox::SearchThread::Msg->new($mid); } sub _add_message ($$) { - my ($self, $smsg) = @_; + my ($id_table, $smsg) = @_; # A. if id_table... - my $this = _get_cont_for_id($self, $smsg->{mid}); + my $this = _get_cont_for_id($id_table, $smsg->{mid}); $this->{smsg} = $smsg; # B. For each element in the message's References field: @@ -59,7 +55,7 @@ sub _add_message ($$) { my $prev; foreach my $ref ($refs =~ m/<([^>]+)>/g) { # Find a Container object for the given Message-ID - my $cont = _get_cont_for_id($self, $ref); + my $cont = _get_cont_for_id($id_table, $ref); # Link the References field's Containers together in # the order implied by the References header @@ -82,13 +78,6 @@ sub _add_message ($$) { $prev->add_child($this) if defined $prev; } -sub order { - my ($self, $ordersub) = @_; - my $rootset = $ordersub->($self->{rootset}); - $self->{rootset} = $rootset; - $_->order_children($ordersub) for @$rootset; -} - package PublicInbox::SearchThread::Msg; use strict; use warnings; diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 50a2c01c..bd634d8d 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -161,6 +161,15 @@ sub search_nav_bot { $rv .= ''; } +sub sort_relevance { + my ($pct) = @_; + sub { + [ sort { (eval { $pct->{$b->topmost->{id}} } || 0) + <=> + (eval { $pct->{$a->topmost->{id}} } || 0) + } @{$_[0]} ] }; +} + sub mset_thread { my ($ctx, $mset, $q) = @_; my %pct; @@ -171,18 +180,8 @@ sub mset_thread { $smsg; } ($mset->items) ]}); - my $th = PublicInbox::SearchThread->new($msgs); - $th->thread; - if ($q->{r}) { # order by relevance - $th->order(sub { - [ sort { (eval { $pct{$b->topmost->{id}} } || 0) - <=> - (eval { $pct{$a->topmost->{id}} } || 0) - } @{$_[0]} ]; - }); - } else { # order by time (default for threaded view) - $th->order(*PublicInbox::View::sort_ts); - } + my $rootset = PublicInbox::SearchThread::thread($msgs, + $q->{r} ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts); my $skel = search_nav_bot($mset, $q). "
";
 	my $inbox = $ctx->{-inbox};
 	$ctx->{-upfx} = '';
@@ -196,7 +195,7 @@ sub mset_thread {
 	$ctx->{seen} = {};
 	$ctx->{s_nr} = scalar(@$msgs).'+ results';
 
-	PublicInbox::View::walk_thread($th, $ctx,
+	PublicInbox::View::walk_thread($rootset, $ctx,
 		*PublicInbox::View::pre_thread);
 
 	my $mime;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index a50cb642..b7796651 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -260,8 +260,8 @@ sub _th_index_lite {
 }
 
 sub walk_thread {
-	my ($th, $ctx, $cb) = @_;
-	my @q = map { (0, $_, -1) } @{$th->{rootset}};
+	my ($rootset, $ctx, $cb) = @_;
+	my @q = map { (0, $_, -1) } @$rootset;
 	while (@q) {
 		my ($level, $node, $i) = splice(@q, 0, 3);
 		defined $node or next;
@@ -285,10 +285,10 @@ sub thread_index_entry {
 }
 
 sub stream_thread ($$) {
-	my ($th, $ctx) = @_;
+	my ($rootset, $ctx) = @_;
 	my $inbox = $ctx->{-inbox};
 	my $mime;
-	my @q = map { (0, $_) } @{$th->{rootset}};
+	my @q = map { (0, $_) } @$rootset;
 	my $level;
 	while (@q) {
 		$level = shift @q;
@@ -350,10 +350,10 @@ sub thread_html {
 	$ctx->{mapping} = {};
 	$ctx->{s_nr} = "$nr+ messages in thread";
 
-	my $th = thread_results($msgs);
-	walk_thread($th, $ctx, *pre_thread);
+	my $rootset = thread_results($msgs);
+	walk_thread($rootset, $ctx, *pre_thread);
 	$skel .= '
'; - return stream_thread($th, $ctx) unless $ctx->{flat}; + return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg my $inbox = $ctx->{-inbox}; @@ -749,10 +749,7 @@ sub msg_timestamp { sub thread_results { my ($msgs) = @_; require PublicInbox::SearchThread; - my $th = PublicInbox::SearchThread->new($msgs); - $th->thread; - $th->order(*sort_ts); - $th + PublicInbox::SearchThread::thread($msgs, *sort_ts); } sub missing_thread { diff --git a/t/thread-cycle.t b/t/thread-cycle.t index 9dd2aa3c..16ceee71 100644 --- a/t/thread-cycle.t +++ b/t/thread-cycle.t @@ -72,11 +72,11 @@ SKIP: { done_testing(); sub thread_to_s { - my $th = PublicInbox::SearchThread->new(shift); - $th->thread; - $th->order(sub { [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] }); + my ($msgs) = @_; + my $rootset = PublicInbox::SearchThread::thread($msgs, sub { + [ sort { $a->{id} cmp $b->{id} } @{$_[0]} ] }); my $st = ''; - my @q = map { (0, $_) } @{$th->{rootset}}; + my @q = map { (0, $_) } @$rootset; while (@q) { my $level = shift @q; my $node = shift @q or next; -- cgit v1.2.3-24-ge0c7 From 15e14f11af0b9919f11e0c186b365ae0154e7e77 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 22 Dec 2016 07:29:17 +0000 Subject: doc: various comments on async handling Notes for future developers (myself included) since we can't assume people can read my mind. --- lib/PublicInbox/GitHTTPBackend.pm | 2 +- lib/PublicInbox/HTTPD.pm | 2 ++ lib/PublicInbox/Listener.pm | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index 1987a013..a069fd94 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -227,7 +227,7 @@ sub serve_smart { $r->[0] == 403 ? serve_dumb($env, $git, $path) : $r; }; my $res; - my $async = $env->{'pi-httpd.async'}; + my $async = $env->{'pi-httpd.async'}; # XXX unstable API my $io = $env->{'psgix.io'}; my $cb = sub { my $r = $rd_hdr->() or return; diff --git a/lib/PublicInbox/HTTPD.pm b/lib/PublicInbox/HTTPD.pm index 433d6da7..fb476624 100644 --- a/lib/PublicInbox/HTTPD.pm +++ b/lib/PublicInbox/HTTPD.pm @@ -29,6 +29,8 @@ sub new { 'psgi.multiprocess' => Plack::Util::TRUE, 'psgix.harakiri'=> Plack::Util::FALSE, 'psgix.input.buffered' => Plack::Util::TRUE, + + # XXX unstable API! 'pi-httpd.async' => do { no warnings 'once'; *pi_httpd_async diff --git a/lib/PublicInbox/Listener.pm b/lib/PublicInbox/Listener.pm index 5f351a77..a3a2015b 100644 --- a/lib/PublicInbox/Listener.pm +++ b/lib/PublicInbox/Listener.pm @@ -13,7 +13,7 @@ require IO::Handle; sub new ($$$) { my ($class, $s, $cb) = @_; setsockopt($s, SOL_SOCKET, SO_KEEPALIVE, 1); - setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); + setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); # ignore errors on non-TCP listen($s, 1024); IO::Handle::blocking($s, 0); my $self = fields::new($class); @@ -26,8 +26,12 @@ sub new ($$$) { sub event_read { my ($self) = @_; my $sock = $self->{sock}; + # no loop here, we want to fairly distribute clients # between multiple processes sharing the same socket + # XXX our event loop needs better granularity for + # a single accept() here to be, umm..., acceptable + # on high-traffic sites. if (my $addr = accept(my $c, $sock)) { IO::Handle::blocking($c, 0); # no accept4 :< $self->{post_accept}->($c, $addr, $sock); -- cgit v1.2.3-24-ge0c7 From a3481323fcce58b84d6209d928e7091cd8e69db5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 22 Dec 2016 08:00:26 +0000 Subject: search: lookup_mail handles modified DBs We call lookup_mail all over the place, be sure we can handle database modifications in those cases. --- lib/PublicInbox/Search.pm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index d4f6f77a..b59430d8 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -289,8 +289,10 @@ sub lookup_message { sub lookup_mail { # no ghosts! my ($self, $mid) = @_; - my $smsg = lookup_message($self, $mid) or return; - PublicInbox::SearchMsg->load_doc($smsg->{doc}); + retry_reopen($self, sub { + my $smsg = lookup_message($self, $mid) or return; + PublicInbox::SearchMsg->load_doc($smsg->{doc}); + }); } sub find_unique_doc_id { -- cgit v1.2.3-24-ge0c7 From 8b8be7ffc64e9e36308df2a592b6afd55e593442 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 24 Dec 2016 11:52:41 +0000 Subject: view: remove unused parameter And add a comment about it to remind our future selves. --- lib/PublicInbox/View.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index b7796651..cf40b555 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -441,7 +441,7 @@ sub attach_link ($$$$;$) { sub add_text_body { my ($upfx, $p) = @_; # from msg_iter: [ Email::MIME, depth, @idx ] - my ($part, $depth, @idx) = @$p; + my ($part, $depth) = @$p; # attachment @idx is unused my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; -- cgit v1.2.3-24-ge0c7 From 2e05c4503e44d4d9bf126589b7c6ba71db7ce94e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 24 Dec 2016 11:52:42 +0000 Subject: view: stop chomping off whitespace at ends of messages This allows a 3-4% speedup in $MESSAGE_ID/T/ page generation speed for a 368+ message thread. It also more faithfully preserves the message as intended; even if the it makes the sender look like a space-wasting slob :P --- lib/PublicInbox/View.pm | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index cf40b555..97a8bcbc 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -490,15 +490,13 @@ sub add_text_body { } } - my $end = "\n"; - if (@quot) { - $end = ''; + if (@quot) { # ugh, top posted flush_quote(\$s, $l, \@quot); + } elsif ($s =~ /\n\z/s) { # common, last line ends with a newline + $s; + } else { # some editors don't do newlines... + $s .= "\n"; } - $s =~ s/[ \t]+$//sgm; # kill per-line trailing whitespace - $s =~ s/\A\n+//s; # kill leading blank lines - $s =~ s/\s+\z//s; # kill all trailing spaces - $s .= $end; } sub _msg_html_prepare { -- cgit v1.2.3-24-ge0c7 From f083ef6b36fcfe5bea35427636fc8aff4e729ef6 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 24 Dec 2016 11:52:43 +0000 Subject: view: do not modify array during iteration This results in a half percent speedup or so doing $MESSAGE_ID/T/ HTML generation for a 368 message thread. --- lib/PublicInbox/View.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 97a8bcbc..39ca959c 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -476,7 +476,7 @@ sub add_text_body { } my @quot; my $l = PublicInbox::Linkify->new; - while (defined(my $cur = shift @lines)) { + foreach my $cur (@lines) { if ($cur !~ /^>/) { # show the previously buffered quote inline flush_quote(\$s, $l, \@quot) if @quot; -- cgit v1.2.3-24-ge0c7 From d9e9c8ed84a15c7fdf8fa57e82fcec9de7ecba87 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 24 Dec 2016 11:52:44 +0000 Subject: linkify: modify argument in place This results in over 1% speedup doing $MESSAGE_ID/T/ HTML generation for a 368-message thread. --- lib/PublicInbox/Linkify.pm | 17 +++++++---------- lib/PublicInbox/View.pm | 5 ++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index acd2a47e..8e1728c7 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -22,11 +22,10 @@ my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher):// (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)? )}xi; -sub new { bless {}, shift } +sub new { bless {}, $_[0] } sub linkify_1 { - my ($self, $s) = @_; - $s =~ s!$LINK_RE! + $_[1] =~ s!$LINK_RE! my $beg = $1 || ''; my $url = $2; my $end = ''; @@ -50,19 +49,17 @@ sub linkify_1 { # only escape ampersands, others do not match LINK_RE $url =~ s/&/&/g; - $self->{$key} = $url; + $_[0]->{$key} = $url; $beg . 'PI-LINK-'. $key . $end; !ge; - $s; + $_[1]; } sub linkify_2 { - my ($self, $s) = @_; - # Added "PI-LINK-" prefix to avoid false-positives on git commits - $s =~ s!\bPI-LINK-([a-f0-9]{40})\b! + $_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b! my $key = $1; - my $url = $self->{$key}; + my $url = $_[0]->{$key}; if (defined $url) { "$url"; } else { @@ -70,7 +67,7 @@ sub linkify_2 { $key; } !ge; - $s; + $_[1]; } 1; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 39ca959c..e4e9d7d2 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -482,9 +482,8 @@ sub add_text_body { flush_quote(\$s, $l, \@quot) if @quot; # regular line, OK - $cur = $l->linkify_1($cur); - $cur = ascii_html($cur); - $s .= $l->linkify_2($cur); + $l->linkify_1($cur); + $s .= $l->linkify_2(ascii_html($cur)); } else { push @quot, $cur; } -- cgit v1.2.3-24-ge0c7 From 9511829a9c836a2887d9a569275cc599a463d922 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 25 Dec 2016 09:40:25 +0000 Subject: http: fix clobbering of $null_io Oops, this would be disatrous if we started handling bigger request bodies or slow clients. Fixes: c008654229a9 ("avoid IO::File for anonymous temporary files") --- lib/PublicInbox/HTTP.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index cac14be3..c4b74b45 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -328,7 +328,7 @@ sub more ($$) { sub input_prepare { my ($self, $env) = @_; - my $input = $null_io; + my $input; my $len = $env->{CONTENT_LENGTH}; if ($len) { if ($len > $MAX_REQUEST_BUFFER) { @@ -339,6 +339,8 @@ sub input_prepare { } elsif (env_chunked($env)) { $len = CHUNK_START; open($input, '+>', undef); + } else { + $input = $null_io; } # TODO: expire idle clients on ENFILE / EMFILE -- cgit v1.2.3-24-ge0c7 From 1aff1f07b5f6acc661aefc0d6fc1d744e6f8025d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 25 Dec 2016 06:39:13 +0000 Subject: githttpbackend: minor readability improvement Use a more meaningful variable name for the Qspawn object, since this module is the reference for its use. --- lib/PublicInbox/GitHTTPBackend.pm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index a069fd94..86b8ebcc 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -206,11 +206,11 @@ sub serve_smart { } $env{GIT_HTTP_EXPORT_ALL} = '1'; $env{PATH_TRANSLATED} = "$git_dir/$path"; - my %rdr = ( 0 => fileno($in) ); - my $x = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, \%rdr); + my $rdr = { 0 => fileno($in) }; + my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr); my ($fh, $rpipe); my $end = sub { - if (my $err = $x->finish) { + if (my $err = $qsp->finish) { err($env, "git http-backend ($git_dir): $err"); } $fh->close if $fh; # async-only @@ -258,7 +258,7 @@ sub serve_smart { # holding the input here is a waste of FDs and memory $env->{'psgi.input'} = undef; - $x->start($limiter, sub { # may run later, much later... + $qsp->start($limiter, sub { # may run later, much later... ($rpipe) = @_; $in = undef; if ($async) { -- cgit v1.2.3-24-ge0c7 From 292ca34140489da2c3458e1d45da5a9ae4af540d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 25 Dec 2016 06:52:03 +0000 Subject: githttpbackend: simplify compatibility code Fewer conditionals means theres fewer code paths to test and makes things easier-to-read. --- examples/public-inbox.psgi | 1 + lib/PublicInbox/GitHTTPBackend.pm | 15 ++++++--------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/examples/public-inbox.psgi b/examples/public-inbox.psgi index e97f917f..98cde92d 100644 --- a/examples/public-inbox.psgi +++ b/examples/public-inbox.psgi @@ -14,6 +14,7 @@ my $www = PublicInbox::WWW->new; # share the public-inbox code itself: my $src = $ENV{SRC_GIT_DIR}; # '/path/to/public-inbox.git' +$src = PublicInbox::Git->new($src) if defined $src; builder { eval { diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index 86b8ebcc..4ad3fd1e 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -46,6 +46,9 @@ sub r ($;$) { sub serve { my ($env, $git, $path) = @_; + # XXX compatibility... ugh, can we stop supporting this? + $git = PublicInbox::Git->new($git) unless ref($git); + # Documentation/technical/http-protocol.txt in git.git # requires one and exactly one query parameter: if ($env->{QUERY_STRING} =~ /\Aservice=git-\w+-pack\z/ || @@ -98,7 +101,7 @@ sub serve_dumb { return r(404); } - my $f = (ref $git ? $git->{git_dir} : $git) . '/' . $path; + my $f = $git->{git_dir} . '/' . $path; return r(404) unless -f $f && -r _; # just in case it's a FIFO :P my $size = -s _; @@ -196,14 +199,8 @@ sub serve_smart { my $val = $env->{$name}; $env{$name} = $val if defined $val; } - my ($git_dir, $limiter); - if (ref $git) { - $limiter = $git->{-httpbackend_limiter} || $default_limiter; - $git_dir = $git->{git_dir}; - } else { - $limiter = $default_limiter; - $git_dir = $git; - } + my $limiter = $git->{-httpbackend_limiter} || $default_limiter; + my $git_dir = $git->{git_dir}; $env{GIT_HTTP_EXPORT_ALL} = '1'; $env{PATH_TRANSLATED} = "$git_dir/$path"; my $rdr = { 0 => fileno($in) }; -- cgit v1.2.3-24-ge0c7 From f773704937f088c2ef6d5be1038e541284cf5372 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 25 Dec 2016 07:33:02 +0000 Subject: githttpbackend: minor cleanups to improve readability Fewer returns improves readability and the diffstat agrees. --- lib/PublicInbox/GitHTTPBackend.pm | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index 4ad3fd1e..1fa5e30e 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -225,7 +225,6 @@ sub serve_smart { }; my $res; my $async = $env->{'pi-httpd.async'}; # XXX unstable API - my $io = $env->{'psgix.io'}; my $cb = sub { my $r = $rd_hdr->() or return; $rd_hdr = undef; @@ -236,17 +235,16 @@ sub serve_smart { $rpipe->close; $end->(); } - return $res->($r); - } - if ($async) { + $res->($r); + } elsif ($async) { $fh = $res->($r); - return $async->async_pass($io, $fh, \$buf); + $async->async_pass($env->{'psgix.io'}, $fh, \$buf); + } else { # for synchronous PSGI servers + require PublicInbox::GetlineBody; + $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end, + $buf); + $res->($r); } - - # for synchronous PSGI servers - require PublicInbox::GetlineBody; - $r->[2] = PublicInbox::GetlineBody->new($rpipe, $end, $buf); - $res->($r); }; sub { ($res) = @_; -- cgit v1.2.3-24-ge0c7 From d0164b3c9048bfd733a82b8fcd53d032e97552cc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 25 Dec 2016 08:09:48 +0000 Subject: httpd/async: improve variable naming We only refer to PublicInbox::HTTP objects here, so '$io' was a bad name. --- lib/PublicInbox/HTTPD/Async.pm | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/PublicInbox/HTTPD/Async.pm b/lib/PublicInbox/HTTPD/Async.pm index 68514f5a..79951ca6 100644 --- a/lib/PublicInbox/HTTPD/Async.pm +++ b/lib/PublicInbox/HTTPD/Async.pm @@ -30,10 +30,10 @@ sub restart_read_cb ($) { } sub async_pass { - my ($self, $io, $fh, $bref) = @_; - # In case the client HTTP connection ($io) dies, it + my ($self, $http, $fh, $bref) = @_; + # In case the client HTTP connection ($http) dies, it # will automatically close this ($self) object. - $io->{forward} = $self; + $http->{forward} = $self; $fh->write($$bref); my $restart_read = restart_read_cb($self); weaken($self); @@ -41,10 +41,10 @@ sub async_pass { my $r = sysread($self->{sock}, $$bref, 8192); if ($r) { $fh->write($$bref); - return if $io->{closed}; - if ($io->{write_buf_size}) { + return if $http->{closed}; + if ($http->{write_buf_size}) { $self->watch_read(0); - $io->write($restart_read); # D::S::write + $http->write($restart_read); # D::S::write } # stay in watch_read, but let other clients # get some work done, too. @@ -55,7 +55,7 @@ sub async_pass { # Done! Error handling will happen in $fh->close # called by the {cleanup} handler - $io->{forward} = undef; + $http->{forward} = undef; $self->close; } } -- cgit v1.2.3-24-ge0c7 From 427245acacaf04a882d5524e662075909b96905b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 26 Dec 2016 03:05:15 +0000 Subject: evcleanup: ensure deferred close from timers are handled ASAP Danga::Socket defers close() syscalls until the end of the event loop to avoid FD recycling. Unfortunately, this is dependent on IO events firing and waking the process up from poll/kevent/epoll_wait. Without any I/O activity, a socket could remain in the @Danga::Socket::ToClose array indefinitely. Thus, we will trigger a fake IO event after running all timers to trigger the deferred close in Danga::Socket::PostEventLoop. --- lib/PublicInbox/EvCleanup.pm | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/EvCleanup.pm b/lib/PublicInbox/EvCleanup.pm index 2b77c617..b9fe843b 100644 --- a/lib/PublicInbox/EvCleanup.pm +++ b/lib/PublicInbox/EvCleanup.pm @@ -30,9 +30,19 @@ sub _run_all ($) { $_->() foreach @$run; } +# ensure Danga::Socket::ToClose fires after timers fire +sub _asap_close () { $asapq->[1] ||= _asap_timer() } + sub _run_asap () { _run_all($asapq) } -sub _run_next () { _run_all($nextq) } -sub _run_later () { _run_all($laterq) } +sub _run_next () { + _run_all($nextq); + _asap_close(); +} + +sub _run_later () { + _run_all($laterq); + _asap_close(); +} # Called by Danga::Socket sub event_write { -- cgit v1.2.3-24-ge0c7