* [PATCH] wip
@ 2015-12-22 0:15 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2015-12-22 0:15 UTC (permalink / raw)
To: spew
---
lib/PublicInbox/RepoBrowse.pm | 9 ++-
lib/PublicInbox/RepoBrowseCommit.pm | 2 +-
lib/PublicInbox/RepoBrowseTree.pm | 146 ++++++++++++++++++++++++++++++++++++
lib/PublicInbox/Spawn.pm | 19 +++++
4 files changed, 171 insertions(+), 5 deletions(-)
create mode 100644 lib/PublicInbox/RepoBrowseTree.pm
create mode 100644 lib/PublicInbox/Spawn.pm
diff --git a/lib/PublicInbox/RepoBrowse.pm b/lib/PublicInbox/RepoBrowse.pm
index 5865d65..0b3197e 100644
--- a/lib/PublicInbox/RepoBrowse.pm
+++ b/lib/PublicInbox/RepoBrowse.pm
@@ -23,7 +23,7 @@ use warnings;
use URI::Escape qw(uri_escape_utf8 uri_unescape);
use PublicInbox::RepoConfig;
-my %CMD = map { lc($_) => $_ } qw(Log Commit);
+my %CMD = map { lc($_) => $_ } qw(Log Commit Tree);
sub new {
my ($class, $file) = @_;
@@ -39,9 +39,10 @@ sub run {
# URL syntax: / repo [ / cmd [ / path ] ]
# cmd: log | commit | diff | tree | view | blob | snapshot
- # repo and path may both contain '/'
+ # repo and path (@extra) may both contain '/'
my $rconfig = $self->{rconfig};
- my (undef, $repo_path, @extra) = split(m{/+}, $cgi->path_info, -1);
+ my $path_info = uri_unescape($cgi->path_info);
+ my (undef, $repo_path, @extra) = split(m{/+}, $path_info, -1);
return r404() unless $repo_path;
my $repo_info;
@@ -53,7 +54,7 @@ sub run {
my $req = {
repo_info => $repo_info,
- path => \@extra,
+ extra => \@extra, # path
cgi => $cgi,
rconfig => $rconfig,
};
diff --git a/lib/PublicInbox/RepoBrowseCommit.pm b/lib/PublicInbox/RepoBrowseCommit.pm
index 67f63a3..43f637a 100644
--- a/lib/PublicInbox/RepoBrowseCommit.pm
+++ b/lib/PublicInbox/RepoBrowseCommit.pm
@@ -110,7 +110,7 @@ sub call_git {
sub git_blob_hrefs {
my ($rel, @ids) = @_;
- map { "<a\nhref=\"${rel}blob?id=$_\"" } @ids;
+ map { "<a\nhref=\"${rel}tree?id=$_\"" } @ids;
}
sub git_blob_links {
diff --git a/lib/PublicInbox/RepoBrowseTree.pm b/lib/PublicInbox/RepoBrowseTree.pm
new file mode 100644
index 0000000..8427a5d
--- /dev/null
+++ b/lib/PublicInbox/RepoBrowseTree.pm
@@ -0,0 +1,146 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::RepoBrowseTree;
+use strict;
+use warnings;
+use base qw(PublicInbox::RepoBrowseBase);
+use PublicInbox::Spawn qw(git);
+use PublicInbox::GitCatFile;
+use URI::Escape qw(uri_escape_utf8);
+use Encode qw/find_encoding/;
+my $enc_utf8 = find_encoding('UTF-8');
+
+my %GIT_MODE = (
+ '100644' => ' ', # blob
+ '100755' => 'x', # executable blob
+ '040000' => 'd', # tree
+ '120000' => 'l', # symlink
+ '160000' => 'g', # commit (gitlink)
+);
+
+sub git_tree_stream {
+ my ($self, $req, $res) = @_; # res: Plack callback
+ my $repo_info = $req->{repo_info};
+ my $dir = $repo_info->{path};
+ my @extra = @{$req->{extra}};
+ my $tslash;
+ if (@extra && $extra[-1] eq '') { # trailing slash present (empty last element)
+ pop @extra;
+ $tslash = 1;
+ }
+ my $tree_path = join('/', @extra);
+ my $q = PublicInbox::RepoBrowseQuery->new($req->{cgi});
+ my $id = $q->{id};
+ $id eq '' and $id = 'HEAD'; # an empty id falls back to HEAD
+
+ my $obj = "$id:$tree_path"; # "<id>:<path>" object name handed to check()
+ my $git = $repo_info->{git} ||= PublicInbox::GitCatFile->new($dir);
+ my ($hex, $type, $size) = $git->check($obj);
+
+ if (!defined($type) || ($type ne 'blob' && $type ne 'tree')) {
+ return $res->([404, ['Content-Type'=>'text/html'],
+ ['Not Found']]);
+ }
+
+ my $fh = $res->([200, ['Content-Type'=>'text/html; charset=UTF-8']]);
+ $fh->write('<html><head><title></title></head><body>'.
+ PublicInbox::Hval::PRE);
+
+ if ($type eq 'tree') {
+ tree_show($fh, $git, $dir, $hex, $q, \@extra, $tslash);
+ } elsif ($type eq 'blob') {
+ blob_show($fh, $git, $hex);
+ }
+ $fh->write('</body></html>');
+ $fh->close;
+}
+
+sub call_git {
+ my ($self, $req) = @_;
+ sub { git_tree_stream($self, $req, @_) };
+}
+
+sub blob_binary {
+ my ($fh) = @_;
+ $fh->write("Binary file cannot be displayed\n");
+}
+
+sub blob_show {
+ my ($fh, $git, $hex) = @_;
+ # ref: buffer_is_binary in git.git
+ my $to_read = 8000; # git uses this size to detect binary files
+ my $text_p;
+ $git->cat_file($hex, sub {
+ my ($cat, $left) = @_; # $$left == $size
+ my $n = 0;
+ $to_read = $$left if $to_read > $$left;
+ my $r = read($cat, my $buf, $to_read);
+ return unless defined($r) && $r > 0;
+ $$left -= $r;
+
+ return blob_binary($fh) if (index($buf, "\0") >= 0);
+
+ $text_p = 1;
+ $fh->write('<table><tr><td>'.PublicInbox::Hval::PRE);
+ while (1) {
+ my @buf = split("\n", $buf, -1);
+ $buf = pop @buf; # last line, careful...
+ $n += scalar @buf;
+ foreach my $l (@buf) {
+ $l = $enc_utf8->decode($l);
+ $l = PublicInbox::Hval::ascii_html($l);
+ $l .= "\n";
+ $fh->write($l);
+ }
+ last if ($$left == 0 || !defined $buf);
+
+ $to_read = $$left if $to_read > $$left;
+ my $off = length $buf; # last line from previous read
+ $r = read($cat, $buf, $to_read, $off);
+ return unless defined($r) && $r > 0;
+ $$left -= $r;
+ }
+
+ $fh->write('</pre></td><td><pre>');
+ foreach my $i (1..$n) {
+ $fh->write("<a id=n$i href='#n$i'>$i</a>\n");
+ }
+ $fh->write('</pre></td></tr></table>');
+ 0;
+ });
+}
+
+sub tree_show {
+ my ($fh, $git, $dir, $hex, $q, $extra, $tslash) = @_;
+
+ my $ls = git($dir, qw(ls-tree --abbrev=16 -l -z), $hex);
+ local $/ = "\0";
+ my $pfx = $tslash ? './' :
+ (@$extra ? uri_escape_utf8($extra->[-1]).'/' : 'tree/');
+
+ my $qs = $q->qs;
+ while (defined(my $l = <$ls>)) {
+ chomp $l;
+ my ($m, $t, $x, $s, $path) =
+ ($l =~ /\A(\S+) (\S+) (\S+)( *\S+)\t(.+)\z/s);
+ $m = $GIT_MODE{$m} or next;
+
+ my $ref = uri_escape_utf8($path);
+ $path = PublicInbox::Hval::ascii_html($path);
+
+ if ($m eq 'g') {
+ # TODO: support cross-repository gitlinks
+ $fh->write('g' . (' ' x 18) . "$path @ $x\n");
+ next;
+ }
+ elsif ($m eq 'd') { $path = "<b>$path/</b>" }
+ elsif ($m eq 'x') { $path = "<b>$path</b>" }
+ elsif ($m eq 'l') { $path = "<i>$path</i>" }
+
+ $ref = $pfx.PublicInbox::Hval::ascii_html($ref).$qs;
+ $fh->write("$m log raw $s <a\nhref=\"$ref\">$path</a>\n");
+ }
+ $fh->write('</pre>');
+}
+
+1;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
new file mode 100644
index 0000000..6407a90
--- /dev/null
+++ b/lib/PublicInbox/Spawn.pm
@@ -0,0 +1,19 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Spawn;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(git);
+
+sub git {
+ my ($git_dir, @cmd) = @_;
+ @cmd = ('git', "--git-dir=$git_dir", @cmd);
+
+ open(my $fh, '-|', @cmd) or
+ die('open `'.join(' ', @cmd) . "' fork/pipe failed: $!\n");
+
+ $fh;
+}
+
+1;
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2016-06-26 3:46 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-06-26 3:46 UTC (permalink / raw)
To: spew
From: Eric Wong <normalperson@yhbt.net>
---
lib/PublicInbox/View.pm | 164 +++++++++++++-----------------------------------
1 file changed, 44 insertions(+), 120 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 38e35bf..58361db 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -92,7 +92,7 @@ sub _hdr_names ($$) {
# this is already inside a <pre>
sub index_entry {
my ($mime, $level, $state) = @_;
- my $midx = $state->{anchor_idx}++;
+ $state->{anchor_idx}++;
my $ctx = $state->{ctx};
my $srch = $ctx->{srch};
my $hdr = $mime->header_obj;
@@ -100,28 +100,25 @@ sub index_entry {
my $mid_raw = mid_clean(mid_mime($mime));
my $id = anchor_for($mid_raw);
- my $seen = $state->{seen};
- $seen->{$id} = "#$id"; # save the anchor for children, later
-
my $mid = PublicInbox::Hval->new_msgid($mid_raw);
my $root_anchor = $state->{root_anchor} || '';
my $path = $root_anchor ? '../../' : '';
my $href = $mid->as_href;
my $irt = in_reply_to($hdr);
- my $parent_anchor = $seen->{anchor_for($irt)} if defined $irt;
- $subj = ascii_html($subj);
- $subj = "<a\nhref=\"${path}$href/\">$subj</a>";
+ $subj = '<b>'.ascii_html($subj).'</b>';
$subj = "<u\nid=u>$subj</u>" if $root_anchor eq $id;
my $ts = _msg_date($hdr);
- my $rv = "<pre\nid=s$midx>";
+ my $rv = "<pre\nid=$id>";
$rv .= "<b\nid=$id>$subj</b>\n";
- my $txt = "${path}$href/raw";
+ my $mhref = $path.$href.'/';
my $fh = $state->{fh};
my $from = _hdr_names($hdr, 'From');
- $rv .= "- $from @ $ts UTC (<a\nhref=\"$txt\">raw</a>)\n";
+ $rv .= "- $from @ $ts UTC (";
+ $rv .= "<a\nhref=\"${mhref}\">permalink</a> ";
+ $rv .= "/ <a\nhref=\"${mhref}raw\">raw</a>)\n";
my @tocc;
foreach my $f (qw(To Cc)) {
my $dst = _hdr_names($hdr, $f);
@@ -130,36 +127,19 @@ sub index_entry {
$rv .= ' '.join('; +', @tocc) . "\n" if @tocc;
$fh->write($rv .= "\n");
- my $mhref = "${path}$href/";
# scan through all parts, looking for displayable text
msg_iter($mime, sub { index_walk($fh, $mhref, $_[0]) });
- $rv = "\n" . html_footer($hdr, 0, $ctx, "$path$href/#R");
+ $rv = "\n" . html_footer($hdr, 0, $ctx, $mhref.'#R');
if (defined $irt) {
- unless (defined $parent_anchor) {
- my $v = PublicInbox::Hval->new_msgid($irt, 1);
- $v = $v->as_href;
- $parent_anchor = "${path}$v/";
- }
- $rv .= " <a\nhref=\"$parent_anchor\">parent</a>";
+ $irt = anchor_for($irt);
+ $rv .= " <a\nhref=#$irt>parent</a>";
}
if (my $pct = $state->{pct}) { # used by SearchView.pm
$rv .= " [relevance $pct->{$mid_raw}%]";
- } elsif ($srch) {
- my $threaded = 'threaded';
- my $flat = 'flat';
- my $end = '';
- if ($ctx->{flat}) {
- $flat = "<b>$flat</b>";
- $end = "\n"; # for lynx
- } else {
- $threaded = "<b>$threaded</b>";
- }
- $rv .= " [<a\nhref=\"${path}$href/t/#u\">$threaded</a>";
- $rv .= "|<a\nhref=\"${path}$href/T/#u\">$flat</a>]$end";
}
- $fh->write($rv .= '</pre>');
+ $fh->write($rv .= "\n</pre>"); # '\n' for lynx
}
sub thread_html {
@@ -180,48 +160,47 @@ sub walk_thread {
}
# only private functions below.
+#
+sub pre_thread {
+ my ($state, $level, $node) = @_;
+ $state->{mapping}->{$node->messageid} = $node;
+ skel_dump($state, $level, $node);
+}
sub emit_thread_html {
my ($res, $ctx, $foot, $srch) = @_;
my $mid = $ctx->{mid};
- my $flat = $ctx->{flat};
- my $msgs = load_results($srch->get_thread($mid, { asc => $flat }));
+ my $msgs = load_results($srch->get_thread($mid, {asc => 1}));
my $nr = scalar @$msgs;
return missing_thread($res, $ctx) if $nr == 0;
- my $seen = {};
+ my $skel = '';
my $state = {
+ seen => {},
res => $res,
ctx => $ctx,
- seen => $seen,
+ srch => $ctx->{srch},
root_anchor => anchor_for($mid),
anchor_idx => 0,
cur_level => 0,
+ mapping => {}, # mid -> node
+ dst => \$skel,
};
- require PublicInbox::Git;
- $ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir});
- if ($flat) {
- pre_anchor_entry($seen, $_) for (@$msgs);
- __thread_entry($state, $_, 0) for (@$msgs);
- } else {
- walk_thread(thread_results($msgs), $state, *thread_entry);
- if (my $max = $state->{cur_level}) {
- $state->{fh}->write(
- ('</ul></li>' x ($max - 1)) . '</ul>');
- }
- }
+ walk_thread(thread_results($msgs), $state, *pre_thread);
+
+ __thread_entry($state, $_, 0) for @$msgs;
# there could be a race due to a message being deleted in git
# but still being in the Xapian index:
my $fh = delete $state->{fh} or return missing_thread($res, $ctx);
- my $final_anchor = $state->{anchor_idx};
- my $next = "<a\nid=s$final_anchor>";
- $next .= $final_anchor == 1 ? 'only message in' : 'end of';
- $next .= " thread</a>, back to <a\nhref=\"../../\">index</a>";
+ my $next = @$msgs == 1 ? 'only message in thread' : 'end of thread';
+ $next .= ", back to <a\nhref=\"../../\">index</a>";
$next .= "\ndownload thread: ";
$next .= "<a\nhref=\"../t.mbox.gz\">mbox.gz</a>";
$next .= " / follow: <a\nhref=\"../t.atom\">Atom feed</a>";
+ $next .= "\n";
+ $next .= $skel;
$fh->write('<hr /><pre>' . $next . "\n\n".
$foot . '</pre></body></html>');
$fh->close;
@@ -563,12 +542,6 @@ sub thread_html_head {
"</head><body>");
}
-sub pre_anchor_entry {
- my ($seen, $mime) = @_;
- my $id = anchor_for(mid_mime($mime));
- $seen->{$id} = "#$id"; # save the anchor for children, later
-}
-
sub ghost_parent {
my ($upfx, $mid) = @_;
# 'subject dummy' is used internally by Mail::Thread
@@ -580,36 +553,9 @@ sub ghost_parent {
qq{[parent not found: <<a\nhref="$upfx$href/">$html</a>>]};
}
-sub thread_adj_level {
- my ($state, $level) = @_;
-
- my $max = $state->{cur_level};
- if ($level <= 0) {
- return '' if $max == 0; # flat output
-
- # reset existing lists
- my $x = $max > 1 ? ('</ul></li>' x ($max - 1)) : '';
- $state->{fh}->write($x . '</ul>');
- $state->{cur_level} = 0;
- return '';
- }
- if ($level == $max) { # continue existing list
- $state->{fh}->write('<li>');
- } elsif ($level < $max) {
- my $x = $max > 1 ? ('</ul></li>' x ($max - $level)) : '';
- $state->{fh}->write($x .= '<li>');
- $state->{cur_level} = $level;
- } else { # ($level > $max) # start a new level
- $state->{cur_level} = $level;
- $state->{fh}->write(($max ? '<li>' : '') . '<ul><li>');
- }
- '</li>';
-}
-
sub ghost_flush {
- my ($state, $upfx, $mid, $level) = @_;
- my $end = '<pre>'. ghost_parent($upfx, $mid) . '</pre>';
- $state->{fh}->write($end .= thread_adj_level($state, $level));
+ my ($state, $upfx, $mid) = @_;
+ $state->{fh}->write('<pre>'. ghost_parent($upfx, $mid) . '</pre>');
}
sub __thread_entry {
@@ -623,16 +569,7 @@ sub __thread_entry {
$mime = Email::MIME->new($mime);
thread_html_head($mime, $state) if $state->{anchor_idx} == 0;
- if (my $ghost = delete $state->{ghost}) {
- # n.b. ghost messages may only be parents, not children
- foreach my $g (@$ghost) {
- ghost_flush($state, '../../', @$g);
- }
- }
- my $end = thread_adj_level($state, $level);
index_entry($mime, $level, $state);
- $state->{fh}->write($end) if $end;
-
1;
}
@@ -641,23 +578,6 @@ sub indent_for {
INDENT x ($level - 1);
}
-sub __ghost_prepare {
- my ($state, $node, $level) = @_;
- my $ghost = $state->{ghost} ||= [];
- push @$ghost, [ $node->messageid, $level ];
-}
-
-sub thread_entry {
- my ($state, $level, $node) = @_;
- if (my $mime = $node->message) {
- unless (__thread_entry($state, $mime, $level)) {
- __ghost_prepare($state, $node, $level);
- }
- } else {
- __ghost_prepare($state, $node, $level);
- }
-}
-
sub load_results {
my ($sres) = @_;
@@ -738,7 +658,8 @@ sub _skel_header {
$s = $s->as_html;
}
my $m = PublicInbox::Hval->new_msgid($mid);
- $m = $state->{upfx} . $m->as_href . '/';
+ my $upfx = $state->{upfx};
+ $m = defined $upfx ? $upfx.$m->as_href.'/' : '#'.anchor_for($m->raw);
$$dst .= "$pfx<a\nhref=\"$m\">";
$$dst .= defined($s) ? "$s</a> $f\n" : "$f</a>\n";
}
@@ -754,14 +675,17 @@ sub skel_dump {
my $dst = $state->{dst};
if ($mid eq 'subject dummy') {
$$dst .= "\t[no common parent]\n";
- } else {
- $$dst .= ' [not found] ';
- $$dst .= indent_for($level) . th_pfx($level);
- $mid = PublicInbox::Hval->new_msgid($mid);
- my $href = $state->{upfx} . $mid->as_href . '/';
- my $html = $mid->as_html;
- $$dst .= qq{<<a\nhref="$href">$html</a>>\n};
+ return;
}
+ $$dst .= ' [not found] ';
+ $$dst .= indent_for($level) . th_pfx($level);
+ $mid = PublicInbox::Hval->new_msgid($mid);
+ my $href; # declared once here; assigned after $upfx is resolved
+ my $upfx = $state->{upfx};
+ $upfx = '../../' unless defined $upfx; # thread index view
+ $href = $upfx . $mid->as_href . '/'; # no second "my": avoids masking the declaration
+ my $html = $mid->as_html;
+ $$dst .= qq{<<a\nhref="$href">$html</a>>\n};
}
}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2016-08-18 2:16 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-08-18 2:16 UTC (permalink / raw)
To: spew
---
MANIFEST | 2 +
lib/PublicInbox/Search.pm | 30 +++++++-
lib/PublicInbox/WWW.pm | 14 ++++
lib/PublicInbox/WwwStream.pm | 4 +-
lib/PublicInbox/WwwText.pm | 160 +++++++++++++++++++++++++++++++++++++++++++
t/psgi_text.t | 39 +++++++++++
6 files changed, 247 insertions(+), 2 deletions(-)
create mode 100644 lib/PublicInbox/WwwText.pm
create mode 100644 t/psgi_text.t
diff --git a/MANIFEST b/MANIFEST
index bed6050..306945a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -82,6 +82,7 @@ lib/PublicInbox/WWW.pm
lib/PublicInbox/WatchMaildir.pm
lib/PublicInbox/WwwAttach.pm
lib/PublicInbox/WwwStream.pm
+lib/PublicInbox/WwwText.pm
sa_config/Makefile
sa_config/README
sa_config/root/etc/spamassassin/public-inbox.pre
@@ -141,6 +142,7 @@ t/plack.t
t/precheck.t
t/psgi_attach.t
t/psgi_mount.t
+t/psgi_text.t
t/qspawn.t
t/search.t
t/spamcheck_spamc.t
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 7561ef4..4fff1e4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -51,6 +51,7 @@ my %bool_pfx_internal = (
thread => 'G', # newsGroup (or similar entity - e.g. a web forum name)
);
+# do we still need these? probably not..
my %bool_pfx_external = (
path => 'XPATH',
mid => 'Q', # uniQue id (Message-ID)
@@ -62,6 +63,26 @@ my %prob_prefix = (
m => 'Q', # 'mid' is exact, 'm' can do partial
);
+# not documenting m: and mid:, since using the URLs works w/o Xapian
+our $HELP = <<EOF;
+s: match within Subject only e.g. s:"a quick brown fox"
+ This is a probabilistic search with support for stemming
+ and wildcards '*'.
+
+d: date range as YYYYMMDD e.g. d:19931002..20101002
+ Open-ended ranges such as d:19931002.. and d:..20101002
+ are also supported.
+EOF
+# TODO: (from mairix, some of these are maybe)
+# b (body), f (From:), c (Cc:), n (attachment), t (To:)
+# tc (To:+Cc:), bs (body + Subject), tcf (To: +Cc: +From:)
+#
+# Non-mairix:
+# df (filenames from diff)
+# nq (non-quoted body)
+# da (diff a/ removed lines)
+# db (diff b/ added lines)
+
my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
sub xpfx { $all_pfx{$_[0]} }
@@ -191,7 +212,7 @@ sub qp {
# just parse the spec to avoid the extra DB handles for now.
if (my $altid = $self->{altid}) {
for (@$altid) {
- # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3'
+ # $_ = 'serial:gmane:file=/path/to/gmane.msgmap.sqlite3'
/\Aserial:(\w+):/ or next;
my $pfx = $1;
# gmane => XGMANE
@@ -321,4 +342,11 @@ sub enquire {
$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
}
+sub help {
+ my ($self) = @_;
+ my $ret = [ $HELP ];
+ # TODO: merge altid prefixes
+ $ret;
+}
+
1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6f6a003..6d6349a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -96,6 +96,8 @@ sub call {
} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) {
r301($ctx, $1, $2);
+ } elsif ($path_info =~ m!$INBOX_RE/_/text/(.+)\z!o) {
+ get_text($ctx, $1, $2);
# convenience redirects order matters
} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
@@ -238,6 +240,18 @@ sub get_thread {
PublicInbox::View::thread_html($ctx);
}
+# /$INBOX/_/text/$KEY/
+# /$INBOX/_/text/$KEY/raw
+# KEY may contain slashes
+sub get_text {
+ my ($ctx, $inbox, $key) = @_;
+ my $r404 = invalid_inbox($ctx, $inbox);
+ return $r404 if $r404;
+
+ require PublicInbox::WwwText;
+ PublicInbox::WwwText::get_text($ctx, $key);
+}
+
sub ctx_get {
my ($ctx, $key) = @_;
my $val = $ctx->{$key};
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 9ed25e1..c89e6de 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -31,10 +31,12 @@ sub _html_top ($) {
my $desc = ascii_html($obj->description);
my $title = $ctx->{-title_html} || $desc;
my $upfx = $ctx->{-upfx} || '';
+ my $help = $upfx.'_/text/help';
my $atom = $ctx->{-atom} || $upfx.'new.atom';
my $tip = $ctx->{-html_tip} || '';
my $top = "<b>$desc</b>";
- my $links = "<a\nhref=\"$atom\">Atom feed</a>";
+ my $links = "<a\nhref=\"$help\">help</a> / ".
+ "<a\nhref=\"$atom\">Atom feed</a>";
if ($obj->search) {
my $q_val = $ctx->{-q_value_html};
if (defined $q_val && $q_val ne '') {
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
new file mode 100644
index 0000000..51a91d1
--- /dev/null
+++ b/lib/PublicInbox/WwwText.pm
@@ -0,0 +1,160 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# serves the /$INBOX/_/* endpoints from :text/* of the git tree
+package PublicInbox::WwwText;
+use strict;
+use warnings;
+use PublicInbox::Linkify;
+use PublicInbox::WwwStream;
+use PublicInbox::Hval qw(ascii_html);
+our $QP_URL = 'https://xapian.org/docs/queryparser.html';
+our $WIKI_URL = 'https://en.wikipedia.org/wiki';
+
+# /$INBOX/_/text/$KEY/ (HTML) or /$INBOX/_/text/$KEY/raw (text/plain); KEY may contain slashes
+sub get_text {
+ my ($ctx, $key) = @_;
+ my $code = 200;
+ require bytes; # bytes::length (Content-Length below) is undefined unless bytes.pm is loaded
+ # get the raw text the same way we get mboxrds
+ my $raw = ($key =~ s!/raw\z!!);
+ my $have_tslash = !$raw && ($key =~ s!/\z!!); # not "my ... if": that form is undefined
+
+ my $txt = '';
+ if (!_default_text($ctx, $key, \$txt)) {
+ $code = 404;
+ $txt = "404 Not Found ($key)\n";
+ }
+ if ($raw) {
+ return [ $code, [ 'Content-Type', 'text/plain',
+ 'Content-Length', bytes::length($txt) ],
+ [ $txt ] ]
+ }
+
+ # enforce trailing slash for "wget -r" compatibility
+ if (!$have_tslash && $code == 200) {
+ my $url = $ctx->{-inbox}->base_url($ctx->{env});
+ $url .= "_/text/$key/";
+
+ return [ 302, [ 'Content-Type', 'text/plain',
+ 'Location', $url ],
+ [ "Redirecting to $url\n" ] ];
+ }
+
+ # Follow git commit message conventions,
+ # first line is the Subject/title
+ my ($title) = ($txt =~ /\A([^\n]*)/s);
+ _do_linkify($txt);
+ $ctx->{-title_html} = ascii_html($title);
+
+ my $nslash = ($key =~ tr!/!/!); # count of '/' in KEY: one extra '../' per slash
+ $ctx->{-upfx} = '../../../' . ('../' x $nslash);
+
+ PublicInbox::WwwStream->response($ctx, $code, sub {
+ my ($nr, undef) = @_;
+ $nr == 1 ? '<pre>'.$txt.'</pre>' : undef
+ });
+}
+
+sub _do_linkify {
+ my $l = PublicInbox::Linkify->new;
+ $_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0])));
+}
+
+sub _default_text ($$$) {
+ my ($ctx, $key, $txt) = @_;
+ return if $key ne 'help'; # TODO more keys?
+
+ my $ibx = $ctx->{-inbox};
+ my $base_url = $ibx->base_url($ctx->{env});
+ $$txt .= "public-inbox help for $base_url\n";
+ $$txt .= <<EOF;
+
+overview
+--------
+
+ public-inbox uses Message-ID identifiers in URLs.
+ One may look up messages by substituting Message-IDs
+ (without the leading '<' or trailing '>') into the URL.
+ Forward slash ('/') characters in the Message-IDs
+ need to be escaped as "%2F" (without quotes).
+
+ Thus, it is possible to retrieve any message by its
+ Message-ID by going to:
+
+ $base_url<Message-ID>/
+
+ (without the '<' or '>')
+
+ Message-IDs are described at:
+
+ $WIKI_URL/Message-ID
+
+EOF
+
+ # n.b. we use the Xapian DB for any regeneratable,
+ # order-of-arrival-independent data.
+ my $srch = $ibx->search;
+ if ($srch) {
+ $$txt .= <<EOF;
+search
+------
+
+ This public-inbox has search functionality provided by Xapian.
+
+ It supports typical AND, OR, NOT, '+', '-' queries present
+ in other search engines.
+
+ Prefixes supported:
+
+EOF
+ my $help = $srch->help;
+ foreach my $h (@$help) {
+ $h =~ s/^/\t/sgm;
+ $$txt .= $h;
+ $$txt .= "\n";
+ }
+
+ $$txt .= <<EOF;
+ See $QP_URL for more details.
+
+message threading
+-----------------
+
+ Message threading is enabled for this public-inbox,
+ additional endpoints for message threads are available:
+
+ * $base_url<Message-ID>/T/#u
+
+ Loads the thread belonging to the given <Message-ID>
+ in flat chronological order. The "#u" anchor
+ focuses the browser on the given <Message-ID>.
+
+ * $base_url<Message-ID>/t/#u
+
+ Loads the thread belonging to the given <Message-ID>
+ in threaded order with nesting. For deep threads,
+ this requires a wide display or horizontal scrolling.
+
+ Both of these HTML endpoints are suitable for offline reading
+ using the thread overview at the bottom of each page.
+
+ Users of feed readers may follow a particular thread using:
+
+ * $base_url<Message-ID>/t.atom
+
+ $WIKI_URL/Atom_(standard)
+
+ Finally, the gzipped mbox for a thread is available for
+ downloading and importing into your favorite mail client:
+
+ * $base_url<Message-ID>/t.mbox.gz
+
+ $WIKI_URL/Mbox
+ (we use the mboxrd variant)
+EOF
+ }
+ 1;
+}
+
+1;
diff --git a/t/psgi_text.t b/t/psgi_text.t
new file mode 100644
index 0000000..bf565f8
--- /dev/null
+++ b/t/psgi_text.t
@@ -0,0 +1,39 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use File::Temp qw/tempdir/;
+my $tmpdir = tempdir('psgi-text-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $maindir = "$tmpdir/main.git";
+my $addr = 'test-public@example.com';
+my $cfgpfx = "publicinbox.test";
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for psgi_text.t" if $@;
+}
+use_ok $_ foreach @mods;
+use PublicInbox::Import;
+use PublicInbox::Git;
+use PublicInbox::Config;
+use PublicInbox::WWW;
+use_ok 'PublicInbox::WwwText';
+use Plack::Builder;
+my $config = PublicInbox::Config->new({
+ "$cfgpfx.address" => $addr,
+ "$cfgpfx.mainrepo" => $maindir,
+});
+is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+my $www = PublicInbox::WWW->new($config);
+
+test_psgi(sub { $www->call(@_) }, sub {
+ my ($cb) = @_;
+ my $res;
+ $res = $cb->(GET('/test/_/text/help/'));
+ like($res->content, qr!<title>public-inbox help.*</title>!,
+ 'default help');
+});
+
+done_testing();
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2016-08-23 20:07 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-08-23 20:07 UTC (permalink / raw)
To: spew
From: Eric Wong <normalperson@yhbt.net>
---
.gitignore | 1 +
Documentation/.gitignore | 2 +-
Documentation/include.mk | 15 +++-
Documentation/public-inbox-config.pod | 90 +++++++++++++++++++
Documentation/public-inbox-daemon.pod | 165 ++++++++++++++++++++++++++++++++++
Documentation/public-inbox-httpd.pod | 36 ++++++++
Documentation/public-inbox-nntpd.pod | 31 +++++++
lib/PublicInbox/WWW.pod | 40 +++++++++
8 files changed, 376 insertions(+), 4 deletions(-)
create mode 100644 Documentation/public-inbox-config.pod
create mode 100644 Documentation/public-inbox-daemon.pod
create mode 100644 Documentation/public-inbox-httpd.pod
create mode 100644 Documentation/public-inbox-nntpd.pod
create mode 100644 lib/PublicInbox/WWW.pod
diff --git a/.gitignore b/.gitignore
index 3b333a5..6a44471 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,5 +9,6 @@
*.1
*.5
*.7
+*.8
*.html
*.gz
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
index 8ba4186..107ad36 100644
--- a/Documentation/.gitignore
+++ b/Documentation/.gitignore
@@ -1 +1 @@
-/public-inbox-mda.txt
+/public-inbox-*.txt
diff --git a/Documentation/include.mk b/Documentation/include.mk
index 5d73028..99f3d43 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -16,33 +16,42 @@ podtext = $(PODTEXT) $(PODTEXT_OPTS)
m1 =
m1 += public-inbox-mda
+m1 += public-inbox-httpd
+m1 += public-inbox-nntpd
m5 =
+m5 += public-inbox-config
m7 =
+m8 =
+m8 += public-inbox-daemon
man1 := $(addsuffix .1, $(m1))
man5 := $(addsuffix .5, $(m5))
man7 := $(addsuffix .7, $(m7))
+man8 := $(addsuffix .8, $(m8))
all:: man html
-man: $(man1) $(man5) $(man7)
+man: $(man1) $(man5) $(man7) $(man8)
prefix ?= $(HOME)
mandir ?= $(prefix)/share/man
man1dir = $(mandir)/man1
man5dir = $(mandir)/man5
man7dir = $(mandir)/man7
+man8dir = $(mandir)/man8
install-man: man
test -z "$(man1)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
test -z "$(man5)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
test -z "$(man7)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+ test -z "$(man8)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
test -z "$(man1)" || $(INSTALL) -m 644 $(man1) $(DESTDIR)$(man1dir)
test -z "$(man5)" || $(INSTALL) -m 644 $(man5) $(DESTDIR)$(man5dir)
test -z "$(man7)" || $(INSTALL) -m 644 $(man7) $(DESTDIR)$(man7dir)
+ test -z "$(man8)" || $(INSTALL) -m 644 $(man8) $(DESTDIR)$(man8dir)
-%.1 : Documentation/%.pod
- $(podman) -s 1 $< $@+ && mv $@+ $@
+%.1 %.5 %.7 %.8 : Documentation/%.pod
+ $(podman) -s $(patsubst .%,%,$(suffix $@)) $< $@+ && mv $@+ $@
mantxt = $(addprefix Documentation/, $(addsuffix .txt, $(m1)))
docs += $(mantxt)
diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
new file mode 100644
index 0000000..855879d
--- /dev/null
+++ b/Documentation/public-inbox-config.pod
@@ -0,0 +1,90 @@
+=head1 NAME
+
+public-inbox-config - public-inbox config file description
+
+=head1 SYNOPSIS
+
+~/.public-inbox/config
+
+=head1 DESCRIPTION
+
+The public-inbox config file is parseable by L<git-config(1)>.
+This is a global configuration file for mapping/discovering
+all public-inboxes used by a particular user.
+
+=head1 CONFIGURATION FILE
+
+=head2 EXAMPLE
+
+ [publicinbox "test"]
+ mainrepo = /home/user/path/to/test.git
+ ; multiple addresses are supported
+ address = test@example.com
+ ; address = alternate@example.com
+ url = http://example.com/test
+ newsgroup = inbox.test
+
+=head2 VARIABLES
+
+=over 8
+
+=item publicinbox.<name>.address
+
+The email address of the public-inbox. May be specified
+more than once for merging multiple mailing lists (migrating
+to new addresses). This must be specified.
+
+Default: none, required
+
+=item publicinbox.<name>.mainrepo
+
+The path to the git repository which hosts the public-inbox.
+
+Default: none, required
+
+=item publicinbox.<name>.url
+
+The primary URL for hosting the HTTP/HTTPS archives.
+Additional HTTP/HTTPS URLs may be specified via C<$GIT_DIR/cloneurl>
+as documented in L<gitweb(1)>.
+
+Default: none, optional
+
+=item publicinbox.<name>.newsgroup
+
+The NNTP group name for use with L<public-inbox-nntpd(1)>. This
+may be any newsgroup name with hierarchies delimited by '.'. For
+example, the newsgroup for L<mailto:meta@public-inbox.org>
+is: inbox.comp.mail.public-inbox.meta
+
+Default: none, optional.
+
+=back
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=item PI_CONFIG
+
+May be used to override the default "~/.public-inbox/config" value.
+
+=back
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>,
+L<public-inbox-mda(1)>
diff --git a/Documentation/public-inbox-daemon.pod b/Documentation/public-inbox-daemon.pod
new file mode 100644
index 0000000..b02db9e
--- /dev/null
+++ b/Documentation/public-inbox-daemon.pod
@@ -0,0 +1,165 @@
+=head1 NAME
+
+public-inbox-daemon - public-inbox network server daemons
+
+=head1 SYNOPSIS
+
+B<public-inbox-httpd>
+B<public-inbox-nntpd>
+
+=head1 DESCRIPTION
+
+Common daemon options and behavior for public-inbox network
+daemons. Network daemons for public-inbox provide read-only
+NNTP and HTTP access to public-inboxes. Write access to a
+public-inbox repository will never be required to run these.
+
+These daemons are implemented with a common core using
+non-blocking sockets to scale to thousands of connected clients.
+These daemons may also utilize multiple pre-forked worker
+processes to take advantage of multiple CPUs.
+
+Native TLS (Transport Layer Security) support is planned.
+
+=head1 OPTIONS
+
+=over
+
+=item -l, --listen ADDRESS
+
+This takes an absolute path to a Unix socket or HOST:PORT
+to listen on. For example, to listen to TCP connections on
+port 119, use: C<-l 0.0.0.0:119>. This may also point to
+a Unix socket (C<-l /path/to/http.sock>) for a reverse proxy
+like L<nginx(1)> to use.
+
+May be specified multiple times to allow listening on multiple
+sockets.
+
+Default: server-dependent unless socket activation is used with
+L<systemd(1)> or similar (see L<systemd.socket(5)>).
+
+=item -1, --stdout PATH
+
+Specify an appendable path to redirect stdout descriptor (1) to.
+Using this is preferable to setting up the redirect externally
+(e.g. E<gt>E<gt>/path/to/log in shell) since it allows
+SIGUSR1 to be handled (see L</SIGNALS> below).
+
+Default: /dev/null
+
+=item -2, --stderr PATH
+
+Like C<--stdout>, but for the stderr descriptor (2).
+
+=item -W, --worker-processes
+
+Set the number of worker processes.
+
+Normally, this should match the number of CPUs on the system to
+take full advantage of the hardware. However, users of
+memory-constrained systems may want to lower this.
+
+Setting this to zero (C<-W0>) disables the master/worker split;
+saving some memory but removing the ability to use SIGTTIN
+to increase worker processes or have the worker restarted by
+the master on crashes.
+
+Default: 1
+
+=back
+
+=head1 SIGNALS
+
+Most of our signal handling behavior is copied from L<nginx(1)>
+and/or L<starman(1)>; so it is possible to reuse common scripts
+for them.
+
+=over 8
+
+=item SIGUSR1
+
+Reopens log files pointed to by --stdout and --stderr options.
+
+=item SIGUSR2
+
+Spawn a new process with the intention to replace the running one.
+See L</UPGRADING> below.
+
+=item SIGHUP
+
+Reload config files associated with the process.
+(FIXME: not tested for -httpd, yet)
+
+=item SIGTTIN
+
+Increase the number of running worker processes by one.
+
+=item SIGTTOU
+
+Decrease the number of running worker processes by one.
+
+=item SIGWINCH
+
+Stop all running worker processes. SIGHUP or SIGTTIN
+may be used to restart workers.
+
+=item SIGQUIT
+
+Gracefully terminate the running process.
+
+=back
+
+SIGTTOU, SIGTTIN, SIGWINCH all have no effect when worker
+processes are disabled with C<-W0> on the command-line.
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=item PI_CONFIG
+
+The config file to use (default: ~/.public-inbox/config).
+See L<public-inbox-config(5)>.
+
+=item LISTEN_FDS, LISTEN_PID
+
+Used by systemd (and compatible) installations for socket
+activation. See L<systemd.socket(5)> and L<sd_listen_fds(3)>.
+
+=back
+
+=head1 UPGRADING
+
+There are two ways to upgrade a running process.
+
+Users of process management systems with socket activation
+(L<systemd(1)> or similar) may rely on multiple instances. For
+systemd, this means using two (or more) '@' instances for each
+service (e.g. C<SERVICENAME@INSTANCE>) as documented in
+L<systemd.unit(5)>.
+
+Users of traditional SysV init may use SIGUSR2 to spawn
+a replacement process and gracefully terminate the old
+process using SIGQUIT.
+
+In either case, the old process will not truncate running
+responses; so responses to expensive requests do not get
+interrupted and lost.
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2013-2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<public-inbox-httpd(1)>, L<public-inbox-nntpd(1)>
diff --git a/Documentation/public-inbox-httpd.pod b/Documentation/public-inbox-httpd.pod
new file mode 100644
index 0000000..8141f7c
--- /dev/null
+++ b/Documentation/public-inbox-httpd.pod
@@ -0,0 +1,36 @@
+=head1 NAME
+
+public-inbox-httpd - PSGI server optimized for public-inbox
+
+=head1 SYNOPSIS
+
+B<public-inbox-httpd> [OPTIONS] [/path/to/myapp.psgi]
+
+=head1 DESCRIPTION
+
+public-inbox-httpd is a PSGI/Plack server supporting HTTP/1.1.
+It uses options and environment variables common to all
+L<public-inbox-daemon(8)> implementations in addition to
+the PSGI file.
+
+This may point to a PSGI file for supporting generic PSGI applications.
+If a PSGI file is not specified, L<PublicInbox::WWW(3pm)> is loaded
+with a default middleware stack.
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2013-2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>,
+L<Plack(3pm)>
diff --git a/Documentation/public-inbox-nntpd.pod b/Documentation/public-inbox-nntpd.pod
new file mode 100644
index 0000000..40b6b8b
--- /dev/null
+++ b/Documentation/public-inbox-nntpd.pod
@@ -0,0 +1,31 @@
+=head1 NAME
+
+public-inbox-nntpd - NNTP server for sharing public-inbox
+
+=head1 SYNOPSIS
+
+B<public-inbox-nntpd> [OPTIONS]
+
+=head1 DESCRIPTION
+
+public-inbox-nntpd provides an NNTP daemon for public-inbox.
+It uses options and environment variables common to all
+L<public-inbox-daemon(8)> implementations.
+
+=head1 CONTACT
+
+All feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>,
+L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>,
+L<nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta>
+
+=head1 COPYRIGHT
+
+Copyright 2013-2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>
diff --git a/lib/PublicInbox/WWW.pod b/lib/PublicInbox/WWW.pod
new file mode 100644
index 0000000..cfc5b4b
--- /dev/null
+++ b/lib/PublicInbox/WWW.pod
@@ -0,0 +1,40 @@
+=head1 NAME
+
+PublicInbox::WWW - PSGI interface for public-inbox
+
+=head1 SYNOPSIS
+
+In your .psgi file:
+
+ use PublicInbox::WWW;
+ PublicInbox::WWW->preload; # optional
+
+ my $www = PublicInbox::WWW->new;
+ builder {
+ enable 'Head';
+ mount '/inboxes' => sub { $www->call(@_) };
+ };
+
+
+=head1 DESCRIPTION
+
+This application loads I<.psgi> files (or actually whichever filename
+extensions) from the root directory and runs them as PSGI
+applications. Suppose you have a directory containing C<foo.psgi> and
+C<bar.psgi>, map this application to C</app> with
+L<Plack::App::URLMap> and you can access them via the URL:
+
+ http://example.com/app/foo.psgi
+ http://example.com/app/bar.psgi
+
+to load them. You can rename the file to the one without C<.psgi>
+extension to make the URL look nicer, or use the URL rewriting tools
+like L<Plack::Middleware::Rewrite> to do the same thing.
+
+=head1 AUTHOR
+
+Tatsuhiko Miyagawa
+
+=head1 SEE ALSO
+
+L<Plack::App::CGIBin>
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2017-04-05 18:40 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2017-04-05 18:40 UTC (permalink / raw)
To: spew
---
lib/yahns/config.rb | 23 +++++++-
lib/yahns/http_context.rb | 4 ++
lib/yahns/rack_proxy.rb | 143 ++++++++++++++++++++++++++++++++++++++++++++++
lib/yahns/submaster.rb | 12 ++++
4 files changed, 181 insertions(+), 1 deletion(-)
create mode 100644 lib/yahns/rack_proxy.rb
create mode 100644 lib/yahns/submaster.rb
diff --git a/lib/yahns/config.rb b/lib/yahns/config.rb
index e545d59..1f4d17e 100644
--- a/lib/yahns/config.rb
+++ b/lib/yahns/config.rb
@@ -436,6 +436,27 @@ def commit!(server)
server.__send__("#{var}=", val) if val != :unset
end
- @app_ctx.each { |app| app.logger ||= server.logger }
+ # count extra workers for rack_proxy (and maybe others)
+ submasters = 0
+ afc = @set[:atfork_child]
+ afc = [] if afc == :unset
+
+ nsm = 0
+ @app_ctx.each do |ctx|
+ ctx.logger ||= server.logger
+ next unless ctx.respond_to?(:submasters)
+ cmd_list = ctx.submasters or next
+ require_relative 'submaster'
+ submasters += cmd_list.size
+ cmd_list.each do |cmd|
+ afc << Yahns::Submaster.new(cmd, nsm += 1)
+ end
+ end
+ if submasters > 0
+ wp = @set[:worker_processes]
+ wp = 1 if wp == :unset # gotta have one worker
+ server.__send__(:worker_processes=, wp + submasters)
+ server.__send__(:atfork_child=, afc)
+ end
end
end
diff --git a/lib/yahns/http_context.rb b/lib/yahns/http_context.rb
index 40f2c58..70a75d7 100644
--- a/lib/yahns/http_context.rb
+++ b/lib/yahns/http_context.rb
@@ -91,4 +91,8 @@ def tmpio_for(len, env)
end
tmp
end
+
+ def submasters
+ @yahns_rack.respond_to?(:submasters) ? @yahns_rack.submasters : nil
+ end
end
diff --git a/lib/yahns/rack_proxy.rb b/lib/yahns/rack_proxy.rb
new file mode 100644
index 0000000..1dee36a
--- /dev/null
+++ b/lib/yahns/rack_proxy.rb
@@ -0,0 +1,143 @@
+# -*- encoding: binary -*-
+# Copyright (C) 2017 all contributors <yahns-public@yhbt.net>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# frozen_string_literal: true
+require_relative 'rack'
+require_relative 'proxy_pass'
+require 'socket'
+
+# Basically, a lazy way to setup ProxyPass to hand off some (or all)
+# requests to any HTTP server backend (e.g. varnish, etc)
+class Yahns::RackProxy < Yahns::Rack # :nodoc:
+
+ # the key is the destination returned by the top-level config.ru
+ # and the value is a splattable array for spawning another process
+ # via Process.exec
+ # {
+ # # [ key, backend URL, ] => %w(splattable array for Process.exec),
+ # [:pass, 'http://127.0.0.1:9292/' ] => %w(rackup /path/to/config.ru),
+ # [:lsock, 'unix:/path/to/sock' ] => %w(bleh -l /path/to/sock ...),
+ #
+ # # Users of Ruby 2.3+ can shorten their config when
+ # # running systemd-aware daemons which will bind to
+ # # a random TCP port:
+ # :pri => %w(blah -c conf.rb config.ru),
+ # :alt => %w(blah -c /path/to/alt.conf.rb alt.ru),
+ # :psgi => %w(blah foo.psgi),
+ # ...
+ # }
+
+ # By default, proxy all requests by using the :pass return value
+ # Users can selectively process requests for non-buggy code in
+ # the core yahns processes.
+ PROXY_ALL = lambda { |env| :pass } # :nodoc:
+ attr_reader :submasters
+
+ def initialize(ru = PROXY_ALL, mapping, opts = {})
+ @submasters = []
+ case mapping
+ when Hash # multiple HTTP backends running different commands
+ # nothing to do { key: splattable array for Process.spawn }
+ when Array # only one backend
+ mapping = { pass: cmd }
+ else
+ raise ArgumentError, "#{cmd.inspect} must be an Array or Hash"
+ end
+ env = nil
+
+ @proxy_pass_map = {}
+ mapping.each do |key, cmd|
+ case key
+ when Array # undocumented for now..
+ key, url, ppopts = *key
+ when Symbol # OK
+ ppopts = {}
+ else
+ raise ArgumentError, "#{key.inspect} is not a symbol"
+ end
+ Array === cmd or raise ArgumentError,
+ "#{cmd.inspect} must be a splattable array for Process.exec"
+ @proxy_pass_map[key] and raise ArgumentError,
+ "#{key.inspect} may not be repeated in mapping"
+
+ cmd = cmd.dup
+ unless url
+ if RUBY_VERSION.to_f < 2.3 && env.nil? # only warn once
+ warn "Ruby < 2.3 may crash when emulating systemd to pass FDs\n",
+" http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/69895\n"
+ end
+
+ # nope, no UNIXServer support, maybe not worth it to deal
+ # with FS perms in containers.
+ # Also, we can use TCP Fast Open support under Linux
+ srv = random_tcp_listener(ppopts)
+ addr = srv.addr
+ url = "http://#{addr[3]}:#{addr[1]}/"
+
+ # pretend to be systemd for sd_listen_fds(3) users
+ # submaster will #call any env values before calling Process.exec,
+ # so we can lazy-expand LISTEN_PID, here:
+ env ||= { 'LISTEN_FDS' => '1', 'LISTEN_PID' => Process.method(:pid) }
+ case cmd[0]
+ when Hash
+ cmd[0] = cmd[0].merge(env)
+ else
+ cmd.unshift(env)
+ end
+
+ rdr = { 3 => srv }
+ case cmd[-1]
+ when Hash
+ cmd[-1] = cmd[-1].merge(rdr)
+ else
+ cmd << rdr
+ end
+ end
+
+ @submasters << cmd
+ @proxy_pass_map[key] = Yahns::ProxyPass.new(url, ppopts)
+ end
+ super(ru, opts) # Yahns::Rack#initialize
+ end
+
+ def build_app!
+ super # Yahns::Rack#build_app!
+ proxy_app = @app
+
+ # wrap the (possibly-)user-supplied app
+ @app = lambda do |env|
+ res = proxy_app.call(env)
+
+ # standard Rack responses may be handled in yahns proper:
+ Array === res and return res
+
+ # the response is :pass or another Symbol, not a proper Rack response!
+ # shove the env over to the appropriate Yahns::ProxyPass which
+ # talks to a backend HTTP process:
+ ppass = @proxy_pass_map[res] and return ppass.call(env)
+
+ # oops, user screwed up :<
+ logger = env['rack.logger'] and
+ logger.error("bad response from user-supplied proxy: #{res.inspect}")
+
+ [ 500, [ %w(Content-Type text/plain) ], [] ]
+ end
+ end
+
+ def random_tcp_listener(opts) # TODO: should we support options?
+ srv = TCPServer.new('127.0.0.1', 0) # 0: bind random port
+ srv.setsockopt(:SOL_SOCKET, :SO_KEEPALIVE, 1)
+ srv.setsockopt(:IPPROTO_TCP, :TCP_NODELAY, 1)
+
+ # Deferring accepts slows down core yahns, but it's useful for
+ # less-sophisticated upstream (backend) servers:
+ Socket.const_defined?(:TCP_DEFER_ACCEPT) and
+ srv.setsockopt(:IPPROTO_TCP, :TCP_DEFER_ACCEPT, 1)
+
+ srv.listen(1024)
+ srv
+ end
+end
+
+# register ourselves
+Yahns::Config::APP_CLASS[:rack_proxy] = Yahns::RackProxy
diff --git a/lib/yahns/submaster.rb b/lib/yahns/submaster.rb
new file mode 100644
index 0000000..b86b425
--- /dev/null
+++ b/lib/yahns/submaster.rb
@@ -0,0 +1,12 @@
+# -*- encoding: binary -*-
+# Copyright (C) 2017 all contributors <yahns-public@yhbt.net>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# frozen_string_literal: true
+class Yahns::Submaster < Struct.new(cmd, idx)
+
+ # atfork_child
+ def call(worker_nr)
+ return if worker_nr != idx
+ exec(*cmd)
+ end
+end
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2017-04-12 20:17 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2017-04-12 20:17 UTC (permalink / raw)
To: spew
---
lib/PublicInbox/Search.pm | 9 ++++++---
lib/PublicInbox/SearchIdx.pm | 15 ++++++++++-----
2 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 02d1827..7005b6f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -74,6 +74,9 @@ my %prob_prefix = (
q => 'XQUOT',
nq => 'XNQ',
+ df => 'XDF',
+ da => 'XDA',
+ db => 'XDB',
# default:
'' => 'XMID S A XNQ XQUOT XFN',
@@ -97,12 +100,12 @@ EOF
'a:' => 'match within the To, Cc, and From headers',
'tc:' => 'match within the To and Cc headers',
'bs:' => 'match within the Subject and body',
+ 'df:' => 'match filenames from diff',
+ 'da:' => 'diff pre-image removed (-) lines',
+ 'db:' => 'diff post-image added(+) lines',
);
chomp @HELP;
# TODO:
-# df (filenames from diff)
-# da (diff a/ removed lines)
-# db (diff b/ added lines)
my $mail_query = Search::Xapian::Query->new('T' . 'mail');
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 8200b54..c8f5205 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -130,10 +130,15 @@ sub index_users ($$) {
}
sub index_body ($$$) {
- my ($tg, $lines, $inc) = @_;
- $tg->index_text(join("\n", @$lines), $inc, $inc ? 'XNQ' : 'XQUOT');
- @$lines = ();
+ my ($tg, $lines, $doc) = @_;
+ my $txt = join("\n", @$lines);
+ $tg->index_text($txt, !!$doc, $doc ? 'XNQ' : 'XQUOT');
$tg->increase_termpos;
+ if ($doc && $txt =~ /^[\-\+]/ms) { # does it look like a diff?
+ foreach (@$lines) {
+ }
+ }
+ @$lines = ();
}
sub add_message {
@@ -200,7 +205,7 @@ sub add_message {
my @lines = split(/\n/, $body);
while (defined(my $l = shift @lines)) {
if ($l =~ /^>/) {
- index_body($tg, \@orig, 1) if @orig;
+ index_body($tg, \@orig, $doc) if @orig;
push @quot, $l;
} else {
index_body($tg, \@quot, 0) if @quot;
@@ -208,7 +213,7 @@ sub add_message {
}
}
index_body($tg, \@quot, 0) if @quot;
- index_body($tg, \@orig, 1) if @orig;
+ index_body($tg, \@orig, $doc) if @orig;
});
link_message($self, $smsg, $old_tid);
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] WIP
@ 2017-07-15 1:42 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2017-07-15 1:42 UTC (permalink / raw)
To: spew
---
Makefile | 2 +
cbm_ref.c | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++++
cbm_ref.h | 16 ++
t/helper/.gitignore | 1 +
t/helper/test-cbm.c | 45 ++++++
5 files changed, 502 insertions(+)
create mode 100644 cbm_ref.c
create mode 100644 cbm_ref.h
create mode 100644 t/helper/test-cbm.c
diff --git a/Makefile b/Makefile
index ba4359ef8d..8e60926d4d 100644
--- a/Makefile
+++ b/Makefile
@@ -633,6 +633,7 @@ X =
PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
+TEST_PROGRAMS_NEED_X += test-cbm
TEST_PROGRAMS_NEED_X += test-chmtime
TEST_PROGRAMS_NEED_X += test-ctype
TEST_PROGRAMS_NEED_X += test-config
@@ -748,6 +749,7 @@ LIB_OBJS += branch.o
LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle.o
LIB_OBJS += cache-tree.o
+LIB_OBJS += cbm_ref.o
LIB_OBJS += color.o
LIB_OBJS += column.o
LIB_OBJS += combine-diff.o
diff --git a/cbm_ref.c b/cbm_ref.c
new file mode 100644
index 0000000000..c2f2dcacb1
--- /dev/null
+++ b/cbm_ref.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) Eric Wong <e@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ *
+ * Byte ordering: Network byte order on filesystem, all in-memory
+ * structures are in native byte order
+ *
+ */
+#include "git-compat-util.h"
+#include "cache.h"
+#include "cbm_ref.h"
+
+/*
+ * align blocks up to the nearest 16 bytes boundary,
+ * it should give mmap compatibility for all 64-bit systems
+ */
+#define CBM_ALIGN 16
+
+/* don't penalize the majority of users who will never need >= 4GB for refs */
+typedef uint32_t cbm_off_t;
+#define CBM_SIZE_MAX UINT32_MAX
+
+/* like "struct list_head" in list.h, but "pointers" are file offsets */
+struct flist_head {
+ cbm_off_t next, prev;
+};
+
+/* the first sizeof(struct cbm_ref_tree) bytes (28 with a 32-bit cbm_off_t) of the file contain this header: */
+struct cbm_ref_tree {
+ uint8_t magic_bits[16]; /* version, etc .. */
+ struct flist_head free_list;
+ cbm_off_t root; /* 0 == empty */
+};
+/* ... the rest of the file is just made of several cbm_ref, one per ref: */
+struct cbm_ref {
+ uint16_t total_size; /* includes final '\0' in name[] */
+ uint16_t name_byte;
+ uint16_t oid_byte;
+ uint8_t name_otherbits;
+ uint8_t oid_otherbits;
+
+ /* "pointers" are file offsets */
+ cbm_off_t name_child[2];
+ cbm_off_t oid_child[2];
+ union {
+ struct flist_head free_space;
+ struct flist_head oid_alias;
+ } as;
+ struct object_id oid;
+ char name[FLEX_ARRAY];
+};
+
+/* open file, this struct in memory only (malloc-ed) */
+struct cbm {
+ int fd;
+ int flags;
+ /*
+ * We can add mmap support, later:
+ char *mmptr;
+ size_t mmsize;
+ */
+ struct cbm_ref_tree t;
+};
+
+static inline size_t size_align(size_t size)
+{
+ return ((size + (CBM_ALIGN - 1)) & ~(CBM_ALIGN - 1));
+}
+
+static cbm_off_t ntoh_off(cbm_off_t off)
+{
+ return sizeof(off) == sizeof(uint32_t) ? ntohl(off) : ntohll(off);
+}
+
+static cbm_off_t hton_off(cbm_off_t off)
+{
+ return sizeof(off) == sizeof(uint32_t) ? htonl(off) : htonll(off);
+}
+
+/*
+ * Writes exactly len bytes from buf to fd, starting at file offset off.
+ * EINTR is retried and a short write is continued from where it stopped;
+ * any other failure (or a write making no progress) is fatal.
+ */
+static void pwrite_or_die(int fd, void *buf, size_t len, off_t off)
+{
+	const char *p = buf;
+
+	while (len > 0) {
+		ssize_t w = pwrite(fd, p, len, off);
+
+		if (w < 0) {
+			if (errno == EINTR)
+				continue;
+			die_errno("pwrite failed");
+		}
+		if (w == 0) /* no progress, do not spin forever */
+			die("short pwrite: %ld < %lu",
+			    (long)w, (unsigned long)len);
+		p += w;
+		off += w;
+		len -= (size_t)w;
+	}
+}
+
+static void ntoh_flist(struct flist_head *head)
+{
+ head->next = ntoh_off(head->next);
+ head->prev = ntoh_off(head->prev);
+}
+
+static void hton_flist(struct flist_head *head)
+{
+ head->next = hton_off(head->next);
+ head->prev = hton_off(head->prev);
+}
+
+static void xpread_in_full(int fd, void *ptr, size_t len, off_t off)
+{
+ ssize_t r = pread_in_full(fd, ptr, len, off);
+
+ if (r < 0)
+ die_errno("pread failed @ %"PRIuMAX"\n", (uintmax_t)off);
+ if (r != (ssize_t)len)
+ die("pread short %ld/%"PRIuMAX" @ %"PRIuMAX"\n",
+ (long)r, (uintmax_t)len, (uintmax_t)off);
+}
+
+static cbm_off_t ptr_read(const struct cbm *cbm, cbm_off_t off)
+{
+ cbm_off_t ptr;
+ xpread_in_full(cbm->fd, &ptr, sizeof(ptr), off);
+
+ return ntoh_off(ptr);
+}
+
+static void ptr_write(const struct cbm *cbm, cbm_off_t off, cbm_off_t val)
+{
+ cbm_off_t tmp = hton_off(val);
+
+ pwrite_or_die(cbm->fd, &tmp, sizeof(tmp), off);
+}
+
+#define FLIST_INIT(type, ptr, MEMBER) \
+ flist_init(&((ptr)->MEMBER), offsetof(type,MEMBER))
+
+static void flist_init(struct flist_head *head, cbm_off_t base)
+{
+ head->next = head->prev = base;
+}
+
+static void ntoh_cbm_ref(struct cbm_ref *ent)
+{
+ ent->total_size = ntohs(ent->total_size);
+ ent->name_byte = ntohs(ent->name_byte);
+ ent->name_child[0] = ntoh_off(ent->name_child[0]);
+ ent->name_child[1] = ntoh_off(ent->name_child[1]);
+ ent->oid_byte = ntohs(ent->oid_byte);
+ ent->oid_child[0] = ntoh_off(ent->oid_child[0]);
+ ent->oid_child[1] = ntoh_off(ent->oid_child[1]);
+ ntoh_flist(&ent->as.oid_alias);
+}
+
+static void hton_cbm_ref(struct cbm_ref *ent)
+{
+ ent->total_size = htons(ent->total_size);
+ ent->name_byte = htons(ent->name_byte);
+ ent->name_child[0] = hton_off(ent->name_child[0]);
+ ent->name_child[1] = hton_off(ent->name_child[1]);
+ ent->oid_byte = htons(ent->oid_byte);
+ ent->oid_child[0] = hton_off(ent->oid_child[0]);
+ ent->oid_child[1] = hton_off(ent->oid_child[1]);
+ hton_flist(&ent->as.oid_alias);
+}
+
+/* loads a cbm_ref into memory from a given FS offset */
+static void
+cbm_ref_load(const struct cbm *cbm, struct cbm_ref **ent, cbm_off_t off)
+{
+ size_t size = size_align(sizeof(struct cbm_ref));
+ uint16_t total_size;
+ struct cbm_ref *node;
+
+ node = *ent = xrealloc(*ent, size);
+ xpread_in_full(cbm->fd, node, size, off);
+
+ total_size = ntohs(node->total_size);
+ if (total_size > size) {
+ size = total_size;
+ *ent = xrealloc(*ent, size);
+ xpread_in_full(cbm->fd, *ent, size, off);
+ }
+ ntoh_cbm_ref(*ent);
+}
+
+static void
+cbm_ref_dump(struct cbm *cbm, const struct cbm_ref *ent, cbm_off_t off)
+{
+ struct cbm_ref *tmp = alloca(ent->total_size);
+
+ memcpy(tmp, ent, ent->total_size);
+ hton_cbm_ref(tmp);
+ pwrite_or_die(cbm->fd, tmp, ent->total_size, off);
+}
+
+static cbm_off_t
+nearest_by_name(const struct cbm *cbm, struct cbm_ref **ent,
+ const char *name, cbm_off_t p)
+{
+ const uint8_t *const ubytes = (const uint8_t *)name;
+ const size_t name_len = strlen(name);
+
+ while (1 & p) {
+ uint8_t c = 0;
+ int direction;
+ struct cbm_ref *cur;
+
+ cbm_ref_load(cbm, ent, p - 1);
+ cur = *ent;
+
+ if (cur->name_byte < name_len)
+ c = ubytes[cur->name_byte];
+ direction = (1 + (cur->name_otherbits | c)) >> 8;
+ p = cur->name_child[direction];
+ }
+ return p;
+}
+
+/* returns true and populates oid if found, false if not */
+int cbm_fetch(const struct cbm *cbm, const char *name, struct object_id *oid)
+{
+ cbm_off_t p = cbm->t.root;
+ struct cbm_ref *ent = NULL;
+ int ret;
+
+ if (!p)
+ return 0;
+
+ p = nearest_by_name(cbm, &ent, name, p);
+ cbm_ref_load(cbm, &ent, p);
+ ret = 0 == strcmp(name, ent->name);
+ if (ret)
+ oidcpy(oid, &ent->oid);
+ free(ent);
+ return ret;
+}
+
+/*
+ * returns an offset where ent->total_size can be safely written
+ *
+ * For now this is always the current end of the file, or the end of the
+ * header when the file is still empty; ent is unused until the free_list
+ * gets scanned.  Dies if fstat fails, if the file is shorter than the
+ * header, or if its size does not fit in a cbm_off_t.
+ */
+static cbm_off_t
+cbm_alloc_begin(struct cbm *cbm, const struct cbm_ref *ent)
+{
+	struct stat sb;
+	/* TODO: scan free_list */
+
+	if (fstat(cbm->fd, &sb) < 0)
+		die_errno("fstat failed");
+
+	/* rule out negative sizes before comparing with unsigned values */
+	if (sb.st_size < 0)
+		die("size overflow");
+
+	if (sb.st_size == 0)
+		sb.st_size = sizeof(cbm->t);
+	else if ((uintmax_t)sb.st_size < sizeof(cbm->t))
+		die("cbm size mismatch %"PRIuMAX, (uintmax_t)sb.st_size);
+
+	if ((uintmax_t)sb.st_size > CBM_SIZE_MAX)
+		die("size overflow");
+
+	return (cbm_off_t)sb.st_size;
+}
+
+/*
+ * Inserts the ref "name" pointing at "oid", or updates it if it exists.
+ *
+ * Returns 2 when a new ref was added, 1 when the ref already existed
+ * (its record is rewritten only if the oid differs), and 0 when the
+ * name is too long for the 16-bit total_size field.
+ */
+int cbm_set(struct cbm *cbm, const char *name, const struct object_id *oid)
+{
+	const uint8_t *const ubytes = (const uint8_t *)name;
+	const size_t name_len = strlen(name);
+	cbm_off_t p = cbm->t.root;
+	struct cbm_ref *new_node, *cur = NULL;
+	uint32_t new_byte;
+	uint32_t new_otherbits;
+	uint32_t new_direction;
+	uint8_t c;
+	size_t size = size_align(name_len + 1 + sizeof(*new_node));
+	size_t wherep;
+	const uint8_t *cur_ubytes;
+	cbm_off_t new_off;
+
+	if (size > UINT16_MAX) {
+		warning("too long: %s %"PRIuMAX, name, (uintmax_t)size);
+		return 0;
+	}
+
+	/* empty tree: the new ref becomes the root */
+	if (!p) {
+		new_node = alloca(size);
+		/*
+		 * zero all "size" bytes, not just the struct: everything
+		 * up to total_size is written to the file below
+		 */
+		memset(new_node, 0, size);
+		new_node->total_size = (uint16_t)size;
+		memcpy(new_node->name, name, name_len + 1);
+		oidcpy(&new_node->oid, oid);
+
+		cbm->t.root = cbm_alloc_begin(cbm, new_node);
+		cbm_ref_dump(cbm, new_node, cbm->t.root);
+
+		return 2;
+	}
+
+	/*
+	 * nearest_by_name leaves the last internal node it visited in
+	 * "cur", which is not necessarily the record at the returned
+	 * offset; always load that record itself (as cbm_fetch does)
+	 * before comparing names or rewriting it.
+	 */
+	p = nearest_by_name(cbm, &cur, name, p);
+	cbm_ref_load(cbm, &cur, p);
+
+	fprintf(stderr, "nearest :%u %p %s\n", p, (void *)cur, cur->name);
+
+	/* find the first byte where the nearest name and the new one differ */
+	cur_ubytes = (void *)cur->name;
+	for (new_byte = 0; new_byte < name_len; new_byte++) {
+		if (cur_ubytes[new_byte] != ubytes[new_byte]) {
+			new_otherbits = cur_ubytes[new_byte] ^ ubytes[new_byte];
+			goto different_byte_found;
+		}
+	}
+
+	/* "name" is a strict prefix of the nearest name */
+	if (cur_ubytes[new_byte] != 0) {
+		new_otherbits = cur_ubytes[new_byte];
+		goto different_byte_found;
+	}
+
+	/* overwrite existing ref */
+	fprintf(stderr, "clobber\n");
+	assert(!strcmp(cur->name, name));
+	if (oidcmp(&cur->oid, oid)) {
+		oidcpy(&cur->oid, oid);
+		cbm_ref_dump(cbm, cur, p);
+	}
+	free(cur);
+
+	return 1;
+
+different_byte_found:
+
+	/* keep only the most significant differing bit, then invert */
+	new_otherbits |= new_otherbits >> 1;
+	new_otherbits |= new_otherbits >> 2;
+	new_otherbits |= new_otherbits >> 4;
+	new_otherbits = (new_otherbits & ~(new_otherbits >> 1)) ^ 255;
+
+	c = cur_ubytes[new_byte];
+	new_direction = (1 + (new_otherbits | c)) >> 8;
+
+	new_node = alloca(size);
+	memset(new_node, 0, size);
+	new_node->total_size = (uint16_t)size;
+	memcpy(new_node->name, name, name_len + 1);
+	oidcpy(&new_node->oid, oid);
+
+	cbm_flush(cbm, 0); /* ensure cbm->t.root is synchronized begin: */
+	new_off = cbm_alloc_begin(cbm, new_node);
+
+	new_node->name_byte = new_byte;
+	new_node->name_otherbits = new_otherbits;
+	new_node->name_child[1 - new_direction] = new_off; /* point to self */
+
+	/*
+	 * walk down from the root again to find the on-disk pointer
+	 * ("wherep") which has to be redirected to the new node; odd
+	 * offsets are internal nodes stored at (offset - 1)
+	 */
+	wherep = offsetof(struct cbm_ref_tree, root);
+	p = ptr_read(cbm, wherep);
+	while (1 & p) {
+		int direction;
+
+		p--;
+		cbm_ref_load(cbm, &cur, p);
+
+		if (cur->name_byte > new_byte)
+			break;
+		if (cur->name_byte == new_byte &&
+		    cur->name_otherbits > new_otherbits)
+			break;
+		c = 0;
+		if (cur->name_byte < name_len)
+			c = ubytes[cur->name_byte];
+
+		fprintf(stderr, "off: %zu\n", offsetof(struct cbm_ref, name_child));
+		wherep = p + offsetof(struct cbm_ref, name_child);
+		direction = (1 + (cur->name_otherbits | c)) >> 8;
+		if (direction)
+			wherep += sizeof(cbm_off_t);
+		p = ptr_read(cbm, wherep);
+	}
+
+	free(cur);
+
+	/* the new node adopts whatever wherep pointed to, then replaces it */
+	new_node->name_child[new_direction] = ptr_read(cbm, wherep);
+	cbm_ref_dump(cbm, new_node, new_off);
+
+	if (wherep == offsetof(struct cbm_ref_tree, root))
+		cbm->t.root = new_off + 1; /* to be flushed, later */
+	else
+		ptr_write(cbm, wherep, new_off + 1);
+
+	return 2;
+}
+
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags)
+{
+ struct cbm_ref_tree t = cbm->t;
+
+ hton_flist(&t.free_list);
+ t.root = hton_off(t.root);
+ pwrite_or_die(cbm->fd, &t, sizeof(t), 0);
+
+ if (flush_flags & CBM_FSYNC)
+ fsync(cbm->fd);
+}
+
+/*
+ * Opens the cbm file at path (flags are passed on to xopen) and loads
+ * its header into a newly allocated struct cbm.  An empty file yields
+ * an empty tree.  Dies on a read error or a partial header.
+ * The result is released with cbm_close().
+ */
+struct cbm *cbm_open(const char *path, int flags)
+{
+	struct cbm *cbm;
+	ssize_t n;
+	int fd = xopen(path, flags, 0666);
+
+	if (fd < 0)
+		return NULL;
+#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
+	{
+		/* named fdflags so the open flags parameter is not shadowed */
+		int fdflags = fcntl(fd, F_GETFD);
+		if (fdflags == -1)
+			die_errno("F_GETFD failed");
+		if (fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC) == -1)
+			die_errno("F_SETFD failed");
+	}
+#endif
+
+	cbm = xmalloc(sizeof(*cbm));
+	cbm->fd = fd;
+	cbm->flags = flags;
+
+	n = pread_in_full(fd, &cbm->t, sizeof(cbm->t), 0);
+
+	if (n < 0) {
+		die_errno("failed to read cbm header: %s", path);
+	} else if (n == 0) {
+		/*
+		 * new file: zero the whole header (magic_bits included)
+		 * so cbm_flush never writes uninitialized heap to disk
+		 */
+		memset(&cbm->t, 0, sizeof(cbm->t));
+		FLIST_INIT(struct cbm_ref_tree, &cbm->t, free_list);
+	} else if ((size_t)n == sizeof(cbm->t)) {
+		/* on-disk header is in network byte order */
+		cbm->t.root = ntoh_off(cbm->t.root);
+		fprintf(stderr, "root: %u\n", cbm->t.root);
+		ntoh_flist(&cbm->t.free_list);
+	} else {
+		die("unexpected cbm size: %ld", (long)n);
+	}
+
+	return cbm;
+}
+
+void cbm_close(struct cbm *cbm, unsigned int close_flags)
+{
+ if (cbm->flags & O_RDWR)
+ cbm_flush(cbm, close_flags);
+ else if (close_flags & CBM_FSYNC)
+ fsync(cbm->fd);
+
+ close(cbm->fd);
+ free(cbm);
+}
diff --git a/cbm_ref.h b/cbm_ref.h
new file mode 100644
index 0000000000..1ecc65052c
--- /dev/null
+++ b/cbm_ref.h
@@ -0,0 +1,16 @@
+#ifndef CBM_REF_H
+#define CBM_REF_H
+#include "cache.h"
+struct cbm;
+
+#define CBM_FSYNC (0x1u)
+
+struct cbm *cbm_open(const char *path, int flags);
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags);
+void cbm_close(struct cbm *, unsigned int flags);
+
+int cbm_fetch(const struct cbm *, const char *name, struct object_id *);
+int cbm_delete(struct cbm *, const char *name, struct object_id *);
+int cbm_set(struct cbm *, const char *name, const struct object_id *);
+
+#endif /* CBM_REF_H */
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 721650256e..71648cc187 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -1,3 +1,4 @@
+/test-cbm
/test-chmtime
/test-ctype
/test-config
diff --git a/t/helper/test-cbm.c b/t/helper/test-cbm.c
new file mode 100644
index 0000000000..f77603ca8f
--- /dev/null
+++ b/t/helper/test-cbm.c
@@ -0,0 +1,45 @@
+#include "cbm_ref.h"
+#include "git-compat-util.h"
+
+static const char usage_str[] = "FILE [REFNAME [HEXVALUE]]";
+
+/*
+ * test-cbm FILE                    dump (not implemented, yet)
+ * test-cbm FILE REFNAME            print the hex oid stored for REFNAME
+ * test-cbm FILE REFNAME HEXVALUE   store HEXVALUE for REFNAME
+ *
+ * Returns 0 on success and 1 otherwise; any other argument count
+ * prints the usage message.
+ */
+int cmd_main(int argc, const char **argv)
+{
+	int ret = 1;
+
+	if (argc == 2) {
+		fprintf(stderr, "dump todo\n");
+	} else if (argc >= 3 && argc <= 4) {
+		struct cbm *cbm = cbm_open(argv[1], O_RDWR|O_CREAT);
+		struct object_id oid;
+
+		if (!cbm)
+			die_errno("failed to open: %s", argv[1]);
+
+		if (argc == 3) {
+			if (cbm_fetch(cbm, argv[2], &oid)) {
+				printf("%s\n", oid_to_hex(&oid));
+				ret = 0;
+			} else {
+				fprintf(stderr, "not found\n");
+			}
+		} else if (argc == 4) {
+			const char *p = argv[3];
+
+			if (parse_oid_hex(p, &oid, &p))
+				die("invalid hex: %s", argv[3]);
+
+			switch (cbm_set(cbm, argv[2], &oid)) {
+			case 1: fprintf(stderr, "overwritten\n"); break;
+			case 2: fprintf(stderr, "new\n"); break;
+			default: die("cbm_set error");
+			}
+
+			ret = 0;
+		}
+		cbm_close(cbm, 0);
+	} else {
+		/* too few or too many arguments */
+		fprintf(stderr, "usage: %s %s\n",
+			argc > 0 ? argv[0] : "test-cbm", usage_str);
+	}
+	return ret;
+}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2018-06-24 8:39 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2018-06-24 8:39 UTC (permalink / raw)
To: spew
From: Eric Wong <normalperson@yhbt.net>
---
mjit.c | 11 +++-
process.c | 160 ++++++++++++++++++++++++++----------------------------
signal.c | 30 +++++-----
thread.c | 3 +
4 files changed, 104 insertions(+), 100 deletions(-)
diff --git a/mjit.c b/mjit.c
index 55ff7e21ee..685a57dfb8 100644
--- a/mjit.c
+++ b/mjit.c
@@ -111,7 +111,8 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo
extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void));
-pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options);
+pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options,
+ rb_nativethread_cond_t *cond);
#define RB_CONDATTR_CLOCK_MONOTONIC 1
@@ -385,7 +386,9 @@ exec_process(const char *path, char *const argv[])
int stat, exit_code;
pid_t pid;
rb_vm_t *vm = GET_VM();
+ rb_nativethread_cond_t cond;
+ rb_native_cond_initialize(&cond);
rb_nativethread_lock_lock(&vm->waitpid_lock);
pid = start_process(path, argv);
if (pid <= 0) {
@@ -393,10 +396,11 @@ exec_process(const char *path, char *const argv[])
return -2;
}
for (;;) {
- pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0);
+ pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0, &cond);
if (r == -1) {
- if (errno == EINTR) continue;
+ if (errno == EINTR) continue; /* should never happen */
fprintf(stderr, "waitpid: %s\n", strerror(errno));
+ exit_code = -2;
break;
}
else if (r == pid) {
@@ -410,6 +414,7 @@ exec_process(const char *path, char *const argv[])
}
}
rb_nativethread_lock_unlock(&vm->waitpid_lock);
+ rb_native_cond_destroy(&cond);
return exit_code;
}
diff --git a/process.c b/process.c
index bdb92036b7..3b293b8d6b 100644
--- a/process.c
+++ b/process.c
@@ -899,153 +899,145 @@ do_waitpid(rb_pid_t pid, int *st, int flags)
struct waitpid_state {
struct list_node wnode;
- rb_nativethread_cond_t cond;
+ union {
+ rb_nativethread_cond_t *cond; /* non-Ruby thread */
+ rb_execution_context_t *ec; /* normal Ruby execution context */
+ } wake;
+ struct rb_unblock_callback ubf;
rb_pid_t ret;
rb_pid_t pid;
int status;
int options;
int errnum;
- rb_vm_t *vm;
+ unsigned int is_ruby : 1;
};
+void rb_native_mutex_lock(rb_nativethread_lock_t *);
+void rb_native_mutex_unlock(rb_nativethread_lock_t *);
void rb_native_cond_signal(rb_nativethread_cond_t *);
void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
-void rb_native_cond_initialize(rb_nativethread_cond_t *);
-void rb_native_cond_destroy(rb_nativethread_cond_t *);
-/* only called by vm->main_thread */
+/* called by timer-thread */
void
-rb_sigchld(rb_vm_t *vm)
+ruby_waitpid_all(rb_vm_t *vm)
{
struct waitpid_state *w = 0, *next;
- rb_nativethread_lock_lock(&vm->waitpid_lock);
+ rb_native_mutex_lock(&vm->waitpid_lock);
list_for_each_safe(&vm->waiting_pids, w, next, wnode) {
w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+
if (w->ret == 0) continue;
if (w->ret == -1) w->errnum = errno;
+
list_del_init(&w->wnode);
- rb_native_cond_signal(&w->cond);
+ if (w->is_ruby) {
+ /*
+ * we call this in timer-thread, because vm->main_thread
+ * cannot wake itself up...
+ */
+ rb_thread_wakeup_alive(rb_ec_thread_ptr(w->wake.ec)->self);
+ }
+ else {
+ rb_native_cond_signal(w->wake.cond);
+ }
}
- rb_nativethread_lock_unlock(&vm->waitpid_lock);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
static void
-waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options)
+waitpid_state_init(struct waitpid_state *w, pid_t pid, int options)
{
- rb_native_cond_initialize(&w->cond);
w->ret = 0;
w->pid = pid;
w->status = 0;
w->options = options;
- w->vm = vm;
list_node_init(&w->wnode);
+ w->is_ruby = ruby_thread_has_gvl_p();
}
-/* must be called with vm->waitpid_lock held, this is not interruptible */
+/*
+ * must be called with vm->waitpid_lock held, this is not interruptible
+ */
pid_t
-ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options)
+ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options,
+ rb_nativethread_cond_t *cond)
{
struct waitpid_state w;
assert(!ruby_thread_has_gvl_p() && "must not have GVL");
- waitpid_state_init(&w, vm, pid, options);
+ waitpid_state_init(&w, pid, options);
w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG);
if (w.ret) {
- if (w.ret == -1) {
- w.errnum = errno;
- }
+ if (w.ret == -1) w.errnum = errno;
}
else {
+ w.wake.cond = cond;
list_add(&vm->waiting_pids, &w.wnode);
- while (!w.ret) {
- rb_native_cond_wait(&w.cond, &vm->waitpid_lock);
- }
+ do {
+ rb_native_cond_wait(w.wake.cond, &vm->waitpid_lock);
+ } while (!w.ret);
list_del(&w.wnode);
}
if (status) {
*status = w.status;
}
- rb_native_cond_destroy(&w.cond);
errno = w.errnum;
return w.ret;
}
-static void
-waitpid_ubf(void *x)
-{
- struct waitpid_state *w = x;
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- if (!w->ret) {
- w->errnum = EINTR;
- w->ret = -1;
- }
- rb_native_cond_signal(&w->cond);
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-}
-
-static void *
-waitpid_nogvl(void *x)
+static VALUE
+waitpid_sleep(VALUE x)
{
- struct waitpid_state *w = x;
+ struct waitpid_state *w = (struct waitpid_state *)x;
- /* let rb_sigchld handle it */
- rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock);
+ do {
+ rb_thread_sleep_forever(); /* handles interrupts for us */
+ } while (!w->ret);
- return 0;
+ return Qfalse;
}
static VALUE
-waitpid_wait(VALUE x)
+waitpid_ensure(VALUE x)
{
struct waitpid_state *w = (struct waitpid_state *)x;
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
- if (w->ret) {
- if (w->ret == -1) {
- w->errnum = errno;
- }
- }
- else {
- rb_execution_context_t *ec = GET_EC();
+ if (w->ret <= 0) {
+ rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
- list_add(&w->vm->waiting_pids, &w->wnode);
- do {
- rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w);
- if (RUBY_VM_INTERRUPTED_ANY(ec) ||
- (w->ret == -1 && w->errnum == EINTR)) {
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-
- RUBY_VM_CHECK_INTS(ec);
-
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- if (w->ret == -1 && w->errnum == EINTR) {
- w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG);
- if (w->ret == -1)
- w->errnum = errno;
- }
- }
- } while (!w->ret);
+ rb_native_mutex_lock(&vm->waitpid_lock);
+ list_del(&w->wnode);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
return Qfalse;
}
-static VALUE
-waitpid_ensure(VALUE x)
+static void
+waitpid_wait(struct waitpid_state *w)
{
- struct waitpid_state *w = (struct waitpid_state *)x;
+ rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
- if (w->ret <= 0) {
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- list_del_init(&w->wnode);
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+ /*
+ * Lock here to prevent do_waitpid from stealing work from the
+ * ruby_waitpid_locked done by mjit workers since mjit works
+ * outside of GVL
+ */
+ rb_native_mutex_lock(&vm->waitpid_lock);
+
+ w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+ if (w->ret) {
+ if (w->ret == -1) w->errnum = errno;
+
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
+ else {
+ list_add(&vm->waiting_pids, &w->wnode);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
- rb_native_cond_destroy(&w->cond);
- return Qfalse;
+ rb_ensure(waitpid_sleep, (VALUE)w, waitpid_ensure, (VALUE)w);
+ }
}
rb_pid_t
@@ -1059,11 +1051,10 @@ rb_waitpid(rb_pid_t pid, int *st, int flags)
else {
struct waitpid_state w;
- waitpid_state_init(&w, GET_VM(), pid, flags);
- rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w);
- if (st) {
- *st = w.status;
- }
+ waitpid_state_init(&w, pid, flags);
+ w.wake.ec = GET_EC();
+ waitpid_wait(&w);
+ if (st) *st = w.status;
result = w.ret;
}
if (result > 0) {
@@ -1348,6 +1339,9 @@ after_exec_non_async_signal_safe(void)
{
rb_thread_reset_timer_thread();
rb_thread_start_timer_thread();
+ if (rb_signal_buff_size()) {
+ rb_thread_wakeup_timer_thread();
+ }
}
static void
diff --git a/signal.c b/signal.c
index c20b01ea36..e339434113 100644
--- a/signal.c
+++ b/signal.c
@@ -1052,18 +1052,23 @@ rb_trap_exit(void)
}
}
-static int
-sig_is_chld(int sig)
-{
#if defined(SIGCLD)
- return (sig == SIGCLD);
+# define RUBY_SIGCHLD (SIGCLD)
#elif defined(SIGCHLD)
- return (sig == SIGCHLD);
+# define RUBY_SIGCHLD (SIGCHLD)
+#else
+# define RUBY_SIGCHLD (0)
#endif
- return 0;
-}
-void rb_sigchld(rb_vm_t *); /* process.c */
+void ruby_waitpid_all(rb_vm_t *); /* process.c */
+
+void
+ruby_sigchld_handler(rb_vm_t *vm) /* invoked from timer_thread_function() before the signal check */
+{
+ if (signal_buff.cnt[RUBY_SIGCHLD]) { /* presumably the pending SIGCHLD count; NOTE(review): indexes slot 0 when RUBY_SIGCHLD is (0) */
+ ruby_waitpid_all(vm); /* WNOHANG-polls every entry in vm->waiting_pids and wakes finished waiters */
+ }
+}
void
rb_signal_exec(rb_thread_t *th, int sig)
@@ -1072,9 +1077,6 @@ rb_signal_exec(rb_thread_t *th, int sig)
VALUE cmd = vm->trap_list.cmd[sig];
int safe = vm->trap_list.safe[sig];
- if (sig_is_chld(sig)) {
- rb_sigchld(vm);
- }
if (cmd == 0) {
switch (sig) {
case SIGINT:
@@ -1176,7 +1178,7 @@ trap_handler(VALUE *cmd, int sig)
VALUE command;
if (NIL_P(*cmd)) {
- if (sig_is_chld(sig)) {
+ if (sig == RUBY_SIGCHLD) {
goto sig_dfl;
}
func = SIG_IGN;
@@ -1199,7 +1201,7 @@ trap_handler(VALUE *cmd, int sig)
break;
case 14:
if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) {
- if (sig_is_chld(sig)) {
+ if (sig == RUBY_SIGCHLD) {
goto sig_dfl;
}
func = SIG_DFL;
@@ -1209,7 +1211,7 @@ trap_handler(VALUE *cmd, int sig)
case 7:
if (memcmp(cptr, "SIG_IGN", 7) == 0) {
sig_ign:
- if (sig_is_chld(sig)) {
+ if (sig == RUBY_SIGCHLD) {
goto sig_dfl;
}
func = SIG_IGN;
diff --git a/thread.c b/thread.c
index 8c9aafe07a..613599eaa6 100644
--- a/thread.c
+++ b/thread.c
@@ -4146,6 +4146,8 @@ rb_threadptr_check_signal(rb_thread_t *mth)
}
}
+void ruby_sigchld_handler(rb_vm_t *); /* signal.c */
+
static void
timer_thread_function(void *arg)
{
@@ -4164,6 +4166,7 @@ timer_thread_function(void *arg)
rb_native_mutex_unlock(&vm->thread_destruct_lock);
/* check signal */
+ ruby_sigchld_handler(vm);
rb_threadptr_check_signal(vm->main_thread);
#if 0
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2018-06-24 11:55 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2018-06-24 11:55 UTC (permalink / raw)
To: spew
From: Eric Wong <normalperson@yhbt.net>
---
mjit.c | 13 ++--
process.c | 173 ++++++++++++++++++++++++-----------------------
signal.c | 66 ++++++++----------
thread.c | 11 +++
thread_pthread.c | 2 +-
vm_core.h | 3 +
6 files changed, 141 insertions(+), 127 deletions(-)
diff --git a/mjit.c b/mjit.c
index c9c1208355..82c8ae2d7f 100644
--- a/mjit.c
+++ b/mjit.c
@@ -111,7 +111,8 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo
extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void));
-pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options);
+pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options,
+ rb_nativethread_cond_t *cond);
#define RB_CONDATTR_CLOCK_MONOTONIC 1
@@ -388,6 +389,7 @@ exec_process(const char *path, char *const argv[])
int stat, exit_code;
pid_t pid;
rb_vm_t *vm = GET_VM();
+ rb_nativethread_cond_t cond;
rb_nativethread_lock_lock(&vm->waitpid_lock);
pid = start_process(path, argv);
@@ -395,11 +397,13 @@ exec_process(const char *path, char *const argv[])
rb_nativethread_lock_unlock(&vm->waitpid_lock);
return -2;
}
+ rb_native_cond_initialize(&cond);
for (;;) {
- pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0);
+ pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0, &cond);
if (r == -1) {
- if (errno == EINTR) continue;
+ if (errno == EINTR) continue; /* should never happen */
fprintf(stderr, "waitpid: %s\n", strerror(errno));
+ exit_code = -2;
break;
}
else if (r == pid) {
@@ -413,6 +417,7 @@ exec_process(const char *path, char *const argv[])
}
}
rb_nativethread_lock_unlock(&vm->waitpid_lock);
+ rb_native_cond_destroy(&cond);
return exit_code;
}
@@ -1563,7 +1568,7 @@ mjit_finish(void)
absence. So wait for a clean finish of the threads. */
while (pch_status == PCH_NOT_READY) {
verbose(3, "Waiting wakeup from make_pch");
- /* release GVL to handle interrupts */
+ /* release GVL to handle interrupts */
rb_thread_call_without_gvl(wait_pch, 0, ubf_pch, 0);
}
CRITICAL_SECTION_FINISH(3, "in mjit_finish to wakeup from pch");
diff --git a/process.c b/process.c
index bdb92036b7..044804338f 100644
--- a/process.c
+++ b/process.c
@@ -899,153 +899,153 @@ do_waitpid(rb_pid_t pid, int *st, int flags)
struct waitpid_state {
struct list_node wnode;
- rb_nativethread_cond_t cond;
+ union {
+ rb_nativethread_cond_t *cond; /* non-Ruby thread */
+ rb_execution_context_t *ec; /* normal Ruby execution context */
+ } wake;
rb_pid_t ret;
rb_pid_t pid;
int status;
int options;
int errnum;
- rb_vm_t *vm;
+ unsigned int is_ruby : 1;
};
+void rb_native_mutex_lock(rb_nativethread_lock_t *);
+void rb_native_mutex_unlock(rb_nativethread_lock_t *);
void rb_native_cond_signal(rb_nativethread_cond_t *);
void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
-void rb_native_cond_initialize(rb_nativethread_cond_t *);
-void rb_native_cond_destroy(rb_nativethread_cond_t *);
-/* only called by vm->main_thread */
+FILE *mstream;
+
+/* called by main thread */
void
-rb_sigchld(rb_vm_t *vm)
+rb_waitpid_all(rb_vm_t *vm)
{
struct waitpid_state *w = 0, *next;
- rb_nativethread_lock_lock(&vm->waitpid_lock);
+ rb_native_mutex_lock(&vm->waitpid_lock);
list_for_each_safe(&vm->waiting_pids, w, next, wnode) {
w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+
if (w->ret == 0) continue;
if (w->ret == -1) w->errnum = errno;
+
list_del_init(&w->wnode);
- rb_native_cond_signal(&w->cond);
+ if (mstream) fprintf(mstream, "%d %d %d\n", getpid(), w->ret, w->pid);
+ if (w->is_ruby) {
+ /*
+ * we call this in timer-thread, because vm->main_thread
+ * cannot wake itself up...
+ */
+ rb_thread_wakeup_alive(rb_ec_thread_ptr(w->wake.ec)->self);
+ }
+ else {
+ rb_native_cond_signal(w->wake.cond);
+ }
}
- rb_nativethread_lock_unlock(&vm->waitpid_lock);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
static void
-waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options)
+waitpid_state_init(struct waitpid_state *w, pid_t pid, int options)
{
- rb_native_cond_initialize(&w->cond);
w->ret = 0;
w->pid = pid;
- w->status = 0;
w->options = options;
- w->vm = vm;
list_node_init(&w->wnode);
}
-/* must be called with vm->waitpid_lock held, this is not interruptible */
+/*
+ * must be called with vm->waitpid_lock held, this is not interruptible
+ */
pid_t
-ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options)
+ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options,
+ rb_nativethread_cond_t *cond)
{
struct waitpid_state w;
assert(!ruby_thread_has_gvl_p() && "must not have GVL");
- waitpid_state_init(&w, vm, pid, options);
+ waitpid_state_init(&w, pid, options);
+ w.is_ruby = 0;
w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG);
if (w.ret) {
- if (w.ret == -1) {
- w.errnum = errno;
- }
+ if (w.ret == -1) w.errnum = errno;
}
else {
+ w.wake.cond = cond;
list_add(&vm->waiting_pids, &w.wnode);
- while (!w.ret) {
- rb_native_cond_wait(&w.cond, &vm->waitpid_lock);
- }
+ if (mstream) fprintf(mstream, "%d add- %d\n", getpid(), w.pid);
+ do {
+ rb_native_cond_wait(w.wake.cond, &vm->waitpid_lock);
+ } while (!w.ret);
list_del(&w.wnode);
}
if (status) {
*status = w.status;
}
- rb_native_cond_destroy(&w.cond);
errno = w.errnum;
return w.ret;
}
-static void
-waitpid_ubf(void *x)
-{
- struct waitpid_state *w = x;
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- if (!w->ret) {
- w->errnum = EINTR;
- w->ret = -1;
- }
- rb_native_cond_signal(&w->cond);
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-}
+void rb_thread_sleep_interruptible(struct timespec *ts); /* thread.c */
-static void *
-waitpid_nogvl(void *x)
+static VALUE
+waitpid_sleep(VALUE x)
{
- struct waitpid_state *w = x;
+ struct waitpid_state *w = (struct waitpid_state *)x;
- /* let rb_sigchld handle it */
- rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock);
+ rb_thread_check_ints();
+ while (!w->ret) {
+ rb_thread_sleep_interruptible(0);
+ rb_thread_check_ints();
+ }
- return 0;
+ return Qfalse;
}
static VALUE
-waitpid_wait(VALUE x)
+waitpid_ensure(VALUE x)
{
struct waitpid_state *w = (struct waitpid_state *)x;
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
- if (w->ret) {
- if (w->ret == -1) {
- w->errnum = errno;
- }
- }
- else {
- rb_execution_context_t *ec = GET_EC();
+ if (w->ret == 0) {
+ rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
- list_add(&w->vm->waiting_pids, &w->wnode);
- do {
- rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w);
- if (RUBY_VM_INTERRUPTED_ANY(ec) ||
- (w->ret == -1 && w->errnum == EINTR)) {
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-
- RUBY_VM_CHECK_INTS(ec);
-
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- if (w->ret == -1 && w->errnum == EINTR) {
- w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG);
- if (w->ret == -1)
- w->errnum = errno;
- }
- }
- } while (!w->ret);
+ if (mstream) fprintf(mstream, "%d ensure del %d\n", getpid(), w->pid);
+ rb_native_mutex_lock(&vm->waitpid_lock);
+ list_del(&w->wnode);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
return Qfalse;
}
-static VALUE
-waitpid_ensure(VALUE x)
+static void
+waitpid_wait(struct waitpid_state *w)
{
- struct waitpid_state *w = (struct waitpid_state *)x;
+ rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
- if (w->ret <= 0) {
- rb_nativethread_lock_lock(&w->vm->waitpid_lock);
- list_del_init(&w->wnode);
- rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+ /*
+ * Lock here to prevent do_waitpid from stealing work from the
+ * ruby_waitpid_locked done by mjit workers since mjit works
+ * outside of GVL
+ */
+ rb_native_mutex_lock(&vm->waitpid_lock);
+
+ w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+ if (w->ret) {
+ if (w->ret == -1) w->errnum = errno;
+
+ rb_native_mutex_unlock(&vm->waitpid_lock);
}
+ else {
+ list_add(&vm->waiting_pids, &w->wnode);
+ if (mstream) fprintf(mstream, "%d add++ %d\n", getpid(), w->pid);
+ rb_native_mutex_unlock(&vm->waitpid_lock);
- rb_native_cond_destroy(&w->cond);
- return Qfalse;
+ rb_ensure(waitpid_sleep, (VALUE)w, waitpid_ensure, (VALUE)w);
+ }
}
rb_pid_t
@@ -1059,11 +1059,11 @@ rb_waitpid(rb_pid_t pid, int *st, int flags)
else {
struct waitpid_state w;
- waitpid_state_init(&w, GET_VM(), pid, flags);
- rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w);
- if (st) {
- *st = w.status;
- }
+ waitpid_state_init(&w, pid, flags);
+ w.is_ruby = 1;
+ w.wake.ec = GET_EC();
+ waitpid_wait(&w);
+ if (st) *st = w.status;
result = w.ret;
}
if (result > 0) {
@@ -1348,6 +1348,9 @@ after_exec_non_async_signal_safe(void)
{
rb_thread_reset_timer_thread();
rb_thread_start_timer_thread();
+ if (rb_signal_buff_size()) {
+ rb_thread_wakeup_timer_thread();
+ }
}
static void
@@ -8227,4 +8230,8 @@ Init_process(void)
id_exception = rb_intern("exception");
InitVM(process);
+ if (0) {
+ mstream = fopen("/tmp/wait.log", "a");
+ setvbuf(mstream, NULL, _IONBF, 0);
+ }
}
diff --git a/signal.c b/signal.c
index c20b01ea36..22fc4286d9 100644
--- a/signal.c
+++ b/signal.c
@@ -66,6 +66,14 @@ ruby_atomic_compare_and_swap(rb_atomic_t *ptr, rb_atomic_t cmp,
# define NSIG (_SIGMAX + 1) /* For QNX */
#endif
+#if defined(SIGCLD)
+# define RUBY_SIGCHLD (SIGCLD)
+#elif defined(SIGCHLD)
+# define RUBY_SIGCHLD (SIGCHLD)
+#else
+# define RUBY_SIGCHLD (0)
+#endif
+
static const struct signals {
const char *signm;
int signo;
@@ -129,15 +137,9 @@ static const struct signals {
#ifdef SIGCONT
{"CONT", SIGCONT},
#endif
-#ifdef SIGCHLD
- {"CHLD", SIGCHLD},
-#endif
-#ifdef SIGCLD
- {"CLD", SIGCLD},
-#else
-# ifdef SIGCHLD
- {"CLD", SIGCHLD},
-# endif
+#if RUBY_SIGCHLD
+ {"CHLD", RUBY_SIGCHLD },
+ {"CLD", RUBY_SIGCHLD },
#endif
#ifdef SIGTTIN
{"TTIN", SIGTTIN},
@@ -1052,18 +1054,7 @@ rb_trap_exit(void)
}
}
-static int
-sig_is_chld(int sig)
-{
-#if defined(SIGCLD)
- return (sig == SIGCLD);
-#elif defined(SIGCHLD)
- return (sig == SIGCHLD);
-#endif
- return 0;
-}
-
-void rb_sigchld(rb_vm_t *); /* process.c */
+void rb_waitpid_all(rb_vm_t *); /* process.c */
void
rb_signal_exec(rb_thread_t *th, int sig)
@@ -1072,9 +1063,10 @@ rb_signal_exec(rb_thread_t *th, int sig)
VALUE cmd = vm->trap_list.cmd[sig];
int safe = vm->trap_list.safe[sig];
- if (sig_is_chld(sig)) {
- rb_sigchld(vm);
+ if (sig == RUBY_SIGCHLD) {
+ rb_waitpid_all(vm);
}
+
if (cmd == 0) {
switch (sig) {
case SIGINT:
@@ -1134,10 +1126,8 @@ default_handler(int sig)
#ifdef SIGUSR2
case SIGUSR2:
#endif
-#ifdef SIGCLD
- case SIGCLD:
-#elif defined(SIGCHLD)
- case SIGCHLD:
+#if RUBY_SIGCHLD
+ case RUBY_SIGCHLD:
#endif
func = sighandler;
break;
@@ -1176,7 +1166,7 @@ trap_handler(VALUE *cmd, int sig)
VALUE command;
if (NIL_P(*cmd)) {
- if (sig_is_chld(sig)) {
+ if (sig == RUBY_SIGCHLD) {
goto sig_dfl;
}
func = SIG_IGN;
@@ -1199,9 +1189,9 @@ trap_handler(VALUE *cmd, int sig)
break;
case 14:
if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) {
- if (sig_is_chld(sig)) {
- goto sig_dfl;
- }
+ if (sig == RUBY_SIGCHLD) {
+ goto sig_dfl;
+ }
func = SIG_DFL;
*cmd = 0;
}
@@ -1209,9 +1199,9 @@ trap_handler(VALUE *cmd, int sig)
case 7:
if (memcmp(cptr, "SIG_IGN", 7) == 0) {
sig_ign:
- if (sig_is_chld(sig)) {
- goto sig_dfl;
- }
+ if (sig == RUBY_SIGCHLD) {
+ goto sig_dfl;
+ }
func = SIG_IGN;
*cmd = Qtrue;
}
@@ -1443,7 +1433,7 @@ install_sighandler(int signum, sighandler_t handler)
# define install_sighandler(signum, handler) \
INSTALL_SIGHANDLER(install_sighandler(signum, handler), #signum, signum)
-#if defined(SIGCLD) || defined(SIGCHLD)
+#if RUBY_SIGCHLD
static int
init_sigchld(int sig)
{
@@ -1570,10 +1560,8 @@ Init_signal(void)
install_sighandler(SIGSYS, sig_do_nothing);
#endif
-#if defined(SIGCLD)
- init_sigchld(SIGCLD);
-#elif defined(SIGCHLD)
- init_sigchld(SIGCHLD);
+#if RUBY_SIGCHLD
+ init_sigchld(RUBY_SIGCHLD);
#endif
rb_enable_interrupt();
diff --git a/thread.c b/thread.c
index 8c9aafe07a..ab27c60632 100644
--- a/thread.c
+++ b/thread.c
@@ -1287,6 +1287,17 @@ rb_thread_sleep_forever(void)
sleep_forever(GET_THREAD(), SLEEP_SPURIOUS_CHECK);
}
+void
+rb_thread_sleep_interruptible(struct timespec *ts) /* waitpid_sleep() passes 0 here; presumably "no timeout" - confirm against native_sleep() */
+{
+ rb_thread_t *th = GET_THREAD();
+ enum rb_thread_status prev_status = th->status; /* saved so the status can be put back after the sleep */
+
+ th->status = THREAD_STOPPED;
+ native_sleep(th, ts); /* does not check interrupts itself here: the caller runs rb_thread_check_ints() after each return */
+ th->status = prev_status;
+}
+
void
rb_thread_sleep_deadly(void)
{
diff --git a/thread_pthread.c b/thread_pthread.c
index 1a1a6fc0c6..4d6afac3fb 100644
--- a/thread_pthread.c
+++ b/thread_pthread.c
@@ -1548,7 +1548,7 @@ rb_thread_create_timer_thread(void)
};
stack_size = stack_min;
if (stack_size < min_size) stack_size = min_size;
- if (needs_more_stack) {
+ if (1 || needs_more_stack) {
stack_size += +((BUFSIZ - 1) / stack_min + 1) * stack_min;
}
err = pthread_attr_setstacksize(&attr, stack_size);
diff --git a/vm_core.h b/vm_core.h
index d0689e5ba6..29f12abbf0 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -1559,11 +1559,14 @@ void rb_thread_stop_timer_thread(void);
void rb_thread_reset_timer_thread(void);
void rb_thread_wakeup_timer_thread(void);
+extern FILE *mstream;
+
static inline void
rb_vm_living_threads_init(rb_vm_t *vm)
{
list_head_init(&vm->waiting_fds);
list_head_init(&vm->waiting_pids);
+ if (mstream) fprintf(mstream, "clobber: %d\n", getpid());
list_head_init(&vm->living_threads);
vm->living_thread_num = 0;
}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2018-07-06 21:31 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2018-07-06 21:31 UTC (permalink / raw)
To: spew
---
configure.ac | 5 ++++
thread_pthread.c | 60 ++++++++++++++++++++++++++++++------------------
2 files changed, 43 insertions(+), 22 deletions(-)
diff --git a/configure.ac b/configure.ac
index 074a3fd394f..899f2437467 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1998,6 +1998,11 @@ AS_IF([test x"$ac_cv_func_clock_gettime" != xyes], [
])
])
AC_CHECK_FUNCS(clock_getres) # clock_getres should be tested after clock_gettime test including librt test.
+AC_CHECK_LIB([rt], [timer_create], [
+ AC_CHECK_LIB([rt], [timer_settime], [
+ AC_CHECK_LIB([rt], [timer_gettime], [
+ AC_CHECK_LIB([rt], [timer_delete], [AC_DEFINE(HAVE_POSIX_TIMERS)])
+])])])
AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
[AC_TRY_COMPILE([
diff --git a/thread_pthread.c b/thread_pthread.c
index 722ce444879..c4670efc9e3 100644
--- a/thread_pthread.c
+++ b/thread_pthread.c
@@ -34,6 +34,9 @@
#if defined(__HAIKU__)
#include <kernel/OS.h>
#endif
+#if defined(HAVE_TIME_H)
+#include <time.h>
+#endif
void rb_native_mutex_lock(rb_nativethread_lock_t *lock);
void rb_native_mutex_unlock(rb_nativethread_lock_t *lock);
@@ -46,6 +49,18 @@ void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *m
void rb_native_cond_initialize(rb_nativethread_cond_t *cond);
void rb_native_cond_destroy(rb_nativethread_cond_t *cond);
static void rb_thread_wakeup_timer_thread_low(void);
+
+#define TIMER_THREAD_MASK (1) /* bit shared by every implementation that runs a timer thread */
+#define TIMER_THREAD_SLEEPY (2|TIMER_THREAD_MASK) /* timer thread driven through the communication pipes */
+#define TIMER_THREAD_BUSY (4|TIMER_THREAD_MASK) /* timer thread driven by a timed condvar wait */
+
+#if defined(HAVE_POLL) && defined(HAVE_FCNTL) && defined(F_GETFL) && defined(F_SETFL) && defined(O_NONBLOCK)
+/* The timer thread sleeps while only one Ruby thread is running. */
+# define TIMER_IMPL TIMER_THREAD_SLEEPY
+#else /* poll or non-blocking fcntl unavailable: rb_thread_wakeup_timer_thread() is a no-op in this mode */
+# define TIMER_IMPL TIMER_THREAD_BUSY
+#endif /* TIMER_IMPL is compared with == and tested with & TIMER_THREAD_MASK below */
+
static struct {
pthread_t id;
int created;
@@ -61,13 +76,6 @@ static pthread_condattr_t *condattr_monotonic = &condattr_mono;
static const void *const condattr_monotonic = NULL;
#endif
-#if defined(HAVE_POLL) && defined(HAVE_FCNTL) && defined(F_GETFL) && defined(F_SETFL) && defined(O_NONBLOCK)
-/* The timer thread sleeps while only one Ruby thread is running. */
-# define USE_SLEEPY_TIMER_THREAD 1
-#else
-# define USE_SLEEPY_TIMER_THREAD 0
-#endif
-
static void
gvl_acquire_common(rb_vm_t *vm)
{
@@ -982,6 +990,7 @@ native_thread_create(rb_thread_t *th)
return err;
}
+#if (TIMER_IMPL & TIMER_THREAD_MASK)
static void
native_thread_join(pthread_t th)
{
@@ -990,6 +999,7 @@ native_thread_join(pthread_t th)
rb_raise(rb_eThreadError, "native_thread_join() failed (%d)", err);
}
}
+#endif /* TIMER_THREAD_MASK */
#if USE_NATIVE_THREAD_PRIORITY
@@ -1138,8 +1148,10 @@ ubf_select(void *ptr)
* In the other hands, we shouldn't call rb_thread_wakeup_timer_thread()
* if running on timer thread because it may make endless wakeups.
*/
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
if (!pthread_equal(pthread_self(), timer_thread.id))
rb_thread_wakeup_timer_thread();
+#endif
ubf_wakeup_thread(th);
}
@@ -1181,7 +1193,7 @@ static int ubf_threads_empty(void) { return 1; }
*/
#define TIME_QUANTUM_USEC (100 * 1000)
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
static struct {
/*
* Read end of each pipe is closed inside timer thread for shutdown
@@ -1419,8 +1431,9 @@ timer_thread_sleep(rb_vm_t *vm)
}
}
}
+#endif /* TIMER_THREAD_SLEEPY */
-#else /* USE_SLEEPY_TIMER_THREAD */
+#if TIMER_IMPL == TIMER_THREAD_BUSY
# define PER_NANO 1000000000
void rb_thread_wakeup_timer_thread(void) {}
static void rb_thread_wakeup_timer_thread_low(void) {}
@@ -1438,7 +1451,7 @@ timer_thread_sleep(rb_vm_t *unused)
native_cond_timedwait(&timer_thread_cond, &timer_thread_lock, &ts);
}
-#endif /* USE_SLEEPY_TIMER_THREAD */
+#endif /* TIMER_IMPL == TIMER_THREAD_BUSY */
#if !defined(SET_CURRENT_THREAD_NAME) && defined(__linux__) && defined(PR_SET_NAME)
# define SET_CURRENT_THREAD_NAME(name) prctl(PR_SET_NAME, name)
@@ -1508,7 +1521,7 @@ thread_timer(void *p)
SET_CURRENT_THREAD_NAME("ruby-timer-thr");
#endif
-#if !USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_BUSY
rb_native_mutex_initialize(&timer_thread_lock);
rb_native_cond_initialize(&timer_thread_cond);
rb_native_mutex_lock(&timer_thread_lock);
@@ -1524,10 +1537,11 @@ thread_timer(void *p)
/* wait */
timer_thread_sleep(vm);
}
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
CLOSE_INVALIDATE(normal[0]);
CLOSE_INVALIDATE(low[0]);
-#else
+#endif
+#if TIMER_IMPL == TIMER_THREAD_BUSY
rb_native_mutex_unlock(&timer_thread_lock);
rb_native_cond_destroy(&timer_thread_cond);
rb_native_mutex_destroy(&timer_thread_lock);
@@ -1537,6 +1551,7 @@ thread_timer(void *p)
return NULL;
}
+#if (TIMER_IMPL & TIMER_THREAD_MASK)
static void
rb_thread_create_timer_thread(void)
{
@@ -1581,14 +1596,14 @@ rb_thread_create_timer_thread(void)
}
# endif
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
err = setup_communication_pipe();
if (err != 0) {
rb_warn("pipe creation failed for timer: %s, scheduling broken",
strerror(err));
return;
}
-#endif /* USE_SLEEPY_TIMER_THREAD */
+#endif /* TIMER_THREAD_SLEEPY */
/* create timer thread */
if (timer_thread.created) {
@@ -1617,21 +1632,22 @@ rb_thread_create_timer_thread(void)
rb_warn("timer thread stack size: system default");
}
VM_ASSERT(err == 0);
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
CLOSE_INVALIDATE(normal[0]);
CLOSE_INVALIDATE(normal[1]);
CLOSE_INVALIDATE(low[0]);
CLOSE_INVALIDATE(low[1]);
-#endif
+#endif /* TIMER_THREAD_SLEEPY */
return;
}
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
/* validate pipe on this process */
timer_thread_pipe.owner_process = getpid();
-#endif
+#endif /* TIMER_THREAD_SLEEPY */
timer_thread.created = 1;
}
}
+#endif /* TIMER_IMPL & TIMER_THREAD_MASK */
static int
native_stop_timer_thread(void)
@@ -1641,7 +1657,7 @@ native_stop_timer_thread(void)
if (TT_DEBUG) fprintf(stderr, "stop timer thread\n");
if (stopped) {
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
/* prevent wakeups from signal handler ASAP */
timer_thread_pipe.owner_process = 0;
@@ -1662,7 +1678,7 @@ native_stop_timer_thread(void)
/* timer thread will stop looping when system_working <= 0: */
native_thread_join(timer_thread.id);
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
/* timer thread will close the read end on exit: */
VM_ASSERT(timer_thread_pipe.normal[0] == -1);
VM_ASSERT(timer_thread_pipe.low[0] == -1);
@@ -1727,7 +1743,7 @@ ruby_stack_overflowed_p(const rb_thread_t *th, const void *addr)
int
rb_reserved_fd_p(int fd)
{
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
if ((fd == timer_thread_pipe.normal[0] ||
fd == timer_thread_pipe.normal[1] ||
fd == timer_thread_pipe.low[0] ||
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2019-01-02 9:21 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2019-01-02 9:21 UTC (permalink / raw)
To: spew
---
ci/GNUmakefile | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
create mode 100644 ci/GNUmakefile
diff --git a/ci/GNUmakefile b/ci/GNUmakefile
new file mode 100644
index 0000000..362bd34
--- /dev/null
+++ b/ci/GNUmakefile
@@ -0,0 +1,63 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+PERL = perl
+SUDO = sudo
+GIT = git
+GIT_URL = https://public-inbox.org/
+TMPDIR = /tmp
+BUILDDIR = $(TMPDIR)/public-inbox-$@
+
+# Borrow objects from the checkout make was started in, if there is one;
+# stderr is discarded so running outside a git repository stays quiet.
+cur_git_dir := $(shell $(GIT) rev-parse --git-dir 2>/dev/null)
+git_reference = $(if $(cur_git_dir),--reference $(cur_git_dir))
+
+all ::
+ @echo usage:
+ @echo $(MAKE) sid-all
+ @echo $(MAKE) sid-min
+
+sid_all :=
+sid_min :=
+
+sid_min += libdatetime-perl
+sid_min += libemail-mime-perl
+sid_min += libplack-perl
+sid_min += liburi-perl
+
+# Plack depends on this:
+sid_min += libfilesys-notify-simple-perl
+
+sid_all += libdanga-socket-perl
+sid_all += libdbd-sqlite3-perl
+sid_all += libdbi-perl
+sid_all += libplack-middleware-deflater-perl
+sid_all += libplack-middleware-reverseproxy-perl
+sid_all += libsearch-xapian-perl
+sid_all += libsocket6-perl
+sid_all += libipc-run-perl
+sid_all += xapian-tools
+sid_all += spamassassin
+sid_all += liblinux-inotify2-perl
+
+sid_remove = $(addsuffix -,$(filter-out $(sid_min),$(sid_all)))
+
+install-debs-sid-all ::
+ $(SUDO) apt-get install -yqq $(sid_all)
+
+install-debs-sid-min ::
+ $(SUDO) apt-get install --purge -yqq $(sid_min) $(sid_remove)
+ $(SUDO) apt-get autoremove --purge -yqq
+
+sid-min sid-all ::
+ $(MAKE) install-debs-$@ build-and-test BUILDDIR=$(TMPDIR)/$@
+
+build-and-test ::
+ $(RM) -r $(BUILDDIR)
+ mkdir -p $(BUILDDIR)
+ $(GIT) clone -q $(git_reference) $(GIT_URL) $(BUILDDIR)
+ cd $(BUILDDIR) && $(PERL) Makefile.PL && $(MAKE) && $(MAKE) check
+ $(RM) -r $(BUILDDIR)
+
+.NOTPARALLEL:
--
2.20.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] WIP
@ 2019-05-11 22:55 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2019-05-11 22:55 UTC (permalink / raw)
To: spew
---
lib/PublicInbox/ViewDiff.pm | 367 +++++++++++++++++++++++++++++++++++-
lib/PublicInbox/ViewVCS.pm | 19 +-
2 files changed, 367 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 85b5314..6d732dd 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -10,10 +10,11 @@ package PublicInbox::ViewDiff;
use strict;
use warnings;
use base qw(Exporter);
-our @EXPORT_OK = qw(flush_diff);
+our @EXPORT_OK = qw(flush_diff); # for emails, OO API is for git(1) output
use URI::Escape qw(uri_escape_utf8);
use PublicInbox::Hval qw(ascii_html to_attr from_attr);
use PublicInbox::Git qw(git_unquote);
+use PublicInbox::WwwStream;
# keep track of state so we can avoid redundant HTML tags for
# identically-classed lines
@@ -41,6 +42,17 @@ package PublicInbox::ViewDiff;
my $PATH_A = '"?a/.+|/dev/null';
my $PATH_B = '"?b/.+|/dev/null';
+my $CMT_FMT = '--pretty=format:'.join('%n',
+ '%H', '%s', '%an <%ae>', '%ai', '%cn <%ce>', '%ci',
+ '%t', '%p', '%D', '%b%x00');
+
+sub CC_EMPTY () { " This is a merge, and the combined diff is empty.\n" }
+sub CC_MERGE () { " This is a merge, showing combined diff:\n\n" }
+
+# used for "git show" (on commits) and "git diff"
+my @DIFF_OPT = qw(-z --numstat -p --encoding=UTF-8 -C -B -D
+ --no-color --no-abbrev);
+
sub to_html ($$) {
$_[0]->linkify_1($_[1]);
$_[0]->linkify_2(ascii_html($_[1]));
@@ -143,6 +155,16 @@ ($$$$$)
undef
}
+sub dquery ($$) { # query string naming the post-image (b) and pre-image (a)
+ my ($pa, $pb) = @_;
+ my $esc_b = uri_escape_utf8($pb, UNSAFE);
+ return "?b=$esc_b" if $pa eq $pb; # same path on both sides: "b" only
+ "?b=$esc_b&a=" . uri_escape_utf8($pa, UNSAFE);
+}
+
+# API for emails. public-inbox assumes any email fits entirely in
+# memory (because we rely on Email::MIME anyways).
+# This interface relies on that assumption.
sub flush_diff ($$$) {
my ($dst, $ctx, $linkify) = @_;
my $diff = $ctx->{-diff};
@@ -174,13 +196,7 @@ ($$$)
}
$pa = (split('/', git_unquote($pa), 2))[1];
$pb = (split('/', git_unquote($pb), 2))[1];
- $dctx = {
- Q => "?b=".uri_escape_utf8($pb, UNSAFE),
- };
- if ($pa ne $pb) {
- $dctx->{Q} .= '&a='.
- uri_escape_utf8($pa, UNSAFE);
- }
+ $dctx = { Q => dquery($pa, $pb) };
anchor1($dst, $ctx, $linkify, $pb, $s) and next;
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
@@ -196,7 +212,7 @@ ($$$)
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
$$dst .= '</span>' if $state2class[$state];
- $$dst .= qq(<span\nclass="hunk">);
+ $$dst .= qq(<span\nclass="hunk">); # XHTML
$$dst .= diff_hunk($dctx, $spfx, $1, $2);
$$dst .= '</span>';
$state = DSTATE_CTX;
@@ -235,4 +251,337 @@ ($$$)
undef;
}
+# OO interface
+sub _cmd ($$) {
+ my ($self, $cmd) = @_;
+ $self->{git_cmd} = join(' ', @$cmd);
+ $cmd;
+}
+
+sub diff_cmd ($$$) {
+ my ($self, $oid_a, $oid_b) = @_;
+ _cmd($self, [ 'diff', @DIFF_OPT, $oid_a, $oid_b, '--' ]);
+}
+
+sub commit_cmd ($$) {
+ my ($self, $oid) = @_;
+ _cmd($self, [ qw(show -c), @DIFF_OPT, $CMT_FMT, $oid, '--' ]);
+}
+
+# OO API for parsing output of git-diff(1), git-show(1), etc...
+# We try to do as much as possible by streaming, so we act as
+# a stream editor (e.g. 'sed')
+sub new {
+ my ($class) = @_;
+ my $self = {
+ dstate => DSTATE_INIT,
+ dbuf => '',
+ # mhelp => merge help
+ # diff_tree => 1 (true if comparing tree-ish)
+ };
+ $self->{ndiff} = $self->{nchg} = $self->{nadd} = $self->{ndel} = 0;
+ bless $self, $class;
+}
+
+# diffstat links to anchors within the same HTML page
+sub git_diffstat_rename ($$$) {
+ my ($self, $from, $to) = @_;
+ my $anchor = to_attr(git_unquote($to));
+ $self->{anchors}->{$anchor} = $to;
+ my @from = split('/', $from);
+ my @to = split('/', $to);
+ my ($base, @base);
+
+ # only show differing path components
+ while (@to && @from && $to[0] eq $from[0]) {
+ push @base, shift(@to);
+ shift @from;
+ }
+
+ $base = ascii_html(join('/', @base)) if @base;
+ $from = ascii_html(join('/', @from));
+ $to = ascii_html(join('/', @to));
+ $to = qq(<a\nhref="#$anchor">$to</a>);
+ @base ? "$base/{$from => $to}" : "$from => $to";
+}
+
+sub git_diff_sed_stat ($$) {
+ my ($self, $dst) = @_;
+ my @stat = split(/\0/, delete $self->{dbuf}, -1);
+ my $end; # end-of-stat
+ my $nchg = \($self->{nchg});
+ my $nadd = \($self->{nadd});
+ my $ndel = \($self->{ndel});
+ if (!$self->{dstat_started}) {
+ $self->{dstat_started} = 1;
+
+ # merges start with an extra '\0' before the diffstat
+ # non-merge commits start with an extra '\n', instead
+ if ($self->{mhelp}) {
+ if ($stat[0] eq '') {
+ shift @stat;
+ } else {
+ warn
+'initial merge diffstat line was not empty';
+ }
+ } else {
+ # for commits, only (not diff-tree)
+ $stat[0] =~ s/\A\n//s;
+ }
+ }
+ while (defined(my $l = shift @stat)) {
+ if ($l eq '') {
+ $end = 1 if $stat[0] && $stat[0] =~ /\Ad/; # "diff --"
+ last;
+ } elsif ($l =~ /\Adiff /) {
+ unshift @stat, $l;
+ $end = 1;
+ last;
+ }
+ utf8::upgrade($l);
+ $l =~ /\A(\S+)\t+(\S+)\t+(.*)/ or next;
+ my ($add, $del, $fn) = ($1, $2, $3);
+ if ($fn ne '') { # normal modification
+ # TODO: discard diffs if they are too big
+ # gigantic changes with many files may still OOM us
+ my $anchor = to_attr(git_unquote($fn));
+ $self->{anchors}->{$anchor} = $fn;
+ $l = qq(<a\nhref="#$anchor">).ascii_html($fn).'</a>';
+ } else { # rename
+ # incomplete...
+ if (scalar(@stat) < 2) {
+ unshift @stat, $l;
+ last;
+ }
+ my $from = shift @stat;
+ my $to = shift @stat;
+ utf8::upgrade($from);
+ utf8::upgrade($to);
+ $l = git_diffstat_rename($self, $from, $to);
+ }
+
+ # text changes show numerically, Binary does not
+ if ($add =~ /\A\d+\z/) {
+ $$nadd += $add;
+ $$ndel += $del;
+ $add = "+$add";
+ $del = "-$del";
+ }
+ ++$$nchg;
+ my $num = sprintf('% 6s/%-6s', $del, $add);
+ $$dst .= " $num\t$l\n";
+ }
+
+ # the rest of the diff:
+ $self->{dbuf} = join("\0", @stat);
+ return unless $end;
+
+ $self->{dstate} = DSTATE_HEAD;
+ $$dst .= "\n $$nchg ";
+ $$dst .= $$nchg == 1 ? 'file changed, ' : 'files changed, ';
+ $$dst .= $$nadd;
+ $$dst .= $$nadd == 1 ? ' insertion(+), ' : ' insertions(+), ';
+ $$dst .= $$ndel;
+ $$dst .= $$ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n";
+}
+
+# index abcdef89..01234567 100644
+sub git_diff_ab_index ($$$$) {
+ my ($self, $oid_a, $oid_b, $mode) = @_;
+ $self->{oid_a} = $oid_a;
+ $self->{oid_b} = $oid_b;
+ my $range = "$oid_a..$oid_b";
+ if ($self->{diff_tree}) {
+ $range = qq(<a\nhref="../../$range/s/$self->{Q}">$range</a>);
+ }
+ 'index ' . $range . ascii_html($mode);
+}
+
+# diff --git a/foo.c b/bar.c
+sub git_diff_ab_hdr ($$$) {
+ my ($self, $pa, $pb) = @_;
+ my $rv = '';
+ if ($self->{dstate} != DSTATE_HEAD) {
+ to_state(\$rv, $self->{dstate}, DSTATE_HEAD);
+ }
+ $pa = (split('/', git_unquote($pa), 2))[1];
+ $pb = (split('/', git_unquote($pb), 2))[1];
+ $self->{Q} = dquery($pa, $pb) if $self->{diff_tree};
+ my $anchor = to_attr($pb);
+ delete $self->{anchors}->{$anchor};
+
+ # not wasting bandwidth on links here
+ # links in hunk headers are far more useful with line offsets
+ $rv .= qq(<a\nid="$anchor">diff</a> --git ) .
+ ascii_html($pa) . ' ' . ascii_html($pb)
+}
+
+# diff (--cc|--combined)
+sub git_diff_cc_hdr {
+ my ($self, $combined, $path) = @_;
+ $path = git_unquote($path);
+ $self->{Q} = dquery($path, $path);
+ my $anchor = to_attr($path);
+ delete $self->{anchors}->{$anchor};
+ qq(<a\nid="$anchor">diff</a> --$combined ) . ascii_html($path);
+}
+
+sub offset_link ($$$) {
+ my ($qs, $oid, $offset) = @_;
+ my ($n) = ($offset =~ /\A[\-\+](\d+)/);
+ if (defined $n && $n == 0) {
+ # new or deleted file, don't link it
+ $offset;
+ } else {
+ $n = defined $n ? "#n$n" : '';
+ qq(<a href="../../$oid/s/$qs$n">$offset</a>)
+ }
+}
+
+# @@ -1,2 +3,4 @@ (regular diff)
+sub git_diff_ab_hunk ($$$$) {
+ my ($self, $ca, $cb, $func_ctx) = @_;
+ my $qs = $self->{Q};
+
+ qq(<span\nclass=hunk>@@ ) . # HTML
+ offset_link($qs, $self->{oid_a}, $ca) .
+ ' ' . offset_link($qs, $self->{oid_b}, $cb) .
+ ' @@' . ascii_html($func_ctx) . '</span>';
+}
+
+# index abcdef09,01234567..76543210
+sub git_diff_cc_index {
+ my ($self, $before, $last, $end) = @_;
+ $self->{oids_cc} = [ split(',', $before), $last ];
+
+ # not wasting bandwidth on links here, yet
+ # links in hunk headers are far more useful with line offsets
+ "index $before..$last" . ascii_html($end);
+}
+
+# @@@ -1,2 -3,4 +5,6 @@@ (combined diff)
+sub git_diff_cc_hunk ($$$$) {
+ my ($self, $at_signs, $offs, $func_ctx) = @_;
+ my $pobj = $self->{oids_cc}; # OIDs saved by git_diff_cc_index, in order
+ my $i = 0;
+ my $qs = $self->{Q};
+ # map into @offs first so the trailing $at_signs is not linked, too
+ my @offs = map { offset_link($qs, $pobj->[$i++], $_) } split(' ', $offs);
+ qq(<span\nclass=hunk>) . # HTML; $at_signs already carries the "@@@"
+ join(' ', $at_signs, @offs, $at_signs) .
+ ascii_html($func_ctx) . '</span>';
+}
+
+# the rest of the diff (beyond diffstat)
+sub git_diff_sed_lines ($$) {
+ my ($self, $dst) = @_;
+ # $dst is a scalar ref; converted HTML is appended to $$dst
+ my @dlines = split(/\n/, delete $self->{dbuf}, -1);
+
+ # don't touch the last line, it may not be terminated
+ $self->{dbuf} = pop @dlines;
+
+ if (my $help = delete $self->{mhelp}) {
+ $$dst .= $help; # CC_MERGE
+ }
+
+ # reminder: this is stricter than similar code in flush_diff,
+ # this is for git output (including --cc/--combined) we generate,
+ # while flush_diff parses mail
+ my $ndiff = \($self->{ndiff});
+ my $linkify = PublicInbox::Linkify->new;
+ while (defined(my $s = shift @dlines)) {
+ utf8::upgrade($s);
+ if ($s =~ m{\Adiff --git ("?a/.+) ("?b/.+)\z}) { # regular
+ $$dst .= git_diff_ab_hdr($self, $1, $2);
+ } elsif ($s =~ m{\Adiff --(cc|combined) (.+)\z}) {
+ $$dst .= git_diff_cc_hdr($self, $1, $2);
+ } elsif ($s =~ /\Aindex ($OID_BLOB)\.\.($OID_BLOB)(.*)\z/o) {
+ # regular diff
+ $$dst .= git_diff_ab_index($self, $1, $2, $3);
+ } elsif ($s =~
+ /\Aindex ($OID_BLOB,[^\.]+)\.\.($OID_BLOB)(.*)\z/o) {
+ # --cc diff
+ $$dst .= git_diff_cc_index($self, $1, $2, $3);
+ } elsif ($s =~ /\A@@ (\S+) (\S+) @@(.*)\z/) { # regular
+ $$dst .= '</span>' if $state2class[$self->{dstate}];
+ $$dst .= git_diff_ab_hunk($self, $1, $2, $3);
+ $self->{dstate} = DSTATE_CTX;
+ } elsif ($s =~ /\A(@@@+) (\S+.*\S+) @@@+(.*)\z/) { # --cc
+ $$dst .= '</span>' if $state2class[$self->{dstate}];
+ $$dst .= git_diff_cc_hunk($self, $1, $2, $3);
+ $self->{dstate} = DSTATE_CTX; # span closed, as for regular hunks
+ } elsif ($s =~ /^ /) {
+ # works for common cases, but not weird/long filenames
+ if ($self->{dstate} == DSTATE_STAT &&
+ $s =~ /^ (.+)( +\| .*\z)/s) {
+ anchor0($dst, $self, $linkify, $1, $2) and next;
+ } elsif ($state2class[$self->{dstate}]) {
+ to_state($dst, $self->{dstate}, DSTATE_CTX);
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ m!^--- ! || $s =~ m!^\+{3} !) {
+ # color only (no oid link) if missing dctx->{oid_*}
+ $self->{dstate} <= DSTATE_STAT and
+ to_state($dst, $self->{dstate}, DSTATE_HEAD);
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^\+/) {
+ $self->{dstate} == DSTATE_ADD or
+ to_state($dst, $self->{dstate}, DSTATE_ADD);
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^-/) {
+ if ($self->{dstate} != DSTATE_DEL && $self->{dstate} > DSTATE_STAT) {
+ to_state($dst, $self->{dstate}, DSTATE_DEL);
+ }
+ $$dst .= to_html($linkify, $s);
+ # ignore the following lines in headers:
+ } elsif ($s =~ /^(?:old|new) mode/ ||
+ $s =~ /^(?:deleted|new) file mode/ ||
+ $s =~ /^(?:copy|rename) (?:from|to) / ||
+ $s =~ /^(?:dis)?similarity index /) {
+ $$dst .= to_html($linkify, $s);
+ } else {
+ $$dst .= to_html($linkify, $s);
+ }
+ $$dst .= "\n"; # split() above stripped the line terminator
+ ++$$ndiff;
+ }
+}
+
+sub git_diff_sed_run ($$) { # stat pass first; it may change dstate
+ my ($self, $dst) = @_;
+ git_diff_sed_stat($self, $dst) if $self->{dstate} == DSTATE_STAT;
+ git_diff_sed_lines($self, $dst) if $self->{dstate} > DSTATE_STAT;
+ undef;
+}
+
+sub git_diff_sed_close ($$) { # EOF: flush whatever is still buffered
+ my ($self, $dst) = @_;
+ my $tmp = delete($self->{dbuf}) // ''; # pop() in sed_lines may leave undef
+ utf8::upgrade($tmp);
+ $$dst .= ascii_html($tmp); # leftover is raw git output, escape it
+ undef;
+}
+
+sub git_diff_sed {
+ my ($self, $ctx) = @_;
+ my $ws = { ctx => $ctx };
+ my @first = PublicInbox::WwwStream::html_top($ws) . '<pre>';
+ $ctx->{-html_tip} = "<pre>Output of: git $self->{git_cmd}\n";
+ $self->{dstate} = DSTATE_STAT;
+
+ # this filters for $fh->write or $body->getline (see Qspawn)
+ sub {
+ my $dst = shift @first || '';
+ if (defined $_[0]) { # $_[0] == scalar buffer
+ $self->{dbuf} .= $_[0];
+ git_diff_sed_run($self, \$dst);
+ } else { # undef means EOF from "git show", flush the last bit
+ git_diff_sed_close($self, \$dst);
+ $dst .= '</pre>'.PublicInbox::WwwStream::html_end($ws);
+ }
+ $dst;
+ }
+}
+
1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index c693fcf..2ba09a8 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -20,7 +20,7 @@ package PublicInbox::ViewVCS;
use PublicInbox::SolverGit;
use PublicInbox::WwwStream;
use PublicInbox::Linkify;
-use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::ViewDiff;
use PublicInbox::Hval qw(ascii_html to_filename);
my $hl = eval {
require PublicInbox::HlMod;
@@ -181,23 +181,22 @@ ($$$$$$)
return html_page($ctx, 500, \'seek error');
}
$log = do { local $/; <$log> };
- warn "log: $log\n";
+ my $vdiff = PublicInbox::ViewDiff->new;
my $git_b = $res_b->[0];
- my $cmd = ['git', "--git-dir=$git_b->{git_dir}", 'diff',
- $res_a->[1], $res_b->[1] ];
+ my $gcmd = $vdiff->diff_cmd($res_a->[1], $res_b->[1]);
+ my $cmd = ['git', "--git-dir=$git_b->{git_dir}", @$gcmd ];
my $qsp = PublicInbox::Qspawn->new($cmd);
my $env = $ctx->{env};
$env->{'qspawn.wcb'} = delete $ctx->{-wcb};
- $qsp->psgi_return($env, undef, sub {
+ $qsp->psgi_return($env, undef, sub { # parse header
my ($r, $bref) = @_;
if (!defined $r) { # error
html_page($ctx, 500, $log);
- } elsif (index($$bref, "\0") >= 0) {
- my $ct = 'application/octet-stream';
- [200, ['Content-Type', $ct ] ];
+ } elsif ($r == 0) {
+ PublicInbox::WwwStream::r($ctx, 200, 'empty diff');
} else {
- my $ct = 'text/plain; charset=UTF-8';
- [200, ['Content-Type', $ct] ];
+ $env->{'qspawn.filter'} = $vdiff->git_diff_sed($ctx);
+ PublicInbox::WwwStream::r($ctx, 200);
}
});
}
--
EW
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] WIP
@ 2020-01-13 9:24 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-01-13 9:24 UTC (permalink / raw)
To: spew
---
lib/PublicInbox/SearchView.pm | 4 +--
lib/PublicInbox/View.pm | 52 +++++++++++++++++++----------------
lib/PublicInbox/WwwStream.pm | 1 +
3 files changed, 32 insertions(+), 25 deletions(-)
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 97233069..14a4aeaa 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -300,8 +300,8 @@ sub mset_thread_i {
my $msgs = $ctx->{msgs} or return;
while (my $smsg = pop @$msgs) {
$ctx->{-inbox}->smsg_mime($smsg) or next;
- return PublicInbox::View::index_entry($smsg, $ctx,
- scalar @$msgs);
+ return ${PublicInbox::View::index_entry($smsg, $ctx,
+ scalar @$msgs)};
}
my ($skel) = delete @$ctx{qw(skel msgs)};
$$skel .= "\n</pre>";
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c9e6b3c5..be42c1be 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -208,12 +208,13 @@ sub index_entry {
my $obfs_ibx = $ctx->{-obfs_ibx};
$subj = '(no subject)' if $subj eq '';
- my $rv = "<a\nhref=#e$id\nid=m$id>*</a> ";
+ my $obuf = $ctx->{obuf};
+ $$obuf .= "<a\nhref=#e$id\nid=m$id>*</a> ";
$subj = '<b>'.ascii_html($subj).'</b>';
obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
$subj = "<u\nid=u>$subj</u>" if $root_anchor eq $id_m;
- $rv .= $subj . "\n";
- $rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
+ $$obuf .= $subj . "\n";
+ $$obuf .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
my @tocc;
my $ds = $smsg->ds; # for v1 non-Xapian/SQLite users
# deleting {mime} is critical to memory use,
@@ -223,11 +224,11 @@ sub index_entry {
my $hdr = $mime->header_obj;
my $from = _hdr_names_html($hdr, 'From');
obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
- $rv .= "From: $from @ ".fmt_ts($ds)." UTC";
+ $$obuf .= "From: $from @ ".fmt_ts($ds)." UTC";
my $upfx = $ctx->{-upfx};
my $mhref = $upfx . mid_escape($mid_raw) . '/';
- $rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
- $rv .= qq{<a\nhref="${mhref}raw">raw</a>)\n};
+ $$obuf .= qq{ (<a\nhref="$mhref">permalink</a> / };
+ $$obuf .= qq{<a\nhref="${mhref}raw">raw</a>)\n};
my $to = fold_addresses(_hdr_names_html($hdr, 'To'));
my $cc = fold_addresses(_hdr_names_html($hdr, 'Cc'));
my ($tlen, $clen) = (length($to), length($cc));
@@ -245,32 +246,30 @@ sub index_entry {
$to_cc .= "\n";
}
obfuscate_addrs($obfs_ibx, $to_cc) if $obfs_ibx;
- $rv .= $to_cc;
+ $$obuf .= $to_cc;
my $mapping = $ctx->{mapping};
if (!$mapping && (defined($irt) || defined($irt = in_reply_to($hdr)))) {
my $mirt = PublicInbox::Hval->new_msgid($irt);
my $href = $upfx . $mirt->{href}. '/';
my $html = $mirt->as_html;
- $rv .= qq(In-Reply-To: <<a\nhref="$href">$html</a>>\n)
+ $$obuf .= qq(In-Reply-To: <<a\nhref="$href">$html</a>>\n)
}
- $rv .= "\n";
+ $$obuf .= "\n";
# scan through all parts, looking for displayable text
$ctx->{mhref} = $mhref;
- $ctx->{obuf} = \$rv;
msg_iter($mime, \&add_text_body, $ctx, 1);
- delete $ctx->{obuf};
# add the footer
- $rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
+ $$obuf .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
"<a\nhref=\"$mhref\">permalink</a>" .
" <a\nhref=\"${mhref}raw\">raw</a>" .
" <a\nhref=\"${mhref}#R\">reply</a>";
my $hr;
if (defined(my $pct = $smsg->{pct})) { # used by SearchView.pm
- $rv .= "\t[relevance $pct%]";
+ $$obuf .= "\t[relevance $pct%]";
$hr = 1;
} elsif ($mapping) {
my $nested = 'nested';
@@ -282,15 +281,15 @@ sub index_entry {
} else {
$nested = "<b>$nested</b>";
}
- $rv .= "\t[<a\nhref=\"${mhref}T/#u\">$flat</a>";
- $rv .= "|<a\nhref=\"${mhref}t/#u\">$nested</a>]";
- $rv .= " <a\nhref=#r$id>$ctx->{s_nr}</a>";
+ $$obuf .= "\t[<a\nhref=\"${mhref}T/#u\">$flat</a>";
+ $$obuf .= "|<a\nhref=\"${mhref}t/#u\">$nested</a>]";
+ $$obuf .= " <a\nhref=#r$id>$ctx->{s_nr}</a>";
} else {
$hr = $ctx->{-hr};
}
- $rv .= $more ? '</pre><hr><pre>' : '</pre>' if $hr;
- $rv;
+ $$obuf .= $more ? '</pre><hr><pre>' : '</pre>' if $hr;
+ delete $ctx->{obuf};
}
sub pad_link ($$;$) {
@@ -392,7 +391,10 @@ sub pre_thread { # walk_thread callback
sub thread_index_entry {
my ($ctx, $level, $smsg) = @_;
my ($beg, $end) = thread_adj_level($ctx, $level);
- $beg . '<pre>' . index_entry($smsg, $ctx, 0) . '</pre>' . $end;
+ my $obuf = $ctx->{obuf};
+ $$obuf .= $beg . '<pre>';
+ index_entry($smsg, $ctx, 0);
+ $$obuf .= '</pre>' . $end;
}
sub stream_thread_i { # PublicInbox::WwwStream::getline callback
@@ -475,20 +477,24 @@ sub thread_html {
}
return missing_thread($ctx) unless $smsg;
$ctx->{-title_html} = ascii_html($smsg->subject);
- $ctx->{-html_tip} = '<pre>'.index_entry($smsg, $ctx, scalar @$msgs);
+ ${$ctx->{obuf}} .= '<pre>';
+ index_entry($smsg, $ctx, scalar @$msgs);
+ $ctx->{-html_tip} = $obuf;
$ctx->{msgs} = $msgs;
PublicInbox::WwwStream->response($ctx, 200, \&thread_html_i);
}
sub thread_html_i { # PublicInbox::WwwStream::getline callback
my ($nr, $ctx) = @_;
- my $msgs = $ctx->{msgs} or return;
+ my $msgs = $ctx->{msgs} or return; # final return value
+
while (my $smsg = shift @$msgs) {
$ctx->{-inbox}->smsg_mime($smsg) or next;
- return index_entry($smsg, $ctx, scalar @$msgs);
+ return ${index_entry($smsg, $ctx, scalar @$msgs)};
}
+
my ($skel) = delete @$ctx{qw(skel msgs)};
- $$skel;
+ $$skel; # penultimate return value
}
sub multipart_text_as_html {
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index f0a57d6a..fd2cbcff 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -160,6 +160,7 @@ sub getline {
return _html_top($self) if $nr == 0;
if (my $middle = $self->{cb}) {
+ $self->{ctx}->{obuf} = \(my $obuf = '');
$middle = $middle->($nr, $self->{ctx}) and return $middle;
}
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2020-04-20 7:14 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-04-20 7:14 UTC (permalink / raw)
To: spew
---
lib/PublicInbox/MsgIter.pm | 6 ++--
lib/PublicInbox/View.pm | 64 +++++++++++++++++++++++++++++++++++---
t/message_subparts.t | 46 +++++++++++++++++++++++++++
3 files changed, 108 insertions(+), 8 deletions(-)
create mode 100644 t/message_subparts.t
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index fa25564a..e33e2b2b 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -12,8 +12,8 @@ use PublicInbox::MIME;
# Like Email::MIME::walk_parts, but this is:
# * non-recursive
# * passes depth and indices to the iterator callback
-sub msg_iter ($$;$$) {
- my ($mime, $cb, $cb_arg, $do_undef) = @_;
+sub msg_iter ($$;$$$) {
+ my ($mime, $cb, $cb_arg, $do_undef, $sub_hdr_cb) = @_;
my @parts = $mime->subparts;
if (@parts) {
$mime = $_[0] = undef if $do_undef; # saves some memory
@@ -28,7 +28,7 @@ sub msg_iter ($$;$$) {
@sub = map { [ $_, $depth, @idx, ++$i ] } @sub;
@parts = (@sub, @parts);
} else {
- $cb->($p, $cb_arg);
+ $cb->($p, $cb_arg, \$depth, \@parts);
}
}
} else {
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 9b62ed3c..527b8316 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -515,14 +515,67 @@ EOF
undef;
}
+# show headers on message/rfc822 attachments
+sub submsg_hdr {
+ my ($hdr, $ctx) = @_;
+ my $obfs_ibx = $ctx->{-obfs_ibx};
+ my $rv = $ctx->{obuf};
+ $$rv .= "\n";
+ for my $h (qw(From To Cc)) {
+ for my $v ($hdr->header($h)) {
+ fold_addresses($v);
+ $v = ascii_html($v);
+ obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+ $$rv .= "$h: $v\n" if $v ne '';
+ }
+ }
+ for my $h (qw(Subject Date)) {
+ for my $v ($hdr->header($h)) {
+ $v = ascii_html($v);
+ obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+ $$rv .= "$h: $v\n" if $v ne '';
+ }
+ }
+ my $lnk = PublicInbox::Linkify->new; # _parent_headers expects one now
+ for my $h (qw(Message-ID X-Alt-Message-ID In-Reply-To References)) {
+ my $s = '';
+ $s .= "$h: ".ascii_html($_)."\n" for ($hdr->header_raw($h));
+ # TODO: $lnk->linkify_mids('..', \$s, 1) in place of ascii_html
+ $$rv .= $s;
+ }
+ $$rv .= _parent_headers($hdr, undef, $lnk); # no $over, as before
+ $$rv .= "\n";
+ undef;
+}
+
sub add_text_body { # callback for msg_iter
- my ($p, $ctx) = @_;
- my $upfx = $ctx->{mhref};
- my $ibx = $ctx->{-inbox};
+ my ($p, $ctx, $iter_depth, $iter_parts) = @_;
# $p - from msg_iter: [ Email::MIME, depth, @idx ]
my ($part, $depth, @idx) = @$p;
my $ct = $part->content_type || 'text/plain';
my $fn = $part->filename;
+
+ if ($ct =~ m!\Amessage/(?:rfc822|news)\b!i) {
+ # required for compatibility with old URLs
+ attach_link($ctx, $ct, $p, $fn);
+
+ my $submsg = PublicInbox::MIME->new(\($part->body));
+ submsg_hdr($submsg->header_obj, $ctx);
+ my @sub = $submsg->subparts;
+ @sub = $submsg if !@sub;
+ my $d = ++$$iter_depth;
+ my $i = 0;
+ @sub = map { [ $_, $d, @idx, ++$i ] } @sub;
+ @$iter_parts = (@sub, @$iter_parts);
+ return;
+ # }
+ # $p->[0] = $part = $submsg;
+ # $p->[1] = ++$depth;
+ # $ct = $part->content_type || 'text/plain';
+ # $fn = $part->filename;
+ # fall-through:
+ }
+
my ($s, $err) = msg_part_text($part, $ct);
return attach_link($ctx, $ct, $p, $fn) unless defined $s;
@@ -530,6 +583,7 @@ sub add_text_body { # callback for msg_iter
# link generation in diffs with the extra '%0D'
$s =~ s/\r\n/\n/sg;
+ my $ibx = $ctx->{-inbox};
# will be escaped to `•' in HTML
obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};
@@ -537,6 +591,7 @@ sub add_text_body { # callback for msg_iter
# headers for solver unless some coderepo are configured:
my $diff;
if ($s =~ /^(?:diff|---|\+{3}) /ms) {
+ my $upfx = $ctx->{mhref};
# diffstat anchors do not link across attachments or messages:
$idx[0] = $upfx . $idx[0] if $upfx ne '';
$ctx->{-apfx} = join('/', @idx);
@@ -718,12 +773,11 @@ sub thread_skel ($$$) {
}
sub _parent_headers {
- my ($hdr, $over) = @_;
+ my ($hdr, $over, $lnk) = @_;
my $rv = '';
my @irt = $hdr->header_raw('In-Reply-To');
my $refs;
if (@irt) {
- my $lnk = PublicInbox::Linkify->new;
$rv .= "In-Reply-To: $_\n" for @irt;
$lnk->linkify_mids('..', \$rv);
} else {
diff --git a/t/message_subparts.t b/t/message_subparts.t
new file mode 100644
index 00000000..55ca8f4e
--- /dev/null
+++ b/t/message_subparts.t
@@ -0,0 +1,46 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::MIME;
+use PublicInbox::MsgIter;
+
+my $rfc822 = PublicInbox::MIME->new(<<'EOF');
+From: a@example.com
+To: a@example.com
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Subject: message/rfc822 embedded
+Content-Type: multipart/mixed; boundary="x"
+MIME-Version: 1.0
+
+--x
+Content-Transfer-Encoding: 7bit
+Content-Type: message/rfc822
+
+From: a@example.com
+To: a@example.com
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Subject: lost
+Content-Type: multipart/mixed; boundary="y"
+MIME-Version: 1.0
+
+--y
+Content-Transfer-Encoding: 7bit
+Content-Type: text/plain
+
+my leg
+
+--y--
+
+--x--
+EOF
+
+use Data::Dumper;
+msg_iter($rfc822, sub {
+ my ($part, $level, @idx) = @{$_[0]};
+ my ($s, $err) = msg_part_text($part, $part->content_type);
+ diag "S[$level,[".join(',', @idx)."]: $s";
+});
+
+ok 1;
+done_testing();
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2020-04-23 4:27 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-04-23 4:27 UTC (permalink / raw)
To: spew
---
MANIFEST | 1 -
lib/PublicInbox/GzipFilter.pm | 17 ++--
lib/PublicInbox/Mbox.pm | 155 ++++++++++++++++++++--------------
lib/PublicInbox/MboxGz.pm | 49 -----------
4 files changed, 99 insertions(+), 123 deletions(-)
delete mode 100644 lib/PublicInbox/MboxGz.pm
diff --git a/MANIFEST b/MANIFEST
index b0f08567..857fab4c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -131,7 +131,6 @@ lib/PublicInbox/MDA.pm
lib/PublicInbox/MID.pm
lib/PublicInbox/MIME.pm
lib/PublicInbox/Mbox.pm
-lib/PublicInbox/MboxGz.pm
lib/PublicInbox/MsgIter.pm
lib/PublicInbox/MsgTime.pm
lib/PublicInbox/Msgmap.pm
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index d2eb4e66..3a3c4a6b 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -10,7 +10,15 @@ our @EXPORT_OK = qw(gzf_maybe);
my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
-sub new { bless {}, shift }
+sub gzip_or_die () {
+ my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
+ $err == Z_OK or die "Deflate->new failed: $err";
+ $gz;
+}
+
+sub new { bless { gz => $_[1] ? undef : gzip_or_die() }, $_[0] }
+
+# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
# for Qspawn if using $env->{'pi-httpd.async'}
sub attach {
@@ -32,13 +40,6 @@ sub gzf_maybe ($$) {
bless { gz => $gz }, __PACKAGE__;
}
-sub gzip_or_die () {
- my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
- $err == Z_OK or die "Deflate->new failed: $err";
- $gz;
-}
-
-# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
# Also used for ->getline callbacks
sub translate ($$) {
my $self = $_[0]; # $_[1] => input
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 97bec5e7..36f7aea8 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -9,17 +9,16 @@
# more common "push" model)
package PublicInbox::Mbox;
use strict;
-use warnings;
+use PublicInbox::GzipFilter;
+use PublicInbox::NoopFilter;
use PublicInbox::MID qw/mid_escape/;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Smsg;
-use PublicInbox::WwwStream qw(html_oneshot);
use Email::Simple;
use Email::MIME::Encode;
sub subject_fn ($) {
- my ($hdr) = @_;
- my $fn = $hdr->header('Subject');
+ my ($fn) = @_;
return 'no-subject' if (!defined($fn) || $fn eq '');
# no need for full Email::MIME, here
@@ -31,63 +30,6 @@ sub subject_fn ($) {
$fn eq '' ? 'no-subject' : to_filename($fn);
}
-sub mb_stream {
- my ($more) = @_;
- bless $more, 'PublicInbox::Mbox';
-}
-
-# called by PSGI server as body response
-# this gets called twice for every message, once to return the header,
-# once to retrieve the body
-sub getline {
- my ($more) = @_; # self
- my ($ctx, $id, $prev, $next, $mref, $hdr) = @$more;
- if ($hdr) { # first message hits this, only
- pop @$more; # $hdr
- pop @$more; # $mref
- return msg_hdr($ctx, $hdr) . msg_body($$mref);
- }
- my $cur = $next or return;
- my $ibx = $ctx->{-inbox};
- $next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
- $mref = $ibx->msg_by_smsg($cur) or return;
- $hdr = Email::Simple->new($mref)->header_obj;
- @$more = ($ctx, $id, $prev, $next); # $next may be undef, here
- msg_hdr($ctx, $hdr) . msg_body($$mref);
-}
-
-sub close {} # noop
-
-# /$INBOX/$MESSAGE_ID/raw
-sub emit_raw {
- my ($ctx) = @_;
- my $mid = $ctx->{mid};
- my $ibx = $ctx->{-inbox};
- $ctx->{base_url} = $ibx->base_url($ctx->{env});
- my ($mref, $more, $id, $prev, $next);
- if (my $over = $ibx->over) {
- my $smsg = $over->next_by_mid($mid, \$id, \$prev) or return;
- $mref = $ibx->msg_by_smsg($smsg) or return;
- $next = $over->next_by_mid($mid, \$id, \$prev);
- } else {
- $mref = $ibx->msg_by_mid($mid) or return;
- }
- my $hdr = Email::Simple->new($mref)->header_obj;
- $more = [ $ctx, $id, $prev, $next, $mref, $hdr ]; # for ->getline
- my $fn = subject_fn($hdr);
- my @hdr = ('Content-Type');
- if ($ibx->{obfuscate}) {
- # obfuscation is stupid, but maybe scrapers are, too...
- push @hdr, 'application/mbox';
- $fn .= '.mbox';
- } else {
- push @hdr, 'text/plain';
- $fn .= '.txt';
- }
- push @hdr, 'Content-Disposition', "inline; filename=$fn";
- [ 200, \@hdr, mb_stream($more) ];
-}
-
sub msg_hdr ($$;$) {
my ($ctx, $header_obj, $mid) = @_;
@@ -135,6 +77,56 @@ sub msg_body ($) {
$_[0] .= "\n";
}
+sub emit_raw_cb { # getline callback, invoked as $cb->($ctx)
+ my ($ctx) = @_;
+ delete($ctx->{raw_first}) // $ctx->{-inbox}->over->next_by_mid(
+ $ctx->{mid},
+ \($ctx->{raw_id}),
+ \($ctx->{raw_prev}))
+}
+
+# /$INBOX/$MESSAGE_ID/raw
+sub emit_raw {
+ my ($ctx) = @_;
+ my $mid = $ctx->{mid};
+ my $ibx = $ctx->{-inbox};
+ $ctx->{base_url} = $ibx->base_url($ctx->{env});
+ my $h = [ 'Content-Type' => undef, 'Content-Disposition' => undef ];
+ my ($fn, $body);
+ # gzf_maybe is only in @EXPORT_OK and not imported here: qualify it
+ my $gzf = PublicInbox::GzipFilter::gzf_maybe($h, $ctx->{env}) ||
+ PublicInbox::NoopFilter->new;
+ if (my $over = $ibx->over) {
+ # iteration state lives in $ctx since getline only passes $ctx to cb
+ my $smsg = $over->next_by_mid($mid, \($ctx->{raw_id}),
+ \($ctx->{raw_prev})) or return;
+ $ctx->{raw_first} = $smsg; # emit_raw_cb hands this out first
+ $fn = $smsg->{subject};
+ $body = bless {
+ ctx => $ctx, cb => \&emit_raw_cb, gzf => $gzf
+ }, __PACKAGE__;
+ } else {
+ my $mref = $ibx->msg_by_mid($mid) or return;
+ my $hdr = Email::Simple->new($mref)->header_obj;
+ $fn = $hdr->header('Subject');
+ $gzf->zmore(msg_hdr($ctx, $hdr, $mid));
+ # the PSGI body is the filtered message, not the header object
+ $body = [ $gzf->zflush(msg_body($$mref)) ];
+ }
+ # add the suffix after subject_fn so "no-subject" can still kick in
+ my $ext;
+ if ($ibx->{obfuscate}) {
+ # obfuscation is stupid, but maybe scrapers are, too...
+ $h->[1] = 'application/mbox';
+ $ext = '.mbox';
+ } else {
+ $h->[1] = 'text/plain';
+ $ext = '.txt';
+ }
+ $h->[3] = 'inline; filename='.subject_fn($fn).$ext;
+ [ 200, $h, $body ];
+}
+
sub thread_cb {
my ($ctx) = @_;
my $msgs = $ctx->{msgs};
@@ -150,6 +142,22 @@ sub thread_cb {
}
}
+sub mboxgz ($$$) {
+ my ($ctx, $cb, $fn) = @_;
+ $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
+ my $body = bless {
+ gzf => PublicInbox::GzipFilter->new(1),
+ cb => $cb,
+ ctx => $ctx,
+ }, __PACKAGE__;
+ # http://www.iana.org/assignments/media-types/application/gzip
+ $fn = defined($fn) && $fn ne '' ? to_filename($fn) : 'no-subject';
+ my $h = [ qw(Content-Type application/gzip),
+ 'Content-Disposition', "inline; filename=$fn.mbox.gz" ];
+ [ 200, $h, $body ];
+}
+
+
sub thread_mbox {
my ($ctx, $over, $sfx) = @_;
require PublicInbox::MboxGz;
@@ -157,7 +165,7 @@ sub thread_mbox {
return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
$ctx->{prev} = $msgs->[-1];
$ctx->{over} = $over; # bump refcnt
- PublicInbox::MboxGz->response($ctx, \&thread_cb, $msgs->[0]->subject);
+ mboxgz($ctx, \&thread_cb, $msgs->[0]->subject);
}
sub emit_range {
@@ -195,7 +203,7 @@ sub mbox_all_ids {
return PublicInbox::WWW::need($ctx, 'Overview');
$ctx->{ids} = $ids;
$ctx->{prev} = $prev;
- return PublicInbox::MboxGz->response($ctx, \&all_ids_cb, 'all');
+ mboxgz($ctx, \&all_ids_cb, 'all');
}
sub results_cb {
@@ -220,7 +228,6 @@ sub results_cb {
sub mbox_all {
my ($ctx, $query) = @_;
- require PublicInbox::MboxGz;
return mbox_all_ids($ctx) if $query eq '';
my $qopts = $ctx->{qopts} = { mset => 2 };
my $srch = $ctx->{srch} = $ctx->{-inbox}->search or
@@ -231,7 +238,25 @@ sub mbox_all {
["No results found\n"]];
$ctx->{iter} = 0;
$ctx->{query} = $query;
- PublicInbox::MboxGz->response($ctx, \&results_cb, 'results-'.$query);
+ mboxgz($ctx, \&results_cb, 'results-'.$query);
}
+# called by Plack::Util::foreach or similar
+# Each call returns the gzip output for one more message obtained from
+# {cb}; once {cb} is exhausted the compressor is flushed and {gzf} is
+# dropped, so every later call returns nothing.
+sub getline {
+	my ($self) = @_;
+	my $gzf = $self->{gzf};
+	return unless $gzf;
+	my $ctx = $self->{ctx};
+	while (1) {
+		my $smsg = $self->{cb}->($ctx) or last;
+		my $mref = $ctx->{-inbox}->msg_by_smsg($smsg);
+		next unless $mref; # blob unavailable, try the next one
+		my $hdr = Email::Simple->new($mref)->header_obj;
+		$gzf->zmore(msg_hdr($ctx, $hdr, $smsg->{mid}));
+		return $gzf->translate(msg_body($$mref));
+	}
+	# signal that we're done and can return undef next call:
+	delete $self->{gzf};
+	return $gzf->zflush;
+}
+
+# counterpart to getline for the response body object; this object
+# holds nothing that needs explicit release
+sub close {} # noop
+
1;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
deleted file mode 100644
index 7deabf54..00000000
--- a/lib/PublicInbox/MboxGz.pm
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-package PublicInbox::MboxGz;
-use strict;
-use parent 'PublicInbox::GzipFilter';
-use Email::Simple;
-use PublicInbox::Hval qw/to_filename/;
-use PublicInbox::Mbox;
-
-sub new {
- my ($class, $ctx, $cb) = @_;
- $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
- bless {
- gz => PublicInbox::GzipFilter::gzip_or_die(),
- cb => $cb,
- ctx => $ctx
- }, $class;
-}
-
-sub response {
- my ($class, $ctx, $cb, $fn) = @_;
- my $body = $class->new($ctx, $cb);
- # http://www.iana.org/assignments/media-types/application/gzip
- $fn = defined($fn) && $fn ne '' ? to_filename($fn) : 'no-subject';
- my $h = [ qw(Content-Type application/gzip),
- 'Content-Disposition', "inline; filename=$fn.mbox.gz" ];
- [ 200, $h, $body ];
-}
-
-# called by Plack::Util::foreach or similar
-sub getline {
- my ($self) = @_;
- my $ctx = $self->{ctx} or return;
- while (my $smsg = $self->{cb}->($ctx)) {
- my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
- my $h = Email::Simple->new($mref)->header_obj;
- $self->zmore(
- PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid})
- );
- return $self->translate(PublicInbox::Mbox::msg_body($$mref));
- }
- # signal that we're done and can return undef next call:
- delete $self->{ctx};
- $self->zflush;
-}
-
-sub close {} # noop
-
-1;
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2020-11-15 7:35 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-11-15 7:35 UTC (permalink / raw)
To: spew
---
t/eml.t | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/t/eml.t b/t/eml.t
index 8d131b14..3012a035 100644
--- a/t/eml.t
+++ b/t/eml.t
@@ -269,7 +269,7 @@ Object-Id: ab0440d8cd6d843bee9a27709a459ce3b2bdb94d (lore/kvm)
EOF
my $eml = $cls->new(\$s);
my ($str, $err) = msg_part_text($eml, $eml->content_type);
- is($str, "\x{100}\n", "got wide character by assuming utf-8");
+ is($str, "\x{100}\n", "got wide character by assuming utf-8 ($cls)");
}
if ('we differ from Email::MIME with final "\n" on missing epilogue') {
@@ -383,8 +383,12 @@ SKIP: {
$msg->parts_set([$old[-1]]);
is(scalar $msg->subparts, 1, 'only last remains');
}
- is($eml->as_string, $mime->as_string,
- 'as_string matches after parts_set');
+
+ # some versions of Email::MIME or Email::MIME::* will drop
+ # unnecessary ", while PublicInbox::Eml will preserve the original
+ my $exp = $mime->as_string;
+ $exp =~ s/; boundary=b\b/; boundary="b"/;
+ is($eml->as_string, $exp, 'as_string matches after parts_set');
}
for my $cls (@classes) {
@@ -395,7 +399,8 @@ Content-Disposition: attachment; filename="=?utf-8?q?vtpm-makefile.patch?="
EOF
is($cls->new($s)->filename, 'vtpm-makefile.patch', 'filename decoded');
$s =~ s/^Content-Disposition:.*$//sm;
- is($cls->new($s)->filename, 'vtpm-fakefile.patch', 'filename fallback');
+ is($cls->new($s)->filename, 'vtpm-fakefile.patch',
+ "filename fallback ($cls)") if $cls ne 'PublicInbox::MIME';
is($cls->new($s)->content_type,
'text/x-patch; name="vtpm-fakefile.patch"',
'matches Email::MIME output, "correct" or not');
@@ -416,7 +421,7 @@ EOF
my @tmp;
$cls->new($s)->each_part(sub { push @tmp, $_[0]->[0]->filename });
is_deeply(['vtpm-makefile.patch', 'vtpm-fakefile.patch'], \@tmp,
- 'got filename for both attachments');
+ "got filename for both attachments ($cls)");
}
done_testing;
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] WIP
@ 2020-12-27 11:36 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-12-27 11:36 UTC (permalink / raw)
To: spew
---
MANIFEST | 2 ++
lib/PublicInbox/LeiSearch.pm | 14 ++++-----
lib/PublicInbox/LeiXSearch.pm | 57 +++++++++++++++++++++++++++++++++++
lib/PublicInbox/Search.pm | 19 +++++-------
t/lei_xsearch.t | 47 +++++++++++++++++++++++++++++
5 files changed, 119 insertions(+), 20 deletions(-)
create mode 100644 lib/PublicInbox/LeiXSearch.pm
create mode 100644 t/lei_xsearch.t
diff --git a/MANIFEST b/MANIFEST
index 656c707e..a5ff81cf 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -165,6 +165,7 @@ lib/PublicInbox/LEI.pm
lib/PublicInbox/LeiExtinbox.pm
lib/PublicInbox/LeiSearch.pm
lib/PublicInbox/LeiStore.pm
+lib/PublicInbox/LeiXSearch.pm
lib/PublicInbox/Linkify.pm
lib/PublicInbox/Listener.pm
lib/PublicInbox/Lock.pm
@@ -327,6 +328,7 @@ t/kqnotify.t
t/lei-oneshot.t
t/lei.t
t/lei_store.t
+t/lei_xsearch.t
t/linkify.t
t/main-bin/spamc
t/mda-mime.eml
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 66c16e04..0b962b11 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -7,20 +7,18 @@ use v5.10.1;
use parent qw(PublicInbox::ExtSearch);
use PublicInbox::Search;
-sub combined_docid ($$) {
+# get combined docid from over.num:
+# (not generic Xapian, only works with our sharding scheme)
+sub num2docid ($$) {
my ($self, $num) = @_;
- ($num - 1) * $self->{nshard} + 1;
+ my $nshard = $self->{nshard};
+ ($num - 1) * $nshard + $num % $nshard + 1;
}
sub msg_keywords {
my ($self, $num) = @_; # num_or_mitem
my $xdb = $self->xdb; # set {nshard};
- my $docid = ref($num) ? $num->get_docid : do {
- # get combined docid from over.num:
- # (not generic Xapian, only works with our sharding scheme)
- my $nshard = $self->{nshard};
- ($num - 1) * $nshard + $num % $nshard + 1;
- };
+ my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
my %kw;
eval {
my $end = $xdb->termlist_end($docid);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
new file mode 100644
index 00000000..07ceb84e
--- /dev/null
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -0,0 +1,57 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Combine any combination of PublicInbox::Search,
+# PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects
+# into one Xapian DB
+package PublicInbox::LeiXSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiSearch);
+
+# Returns an empty combined-search object; shards are added later
+# via attach_extinbox.  Xapian is loaded eagerly.
+sub new {
+	my ($class) = @_;
+	PublicInbox::Search::load_xapian();
+	bless {}, $class
+}
+
+# Append every Xapian shard of $ibxish to {shards_flat} and record the
+# owning $ibxish once per shard in {shard2ibx}, so both arrays stay
+# parallel.  If a combined {xdb} was already built, it is discarded and
+# the shards of everything attached so far are reopened first.
+sub attach_extinbox {
+	my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
+	if (delete $self->{xdb}) {
+		# clobber existing {xdb} if amending
+		my $expect = delete $self->{nshard};
+		my $shards = delete $self->{shards_flat};
+		scalar(@$shards) == $expect or die
+			"BUG: {nshard}$expect != shards=".scalar(@$shards);
+
+		# {shard2ibx} repeats each $ibxish once per shard;
+		# skip consecutive duplicates so each one is reopened once
+		my $prev = {};
+		for my $old_ibxish (@{$self->{shard2ibx}}) {
+			next if $prev == $old_ibxish;
+			$prev = $old_ibxish;
+			my @shards = $old_ibxish->search->xdb_shards_flat;
+			push @{$self->{shards_flat}}, @shards;
+		}
+		my $nr = scalar(@{$self->{shards_flat}});
+		$nr == $expect or die
+			"BUG: reloaded $nr shards, expected $expect"
+	}
+	my @shards = $ibxish->search->xdb_shards_flat;
+	push @{$self->{shards_flat}}, @shards;
+	push(@{$self->{shard2ibx}}, $ibxish) for (@shards);
+}
+
+# called by PublicInbox::Search::xdb
+# {shards_flat} is unset until the first attach_extinbox call; fall
+# back to an empty list so callers see "no shards" instead of dying
+# on an undef array dereference under "use strict"
+sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} }
+
+# like over->get_art
+# Maps a search result item back to the attached inbox which owns its
+# shard and returns that inbox's over->get_art($num).
+# Reads {nshard}, so the combined DB must already have been opened.
+sub smsg_for {
+	my ($self, $mitem) = @_;
+	# cf. https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
+	my $nshard = $self->{nshard};
+	my $docid = $mitem->get_docid;
+	my $shard = ($docid - 1) % $nshard;
+	my $num = int(($docid - 1) / $nshard) + 1;
+	$self->{shard2ibx}->[$shard]->over->get_art($num);
+}
+
+1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 498b6632..73926b1c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -197,6 +197,7 @@ sub xdb_shards_flat ($) {
my ($self) = @_;
my $xpfx = $self->{xpfx};
my (@xdb, $slow_phrase);
+ load_xapian();
if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
@xdb = ($X{Database}->new($xpfx));
$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
@@ -215,16 +216,6 @@ sub xdb_shards_flat ($) {
@xdb;
}
-sub _xdb {
- my ($self) = @_;
- $self->{qp_flags} //= $QP_FLAGS;
- my @xdb = xdb_shards_flat($self) or return;
- $self->{nshard} = scalar(@xdb);
- my $xdb = shift @xdb;
- $xdb->add_database($_) for @xdb;
- $xdb;
-}
-
# v2 Xapian docids don't conflict, so they're identical to
# NNTP article numbers and IMAP UIDs.
# https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
@@ -243,8 +234,12 @@ sub mset_to_artnums {
sub xdb ($) {
my ($self) = @_;
$self->{xdb} //= do {
- load_xapian();
- $self->_xdb;
+ $self->{qp_flags} //= $QP_FLAGS;
+ my @xdb = $self->xdb_shards_flat or return;
+ $self->{nshard} = scalar(@xdb);
+ my $xdb = shift @xdb;
+ $xdb->add_database($_) for @xdb;
+ $xdb;
};
}
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
new file mode 100644
index 00000000..70e3affd
--- /dev/null
+++ b/t/lei_xsearch.t
@@ -0,0 +1,47 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiXSearch';
+# scratch directory holding every inbox and the external index below
+my ($home, $for_destroy) = tmpdir();
+my @ibx;
+# create eight inboxes (versions 1 and 2, suffixes 3..6), each with a
+# random nproc between 1 and 8 and a single imported message
+for my $V (1..2) {
+ for my $i (3..6) {
+ my $ibx = PublicInbox::InboxWritable->new({
+ inboxdir => "$home/v$V-$i",
+ name => "test-v$V-$i",
+ version => $V,
+ indexlevel => 'medium',
+ -primary_address => "v$V-$i\@example.com",
+ }, { nproc => int(rand(8)) + 1 });
+ push @ibx, $ibx;
+ my $im = $ibx->importer(0);
+ my $eml = PublicInbox::Eml->new(<<EOF);
+From: x\@example.com
+To: $ibx->{-primary_address}
+Date: Fri, 02 Oct 1993 0$V:0$i:00 +0000
+Subject: v${V}i${i}
+
+${V}er ${i}on
+EOF
+ $im->add($eml);
+ $im->done;
+ }
+}
+# only the first and last inbox go into the external index;
+# the six in between stay in @ibx
+my $first = shift @ibx; is($first->{name}, 'test-v1-3', 'first plucked');
+my $last = pop @ibx; is($last->{name}, 'test-v2-6', 'last plucked');
+my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx");
+$eidx->attach_inbox($first);
+$eidx->attach_inbox($last);
+$eidx->eidx_sync({fsync => 0});
+# WIP: $lxs is constructed but nothing is attached or queried yet
+my $lxs = PublicInbox::LeiXSearch->new;
+
+done_testing;
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2021-01-03 22:57 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-01-03 22:57 UTC (permalink / raw)
To: spew
---
lib/PublicInbox/LEI.pm | 7 ++-----
lib/PublicInbox/LeiExternal.pm | 33 ++++++++++++++++++++++-----------
lib/PublicInbox/LeiStore.pm | 2 +-
lib/PublicInbox/LeiXSearch.pm | 16 ++++++++++++++++
t/lei.t | 12 +++++++++---
5 files changed, 50 insertions(+), 20 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index a0ea793b..6b141a37 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -8,7 +8,8 @@
package PublicInbox::LEI;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::DS PublicInbox::LeiExternal);
+use parent qw(PublicInbox::DS PublicInbox::LeiExternal
+ PublicInbox::LeiQuery);
use Getopt::Long ();
use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
use Errno qw(EAGAIN ECONNREFUSED ENOENT);
@@ -475,10 +476,6 @@ sub lei_show {
my ($self, @argv) = @_;
}
-sub lei_query {
- my ($self, @argv) = @_;
-}
-
sub lei_mark {
my ($self, @argv) = @_;
}
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index 4facd451..64faf5a0 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -8,24 +8,35 @@ use v5.10.1;
use parent qw(Exporter);
our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external);
-sub lei_ls_external {
- my ($self, @argv) = @_;
- my $stor = $self->_lei_store(0);
+sub _externals_each {
+ my ($self, $cb, @arg) = @_;
my $cfg = $self->_lei_cfg(0);
- my $out = $self->{1};
- my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
- my (%boost, @loc);
+ my %boost;
for my $sec (grep(/\Aexternal\./, @{$cfg->{-section_order}})) {
my $loc = substr($sec, length('external.'));
$boost{$loc} = $cfg->{"$sec.boost"};
- push @loc, $loc;
}
- use sort 'stable';
+ return \%boost if !wantarray && !$cb;
+
# highest boost first, but stable for alphabetic tie break
- for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) {
- # TODO: use miscidx and show docid so forget/set is easier
- print $out $_, $OFS, 'boost=', $boost{$_}, $ORS;
+ use sort 'stable';
+ my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
+ return @order if !$cb;
+ for my $loc (@order) {
+ $cb->(@arg, $loc, $boost{$loc});
}
+ @order; # scalar or array
+}
+
+# Print one "LOCATION boost=N" record per configured external to the
+# handle in $self->{1}, in the order produced by _externals_each.
+# With the "z" option, fields are NUL-separated and records end in
+# two NULs.
+sub lei_ls_external {
+	my ($self, @argv) = @_;
+	my $sto = $self->_lei_store(0);
+	my $fh = $self->{1};
+	my @sep = (" ", "\n"); # (field separator, record separator)
+	@sep = ("\0", "\0\0") if $self->{opt}->{z};
+	my $emit = sub {
+		my ($loc, $boost) = @_;
+		print $fh $loc, $sep[0], 'boost=', $boost, $sep[1];
+	};
+	$self->_externals_each($emit);
+}
sub lei_add_external {
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 07a3198a..a7267a64 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -24,7 +24,7 @@ sub new {
my (undef, $dir, $opt) = @_;
my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt);
my $self = bless { priv_eidx => $eidx }, __PACKAGE__;
- eidx_init($self) if $opt->{creat};
+ eidx_init($self)->done if $opt->{creat};
$self;
}
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 33e9c413..68be0c97 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -20,6 +20,21 @@ sub new {
sub attach_external {
my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
+ if (!ref($ibxish) && -d $ibxish) {
+ if (-f "$ibxish/ei.lock") {
+ require PublicInbox::ExtSearch;
+ $ibxish = PublicInbox::ExtSearch->new($ibxish);
+ } elsif (-f "$ibxish/inbox.lock" || # v2
+ -f "$ibxish/public-inbox") { # v1 indexed
+ require PublicInbox::Inbox;
+ $ibxish = bless { inboxdir => $ibxish },
+ 'PublicInbox::Inbox';
+ } else {
+ warn "W: ignoring $ibxish, unable to determine type\n";
+ return;
+ }
+ }
+
if (!$ibxish->can('over')) {
push @{$self->{remotes}}, $ibxish
}
@@ -44,6 +59,7 @@ sub attach_external {
my @shards = $ibxish->search->xdb_shards_flat;
push @{$self->{shards_flat}}, @shards;
push(@{$self->{shard2ibx}}, $ibxish) for (@shards);
+ $ibxish;
}
# called by PublicInbox::Search::xdb
diff --git a/t/lei.t b/t/lei.t
index 41638950..befbe723 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -177,12 +177,18 @@ my $test_external = sub {
});
$lei->('ls-external');
like($out, qr/boost=0\n/s, 'ls-external has output');
+ diag 'ls-external '.$out;
+
+ $lei->('q', 's:"use boolean prefix"', '-o', "mboxrd:$home/rd.mbox",
+ '--external');
+ diag $out;
+ diag $err;
};
my $test_lei_common = sub {
- $test_help->();
- $test_config->();
- $test_init->();
+ # $test_help->();
+ # $test_config->();
+ # $test_init->();
$test_external->();
};
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] WIP
@ 2021-01-21 4:24 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-01-21 4:24 UTC (permalink / raw)
To: spew
---
perl/dtas-graph | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/perl/dtas-graph b/perl/dtas-graph
index a41e2e0..cabe5de 100755
--- a/perl/dtas-graph
+++ b/perl/dtas-graph
@@ -82,21 +82,24 @@ while (my $pid = shift @to_scan) {
#
foreach my $pid (keys %pids) {
my @out = `lsof -p $pid`;
- # output is like this:
- # play 12739 ew 0r FIFO 0,7 0t0 36924019 pipe
+# output is like this:
+# play 12739 no1 0r FIFO 0,7 0t0 36924019 pipe
+# daemon 10742 no1 3u unix 0xdc6aea87 0t0 19554016 /path type=SEQPACKET
foreach my $l (@out) {
my @l = split(/\s+/, $l);
- $l[4] eq "FIFO" or next;
-
my $fd = $l[3];
- my $pipe_ino = $l[7];
- my $info = $pipes{$pipe_ino} ||= { r => [], w => [] };
- if ($fd =~ s/r\z//) {
- push @{$info->{r}}, [ $pid, $fd ];
- } elsif ($fd =~ s/w\z//) {
- push @{$info->{w}}, [ $pid, $fd ];
+ my $type = $l[4];
+ my $ino = $l[7];
+ if ($type eq 'FIFO') {
+ my $info = $pipes{$ino} ||= { r => [], w => [] };
+ if ($fd =~ s/r\z//) {
+ push @{$info->{r}}, [ $pid, $fd ];
+ } elsif ($fd =~ s/w\z//) {
+ push @{$info->{w}}, [ $pid, $fd ];
+ }
+ } elsif ($type eq 'unix') {
+ my $info = $unix{$ino}
}
-
}
}
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2021-03-08 7:11 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-03-08 7:11 UTC (permalink / raw)
To: spew
From: Eric Wong <normalperson@yhbt.net>
---
lib/PublicInbox/Search.pm | 8 ++++++++
t/search.t | 1 +
2 files changed, 9 insertions(+)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 209969c5..3cd71ae6 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -242,6 +242,14 @@ sub xdb ($) {
};
}
+# Returns a string identifying the current state of the Xapian DB:
+# "<get_uuid>-<get_revision>" when $xdb provides both methods,
+# otherwise the value of its 'xs_sessionstate' metadata key.
+sub sessionstate {
+	my ($self) = @_;
+	my $xdb = $self->xdb;
+	# ->can yields one false value (not an empty list) per missing
+	# method, so drop those before counting
+	my @cb = grep { $_ } map { $xdb->can($_) } qw(get_uuid get_revision);
+	scalar(@cb) == 2 ? join('-', map { $_->($xdb) } @cb) :
+		$xdb->get_metadata('xs_sessionstate');
+}
+
sub new {
my ($class, $ibx) = @_;
ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
diff --git a/t/search.t b/t/search.t
index 124c9acf..c67d8352 100644
--- a/t/search.t
+++ b/t/search.t
@@ -479,6 +479,7 @@ EOF
"searching chopped($i) digit yielded result $wild ");
}
is($ibx->search->mset('m:Pine m:LNX m:10010260936330')->size, 1);
+ diag $ibx->search->sessionstate;
});
{ # List-Id searching
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2021-04-05 7:42 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-04-05 7:42 UTC (permalink / raw)
To: spew
---
script/lei | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/script/lei b/script/lei
index 78a7dab9..5b9b2e34 100755
--- a/script/lei
+++ b/script/lei
@@ -14,12 +14,9 @@ my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do {
PublicInbox::Spawn->can('send_cmd4');
};
-my %pids;
my $sigchld = sub {
my $flags = scalar(@_) ? POSIX::WNOHANG() : 0;
- for my $pid (keys %pids) {
- delete($pids{$pid}) if waitpid($pid, $flags) == $pid;
- }
+ undef while ((waitpid(-1, $flags) // -1) > 0);
};
my @parent;
my $exec_cmd = sub {
@@ -48,7 +45,7 @@ my $exec_cmd = sub {
$do_exec->() if scalar(@$fds); # git-credential, pager
# parent backgrounds on MUA
- POSIX::setsid() > 0 or die "setsid: $!";
+ #POSIX::setsid() > 0 or die "setsid: $!";
@parent = ($parent);
return; # continue $recv_cmd in background
}
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2021-06-05 19:58 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-06-05 19:58 UTC (permalink / raw)
To: spew
---
MANIFEST | 1 +
lib/PublicInbox/LEI.pm | 7 ++--
lib/PublicInbox/LeiImport.pm | 33 ++++++++++++-------
lib/PublicInbox/LeiInput.pm | 29 +++++++++++------
lib/PublicInbox/LeiMailSync.pm | 10 ++++++
lib/PublicInbox/LeiPmdir.pm | 59 ++++++++++++++++++++++++++++++++++
lib/PublicInbox/LeiTag.pm | 2 +-
lib/PublicInbox/MdirReader.pm | 22 +++++++------
t/lei-import-maildir.t | 2 +-
9 files changed, 130 insertions(+), 35 deletions(-)
create mode 100644 lib/PublicInbox/LeiPmdir.pm
diff --git a/MANIFEST b/MANIFEST
index 5a70a144..7bdbf252 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -221,6 +221,7 @@ lib/PublicInbox/LeiMailSync.pm
lib/PublicInbox/LeiMirror.pm
lib/PublicInbox/LeiOverview.pm
lib/PublicInbox/LeiP2q.pm
+lib/PublicInbox/LeiPmdir.pm
lib/PublicInbox/LeiQuery.pm
lib/PublicInbox/LeiRediff.pm
lib/PublicInbox/LeiRemote.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 4b1a6da7..ee9f1f80 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -421,7 +421,7 @@ my %CONFIG_KEYS = (
'leistore.dir' => 'top-level storage location',
);
-my @WQ_KEYS = qw(lxs l2m wq1 ikw); # internal workers
+my @WQ_KEYS = qw(lxs l2m wq1 ikw pmd); # internal workers
sub _drop_wq {
my ($self) = @_;
@@ -565,7 +565,7 @@ sub pkt_op_pair {
}
sub workers_start {
- my ($lei, $wq, $jobs, $ops) = @_;
+ my ($lei, $wq, $jobs, $ops, $flds) = @_;
$ops = {
'!' => [ \&fail_handler, $lei ],
'|' => [ \&sigpipe_handler, $lei ],
@@ -576,7 +576,8 @@ sub workers_start {
$ops->{''} //= [ $wq->can('_lei_wq_eof') || \&wq_eof, $lei ];
my $end = $lei->pkt_op_pair;
my $ident = $wq->{-wq_ident} // "lei-$lei->{cmd} worker";
- $wq->wq_workers_start($ident, $jobs, $lei->oldset, { lei => $lei });
+ $flds->{lei} = $lei;
+ $wq->wq_workers_start($ident, $jobs, $lei->oldset, $flds);
delete $lei->{pkt_op_p};
my $op_c = delete $lei->{pkt_op_c};
@$end = ();
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 60f3241b..14b98480 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -6,6 +6,7 @@ package PublicInbox::LeiImport;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::InboxWritable qw(eml_from_path);
# /^input_/ subs are used by (or override) PublicInbox::LeiInput superclass
@@ -28,17 +29,26 @@ sub input_mbox_cb { # MboxReader callback
input_eml_cb($self, $eml, $vmd);
}
-sub input_maildir_cb { # maildir_each_eml cb
- my ($f, $kw, $eml, $self) = @_;
+sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
+ my ($self, $f, @args) = @_;
+ my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
+ die "BUG: $f was not from a Maildir?\n";
+ my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
+ return if index($fl, 'T') >= 0; # no Trashed messages
+ my $kw = PublicInbox::MdirReader::flags2kw($fl);
+ substr($folder, 0, 0) = 'maildir:'; # add prefix
+ my $lms = $self->{-lms_ro};
+ my $oidbin = $lms ? $lms->name_oidbin($folder, $bn) : undef;
+ my @docids = defined($oidbin) ?
+ $self->{over}->oidbin_exists($oidbin) : ();
my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
- if ($self->{-mail_sync}) {
- if ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) { # ugh...
- $vmd->{sync_info} = [ "maildir:$1", \(my $n = $2) ];
- } else {
- warn "E: $f was not from a Maildir?\n";
- }
+ if (scalar @docids) {
+ $self->{lse}->kw_changed(undef, $kw, \@docids) or return;
+ $self->{sto}->ipc_do('set_eml_vmd', undef, $vmd, \@docids);
+ } elsif (my $eml = eml_from_path($f)) {
+ $vmd->{sync_info} = [ $folder, $bn ] if $self->{-mail_sync};
+ $self->input_eml_cb($eml, $vmd);
}
- $self->input_eml_cb($eml, $vmd);
}
sub input_net_cb { # imap_each / nntp_each
@@ -62,10 +72,10 @@ sub do_import_index ($$@) {
my $vmd_mod = $self->vmd_mod_extract(\@inputs);
return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
$self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
- $self->prepare_inputs($lei, \@inputs) or return;
+ $lei->ale; # initialize for workers to read (before LeiPmdir->new)
$self->{-mail_sync} = $lei->{opt}->{'mail-sync'} // 1;
+ $self->prepare_inputs($lei, \@inputs) or return;
- $lei->ale; # initialize for workers to read
my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
my $ikw;
if (my $net = $lei->{net}) {
@@ -93,6 +103,7 @@ sub do_import_index ($$@) {
$lei->{wq1} = $self;
$lei->{-err_type} = 'non-fatal';
$ikw->wq_close(1) if $ikw;
+ $lei->{pmd}->wq_close(1) if $lei->{pmd};
net_merge_all_done($self) unless $lei->{auth};
$op_c->op_wait_event($ops);
}
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 4ff7a379..f0c141c8 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -151,9 +151,16 @@ sub input_path_url {
return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
$input appears to be a maildir, not $ifmt
EOM
- PublicInbox::MdirReader->new->maildir_each_eml($input,
- $self->can('input_maildir_cb'),
- $self, @args);
+ my $mdr = PublicInbox::MdirReader->new;
+ if (my $pmd = $self->{pmd}) {
+ $mdr->maildir_each_file($input,
+ $pmd->can('each_mdir_fn'),
+ $pmd, @args);
+ } else {
+ $mdr->maildir_each_eml($input,
+ $self->can('input_maildir_cb'),
+ $self, @args);
+ }
} else {
$lei->fail("$input unsupported (TODO)");
}
@@ -215,7 +222,7 @@ sub prepare_inputs { # returns undef on error
push @{$sync->{no}}, '/dev/stdin' if $sync;
}
my $net = $lei->{net}; # NetWriter may be created by l2m
- my (@f, @d);
+ my (@f, @md);
# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
for my $input (@$inputs) {
my $input_path = $input;
@@ -247,11 +254,11 @@ sub prepare_inputs { # returns undef on error
PublicInbox::MboxReader->reads($ifmt) or return
$lei->fail("$ifmt not supported");
} elsif (-d $input_path) {
- require PublicInbox::MdirReader;
$ifmt eq 'maildir' or return
$lei->fail("$ifmt not supported");
$sync and $input = 'maildir:'.
$lei->abs_path($input_path);
+ push @md, $input;
} else {
return $lei->fail("Unable to handle $input");
}
@@ -266,21 +273,18 @@ $input is `eml', not --in-format=$in_fmt
if ($devfd >= 0 || -f $input || -p _) {
push @{$sync->{no}}, $input if $sync;
push @f, $input;
- } elsif (-d $input) {
+ } elsif (-d "$input/new" && -d "$input/cur") {
if ($sync) {
$input = $lei->abs_path($input);
push @{$sync->{ok}}, $input;
}
- push @d, $input;
+ push @md, $input;
} else {
return $lei->fail("Unable to handle $input")
}
}
}
if (@f) { check_input_format($lei, \@f) or return }
- if (@d) { # TODO: check for MH vs Maildir, here
- require PublicInbox::MdirReader;
- }
if ($sync && $sync->{no}) {
return $lei->fail(<<"") if !$sync->{ok};
--mail-sync specified but no inputs support it
@@ -299,6 +303,11 @@ $input is `eml', not --in-format=$in_fmt
$lei->{auth} //= PublicInbox::LeiAuth->new;
$lei->{net} //= $net;
}
+ if (scalar(@md)) {
+ require PublicInbox::MdirReader;
+ require PublicInbox::LeiPmdir;
+ $self->{pmd} = PublicInbox::LeiPmdir->new($lei, $self);
+ }
$self->{inputs} = $inputs;
}
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 75603d89..c87685a0 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -375,6 +375,16 @@ EOM
$sth->fetchrow_array;
}
+# Look up the oidbin column of blob2name for entry $nm in folder $mdir.
+# Returns early with nothing when fid_for yields undef for $mdir;
+# otherwise returns the first fetched row (no row => nothing).
+sub name_oidbin ($$$) {
+ my ($self, $mdir, $nm) = @_;
+ # folder ids are cached in {fmap}; only a defined id gets stored
+ my $fid = $self->{fmap}->{$mdir} //= fid_for($self, $mdir) // return;
+ my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
+SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
+EOM
+ $sth->execute($fid, $nm);
+ $sth->fetchrow_array;
+}
+
sub imap_oid {
my ($self, $lei, $uid_uri) = @_;
my $mailbox_uri = $uid_uri->clone;
diff --git a/lib/PublicInbox/LeiPmdir.pm b/lib/PublicInbox/LeiPmdir.pm
new file mode 100644
index 00000000..3f9c9e4a
--- /dev/null
+++ b/lib/PublicInbox/LeiPmdir.pm
@@ -0,0 +1,59 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# WQ worker for dealing with parallel Maildir reads;
+# this does NOT use the {shard_info} field of LeiToMail
+# (and we may remove {shard_info})
+# WQ key: {pmd}
+package PublicInbox::LeiPmdir;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::MdirReader;
+
+# Start the worker pool through $lei->workers_start, handing $ipt to
+# the workers as the {ipt} field.  The new object is stored in
+# $lei->{pmd}; that assignment is also the return value.
+sub new {
+ my ($cls, $lei, $ipt) = @_;
+ my $self = bless { -wq_ident => 'lei Maildir worker' }, $cls;
+ my ($op_c, $ops) = $lei->workers_start($self, $self->detect_nproc,
+ undef, { ipt => $ipt }); # LeiInput subclass
+ $op_c->{ops} = $ops; # for PktOp->event_step
+ $lei->{pmd} = $self;
+}
+
+# Runs in each worker after fork: wires {ipt} up with the lei object,
+# its store ({sto}), the store's search ({lse}) and over ({over}), and
+# a mail-sync handle ({-lms_ro}) unless one is already defined.
+sub ipc_atfork_child {
+ my ($self) = @_;
+ my $lei = $self->{lei};
+ $lei->_lei_atfork_child;
+ my $ipt = $self->{ipt} // die 'BUG: no self->{ipt}';
+ $ipt->{lei} = $lei;
+ $ipt->{sto} = $lei->{sto} // die 'BUG: no lei->{sto}';
+ $ipt->{lse} = $ipt->{sto}->search;
+ $ipt->{over} = $ipt->{lse}->over;
+ $ipt->{-lms_ro} //= $ipt->{lse}->lms; # may be undef or '0'
+ $self->SUPER::ipc_atfork_child;
+}
+
+# Receives the file name first and the LeiPmdir object second, then
+# queues the file for a worker's mdir_iter.
+sub each_mdir_fn { # maildir_each_file callback
+ my ($f, $self, @args) = @_;
+ $self->wq_io_do('mdir_iter', [], $f, @args);
+}
+
+# Worker side of each_mdir_fn: delegates to the input object's pmdir_cb.
+sub mdir_iter { # via wq_io_do
+ my ($self, $f, @args) = @_;
+ $self->{ipt}->pmdir_cb($f, @args);
+}
+
+# Passed to wq_wait_old by _lei_wq_eof: issues 'done' to the store
+# before delegating to lei's wq_done_wait with the same arguments.
+sub pmd_done_wait {
+ my ($arg, $pid) = @_;
+ my ($self, $lei) = @$arg;
+ # NOTE(review): $wait is unused; presumably the assignment only
+ # forces a synchronous ipc_do call -- confirm against PublicInbox::IPC
+ my $wait = $lei->{sto}->ipc_do('done');
+ $lei->can('wq_done_wait')->($arg, $pid);
+}
+
+# Removes {pmd} from $lei (failing if it is already gone) and waits on
+# the old workers with pmd_done_wait as the callback.
+sub _lei_wq_eof { # EOF callback for main lei daemon
+ my ($lei) = @_;
+ my $pmd = delete $lei->{pmd} or return $lei->fail;
+ $pmd->wq_wait_old(\&pmd_done_wait, $lei);
+}
+
+1;
diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm
index b6abd533..0a8644be 100644
--- a/lib/PublicInbox/LeiTag.pm
+++ b/lib/PublicInbox/LeiTag.pm
@@ -117,6 +117,6 @@ sub _complete_tag {
no warnings 'once'; # the following works even when LeiAuth is lazy-loaded
*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all;
-*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+*net_merge_all_done = \&PublicInbox::LeiInput::parallel_net_merge_all_done;
1;
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index 304be63d..484bf0a8 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -87,17 +87,21 @@ sub maildir_each_eml {
sub new { bless {}, __PACKAGE__ }
sub flags2kw ($) {
- my @unknown;
- my %kw;
- for (split(//, $_[0])) {
- my $k = $c2kw{$_};
- if (defined($k)) {
- $kw{$k} = 1;
- } else {
- push @unknown, $_;
+ if (wantarray) {
+ my @unknown;
+ my %kw;
+ for (split(//, $_[0])) {
+ my $k = $c2kw{$_};
+ if (defined($k)) {
+ $kw{$k} = 1;
+ } else {
+ push @unknown, $_;
+ }
}
+ (\%kw, \@unknown);
+ } else {
+ [ sort(map { $c2kw{$_} // () } split(//, $_[0])) ];
}
- (\%kw, \@unknown);
}
1;
diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t
index 688b10ce..c81e7805 100644
--- a/t/lei-import-maildir.t
+++ b/t/lei-import-maildir.t
@@ -28,7 +28,7 @@ test_lei(sub {
is(scalar(keys %v), 1, 'inspect handles relative and absolute paths');
my $inspect = json_utf8->decode([ keys %v ]->[0]);
is_deeply($inspect, {"maildir:$md" => { 'name.count' => 1 }},
- 'inspect maildir: path had expected output');
+ 'inspect maildir: path had expected output') or xbail($inspect);
lei_ok(qw(q s:boolean));
my $res = json_utf8->decode($lei_out);
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH] wip
@ 2021-10-27 20:16 Eric Wong
0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-10-27 20:16 UTC (permalink / raw)
To: spew
---
lib/PublicInbox/LeiXSearch.pm | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index acc36897..2a037f2b 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -283,20 +283,22 @@ sub each_remote_eml { # callback for MboxReader->mboxrd
my $smsg = bless {}, 'PublicInbox::Smsg';
if ($self->{import_sto} && !$xoids) {
my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml);
- $smsg = $res if ref($res) eq ref($smsg); # totally new message
+ if (ref($res) eq ref($smsg)) { # totally new message
+ $smsg = $res;
+ $self->{-imported} = 1;
+ }
$smsg->{kw} = $kw; # short-circuit xsmsg_vmd
}
$smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
: $lei->git_oid($eml)->hexdigest;
_smsg_fill($smsg, $eml);
wait_startq($lei);
+ my $nr = ++$lei->{-nr_remote_eml}; # needed for lss->cfg_set
if ($lei->{-progress}) {
- ++$lei->{-nr_remote_eml};
my $now = now();
my $next = $lei->{-next_progress} //= ($now + 1);
if ($now > $next) {
$lei->{-next_progress} = $now + 1;
- my $nr = $lei->{-nr_remote_eml};
mset_progress($lei, $lei->{-current_url}, $nr, '?');
}
}
@@ -374,13 +376,14 @@ sub query_remote_mboxrd {
$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
$lei, $each_smsg);
- my $nr = $lei->{-nr_remote_eml};
- my $wait = $lei->{sto}->wq_do('done') if $nr && $lei->{sto};
+ if ($self->{import_sto} && delete($self->{-imported})) {
+ my $wait = $self->{import_sto}->wq_do('done');
+ }
$reap_curl->join;
if ($? == 0) {
# don't update if no results, maybe MTA is down
- $key && $nr and
- $lei->{lss}->cfg_set($key, $start);
+ my $nr = $lei->{-nr_remote_eml};
+ $lei->{lss}->cfg_set($key, $start) if $key && $nr;
mset_progress($lei, $lei->{-current_url}, $nr, $nr);
next;
}
^ permalink raw reply related [flat|nested] 23+ messages in thread
end of thread, other threads:[~2021-10-27 20:16 UTC | newest]
Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-05 18:40 [PATCH] wip Eric Wong
-- strict thread matches above, loose matches on Subject: below --
2021-10-27 20:16 Eric Wong
2021-06-05 19:58 Eric Wong
2021-04-05 7:42 Eric Wong
2021-03-08 7:11 Eric Wong
2021-01-21 4:24 [PATCH] WIP Eric Wong
2021-01-03 22:57 [PATCH] wip Eric Wong
2020-12-27 11:36 [PATCH] WIP Eric Wong
2020-11-15 7:35 [PATCH] wip Eric Wong
2020-04-23 4:27 Eric Wong
2020-04-20 7:14 Eric Wong
2020-01-13 9:24 [PATCH] WIP Eric Wong
2019-05-11 22:55 Eric Wong
2019-01-02 9:21 [PATCH] wip Eric Wong
2018-07-06 21:31 Eric Wong
2018-06-24 11:55 Eric Wong
2018-06-24 8:39 Eric Wong
2017-07-15 1:42 [PATCH] WIP Eric Wong
2017-04-12 20:17 [PATCH] wip Eric Wong
2016-08-23 20:07 Eric Wong
2016-08-18 2:16 Eric Wong
2016-06-26 3:46 Eric Wong
2015-12-22 0:15 Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).