dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH] wip
@ 2015-12-22  0:15 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2015-12-22  0:15 UTC (permalink / raw)
  To: spew

---
 lib/PublicInbox/RepoBrowse.pm       |   9 ++-
 lib/PublicInbox/RepoBrowseCommit.pm |   2 +-
 lib/PublicInbox/RepoBrowseTree.pm   | 146 ++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/Spawn.pm            |  19 +++++
 4 files changed, 171 insertions(+), 5 deletions(-)
 create mode 100644 lib/PublicInbox/RepoBrowseTree.pm
 create mode 100644 lib/PublicInbox/Spawn.pm

diff --git a/lib/PublicInbox/RepoBrowse.pm b/lib/PublicInbox/RepoBrowse.pm
index 5865d65..0b3197e 100644
--- a/lib/PublicInbox/RepoBrowse.pm
+++ b/lib/PublicInbox/RepoBrowse.pm
@@ -23,7 +23,7 @@ use warnings;
 use URI::Escape qw(uri_escape_utf8 uri_unescape);
 use PublicInbox::RepoConfig;
 
-my %CMD = map { lc($_) => $_ } qw(Log Commit);
+my %CMD = map { lc($_) => $_ } qw(Log Commit Tree);
 
 sub new {
 	my ($class, $file) = @_;
@@ -39,9 +39,10 @@ sub run {
 
 	# URL syntax: / repo [ / cmd [ / path ] ]
 	# cmd: log | commit | diff | tree | view | blob | snapshot
-	# repo and path may both contain '/'
+	# repo and path (@extra) may both contain '/'
 	my $rconfig = $self->{rconfig};
-	my (undef, $repo_path, @extra) = split(m{/+}, $cgi->path_info, -1);
+	my $path_info = uri_unescape($cgi->path_info);
+	my (undef, $repo_path, @extra) = split(m{/+}, $path_info, -1);
 
 	return r404() unless $repo_path;
 	my $repo_info;
@@ -53,7 +54,7 @@ sub run {
 
 	my $req = {
 		repo_info => $repo_info,
-		path => \@extra,
+		extra => \@extra, # path
 		cgi => $cgi,
 		rconfig => $rconfig,
 	};
diff --git a/lib/PublicInbox/RepoBrowseCommit.pm b/lib/PublicInbox/RepoBrowseCommit.pm
index 67f63a3..43f637a 100644
--- a/lib/PublicInbox/RepoBrowseCommit.pm
+++ b/lib/PublicInbox/RepoBrowseCommit.pm
@@ -110,7 +110,7 @@ sub call_git {
 
 sub git_blob_hrefs {
 	my ($rel, @ids) = @_;
-	map { "<a\nhref=\"${rel}blob?id=$_\"" } @ids;
+	map { "<a\nhref=\"${rel}tree?id=$_\"" } @ids;
 }
 
 sub git_blob_links {
diff --git a/lib/PublicInbox/RepoBrowseTree.pm b/lib/PublicInbox/RepoBrowseTree.pm
new file mode 100644
index 0000000..8427a5d
--- /dev/null
+++ b/lib/PublicInbox/RepoBrowseTree.pm
@@ -0,0 +1,146 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::RepoBrowseTree;
+use strict;
+use warnings;
+use base qw(PublicInbox::RepoBrowseBase);
+use PublicInbox::Spawn qw(git);
+use PublicInbox::GitCatFile;
+use URI::Escape qw(uri_escape_utf8);
+use Encode qw/find_encoding/;
+my $enc_utf8 = find_encoding('UTF-8');
+
+my %GIT_MODE = (
+	'100644' => ' ', # blob
+	'100755' => 'x', # executable blob
+	'040000' => 'd', # tree
+	'120000' => 'l', # symlink
+	'160000' => 'g', # commit (gitlink)
+);
+
+sub git_tree_stream {
+	my ($self, $req, $res) = @_; # res: Plack callback
+	my $repo_info = $req->{repo_info};
+	my $dir = $repo_info->{path};
+	my @extra = @{$req->{extra}};
+	my $tslash;
+	if (@extra && $extra[-1] eq '') { # URL had a trailing slash; strip it
+		pop @extra;
+		$tslash = 1;
+	}
+	my $tree_path = join('/', @extra);
+	my $q = PublicInbox::RepoBrowseQuery->new($req->{cgi});
+	my $id = $q->{id};
+	$id eq '' and $id = 'HEAD';
+
+	my $obj = "$id:$tree_path";
+	my $git = $repo_info->{git} ||= PublicInbox::GitCatFile->new($dir);
+	my ($hex, $type, $size) = $git->check($obj);
+
+	if (!defined($type) || ($type ne 'blob' && $type ne 'tree')) {
+		return $res->([404, ['Content-Type'=>'text/html'],
+			 ['Not Found']]);
+	}
+
+	my $fh = $res->([200, ['Content-Type'=>'text/html; charset=UTF-8']]);
+	$fh->write('<html><head><title></title></head><body>'.
+			 PublicInbox::Hval::PRE);
+
+	if ($type eq 'tree') {
+		tree_show($fh, $git, $dir, $hex, $q, \@extra, $tslash);
+	} elsif ($type eq 'blob') {
+		blob_show($fh, $git, $hex);
+	}
+	$fh->write('</body></html>');
+	$fh->close;
+}
+
+sub call_git {
+	my ($self, $req) = @_;
+	sub { git_tree_stream($self, $req, @_) };
+}
+
+sub blob_binary {
+	my ($fh) = @_;
+	$fh->write("Binary file cannot be displayed\n");
+}
+
+sub blob_show {
+	my ($fh, $git, $hex) = @_;
+	# ref: buffer_is_binary in git.git
+	my $to_read = 8000; # git uses this size to detect binary files
+	my $text_p;
+	$git->cat_file($hex, sub {
+		my ($cat, $left) = @_; # $$left == $size
+		my $n = 0;
+		$to_read = $$left if $to_read > $$left;
+		my $r = read($cat, my $buf, $to_read);
+		return unless defined($r) && $r > 0;
+		$$left -= $r;
+
+		return blob_binary($fh) if (index($buf, "\0") >= 0);
+
+		$text_p = 1;
+		$fh->write('<table><tr><td>'.PublicInbox::Hval::PRE);
+		while (1) {
+			my @buf = split("\n", $buf, -1);
+			$buf = pop @buf; # last line, careful...
+			$n += scalar @buf;
+			foreach my $l (@buf) {
+				$l = $enc_utf8->decode($l);
+				$l = PublicInbox::Hval::ascii_html($l);
+				$l .= "\n";
+				$fh->write($l);
+			}
+			last if ($$left == 0 || !defined $buf);
+
+			$to_read = $$left if $to_read > $$left;
+			my $off = length $buf; # last line from previous read
+			$r = read($cat, $buf, $to_read, $off);
+			return unless defined($r) && $r > 0;
+			$$left -= $r;
+		}
+
+		$fh->write('</pre></td><td><pre>');
+		foreach my $i (1..$n) {
+			$fh->write("<a id=n$i href='#n$i'>$i</a>\n");
+		}
+		$fh->write('</pre></td></tr></table>');
+		0;
+	});
+}
+
+sub tree_show {
+	my ($fh, $git, $dir, $hex, $q, $extra, $tslash) = @_;
+
+	my $ls = git($dir, qw(ls-tree --abbrev=16 -l -z), $hex);
+	local $/ = "\0";
+	my $pfx = $tslash ? './' :
+		(@$extra ? uri_escape_utf8($extra->[-1]).'/' : 'tree/');
+
+	my $qs = $q->qs;
+	while (defined(my $l = <$ls>)) {
+		chomp $l;
+		my ($m, $t, $x, $s, $path) =
+			($l =~ /\A(\S+) (\S+) (\S+)( *\S+)\t(.+)\z/s);
+		$m = $GIT_MODE{$m} or next;
+
+		my $ref = uri_escape_utf8($path);
+		$path = PublicInbox::Hval::ascii_html($path);
+
+		if ($m eq 'g') {
+			# TODO: support cross-repository gitlinks
+			$fh->write('g' . (' ' x 18) . "$path @ $x\n");
+			next;
+		}
+		elsif ($m eq 'd') { $path = "<b>$path/</b>" }
+		elsif ($m eq 'x') { $path = "<b>$path</b>" }
+		elsif ($m eq 'l') { $path = "<i>$path</i>" }
+
+		$ref = $pfx.PublicInbox::Hval::ascii_html($ref).$qs;
+		$fh->write("$m log raw $s <a\nhref=\"$ref\">$path</a>\n");
+	}
+	$fh->write('</pre>');
+}
+
+1;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
new file mode 100644
index 0000000..6407a90
--- /dev/null
+++ b/lib/PublicInbox/Spawn.pm
@@ -0,0 +1,19 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Spawn;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(git);
+
+sub git {
+	my ($git_dir, @cmd) = @_;
+	@cmd = ('git', "--git-dir=$git_dir", @cmd);
+
+	open(my $fh, '-|', @cmd) or
+		die('open `'.join(' ', @cmd) . "' fork/pipe failed: $!\n");
+
+	$fh;
+}
+
+1;
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2016-06-26  3:46 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-06-26  3:46 UTC (permalink / raw)
  To: spew

From: Eric Wong <normalperson@yhbt.net>

---
 lib/PublicInbox/View.pm | 164 +++++++++++++-----------------------------------
 1 file changed, 44 insertions(+), 120 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 38e35bf..58361db 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -92,7 +92,7 @@ sub _hdr_names ($$) {
 # this is already inside a <pre>
 sub index_entry {
 	my ($mime, $level, $state) = @_;
-	my $midx = $state->{anchor_idx}++;
+	$state->{anchor_idx}++;
 	my $ctx = $state->{ctx};
 	my $srch = $ctx->{srch};
 	my $hdr = $mime->header_obj;
@@ -100,28 +100,25 @@ sub index_entry {
 
 	my $mid_raw = mid_clean(mid_mime($mime));
 	my $id = anchor_for($mid_raw);
-	my $seen = $state->{seen};
-	$seen->{$id} = "#$id"; # save the anchor for children, later
-
 	my $mid = PublicInbox::Hval->new_msgid($mid_raw);
 
 	my $root_anchor = $state->{root_anchor} || '';
 	my $path = $root_anchor ? '../../' : '';
 	my $href = $mid->as_href;
 	my $irt = in_reply_to($hdr);
-	my $parent_anchor = $seen->{anchor_for($irt)} if defined $irt;
 
-	$subj = ascii_html($subj);
-	$subj = "<a\nhref=\"${path}$href/\">$subj</a>";
+	$subj = '<b>'.ascii_html($subj).'</b>';
 	$subj = "<u\nid=u>$subj</u>" if $root_anchor eq $id;
 
 	my $ts = _msg_date($hdr);
-	my $rv = "<pre\nid=s$midx>";
+	my $rv = "<pre\nid=$id>";
 	$rv .= "<b\nid=$id>$subj</b>\n";
-	my $txt = "${path}$href/raw";
+	my $mhref = $path.$href.'/';
 	my $fh = $state->{fh};
 	my $from = _hdr_names($hdr, 'From');
-	$rv .= "- $from @ $ts UTC (<a\nhref=\"$txt\">raw</a>)\n";
+	$rv .= "- $from @ $ts UTC (";
+	$rv .= "<a\nhref=\"${mhref}\">permalink</a> ";
+	$rv .= "/ <a\nhref=\"${mhref}raw\">raw</a>)\n";
 	my @tocc;
 	foreach my $f (qw(To Cc)) {
 		my $dst = _hdr_names($hdr, $f);
@@ -130,36 +127,19 @@ sub index_entry {
 	$rv .= '  '.join('; +', @tocc) . "\n" if @tocc;
 	$fh->write($rv .= "\n");
 
-	my $mhref = "${path}$href/";
 
 	# scan through all parts, looking for displayable text
 	msg_iter($mime, sub { index_walk($fh, $mhref, $_[0]) });
-	$rv = "\n" . html_footer($hdr, 0, $ctx, "$path$href/#R");
+	$rv = "\n" . html_footer($hdr, 0, $ctx, $mhref.'#R');
 
 	if (defined $irt) {
-		unless (defined $parent_anchor) {
-			my $v = PublicInbox::Hval->new_msgid($irt, 1);
-			$v = $v->as_href;
-			$parent_anchor = "${path}$v/";
-		}
-		$rv .= " <a\nhref=\"$parent_anchor\">parent</a>";
+		$irt = anchor_for($irt);
+		$rv .= " <a\nhref=#$irt>parent</a>";
 	}
 	if (my $pct = $state->{pct}) { # used by SearchView.pm
 		$rv .= " [relevance $pct->{$mid_raw}%]";
-	} elsif ($srch) {
-		my $threaded = 'threaded';
-		my $flat = 'flat';
-		my $end = '';
-		if ($ctx->{flat}) {
-			$flat = "<b>$flat</b>";
-			$end = "\n"; # for lynx
-		} else {
-			$threaded = "<b>$threaded</b>";
-		}
-		$rv .= " [<a\nhref=\"${path}$href/t/#u\">$threaded</a>";
-		$rv .= "|<a\nhref=\"${path}$href/T/#u\">$flat</a>]$end";
 	}
-	$fh->write($rv .= '</pre>');
+	$fh->write($rv .= "\n</pre>"); # '\n' for lynx
 }
 
 sub thread_html {
@@ -180,48 +160,47 @@ sub walk_thread {
 }
 
 # only private functions below.
+#
+sub pre_thread  {
+	my ($state, $level, $node) = @_;
+	$state->{mapping}->{$node->messageid} = $node;
+	skel_dump($state, $level, $node);
+}
 
 sub emit_thread_html {
 	my ($res, $ctx, $foot, $srch) = @_;
 	my $mid = $ctx->{mid};
-	my $flat = $ctx->{flat};
-	my $msgs = load_results($srch->get_thread($mid, { asc => $flat }));
+	my $msgs = load_results($srch->get_thread($mid, {asc => 1}));
 	my $nr = scalar @$msgs;
 	return missing_thread($res, $ctx) if $nr == 0;
-	my $seen = {};
+	my $skel = '';
 	my $state = {
+		seen => {},
 		res => $res,
 		ctx => $ctx,
-		seen => $seen,
+		srch => $ctx->{srch},
 		root_anchor => anchor_for($mid),
 		anchor_idx => 0,
 		cur_level => 0,
+		mapping => {}, # mid -> node
+		dst => \$skel,
 	};
 
-	require PublicInbox::Git;
-	$ctx->{git} ||= PublicInbox::Git->new($ctx->{git_dir});
-	if ($flat) {
-		pre_anchor_entry($seen, $_) for (@$msgs);
-		__thread_entry($state, $_, 0) for (@$msgs);
-	} else {
-		walk_thread(thread_results($msgs), $state, *thread_entry);
-		if (my $max = $state->{cur_level}) {
-			$state->{fh}->write(
-				('</ul></li>' x ($max - 1)) . '</ul>');
-		}
-	}
+	walk_thread(thread_results($msgs), $state, *pre_thread);
+
+	__thread_entry($state, $_, 0) for @$msgs;
 
 	# there could be a race due to a message being deleted in git
 	# but still being in the Xapian index:
 	my $fh = delete $state->{fh} or return missing_thread($res, $ctx);
 
-	my $final_anchor = $state->{anchor_idx};
-	my $next = "<a\nid=s$final_anchor>";
-	$next .= $final_anchor == 1 ? 'only message in' : 'end of';
-	$next .= " thread</a>, back to <a\nhref=\"../../\">index</a>";
+	my $next = @$msgs == 1 ? 'only message in thread' : 'end of thread';
+	$next .= ", back to <a\nhref=\"../../\">index</a>";
 	$next .= "\ndownload thread: ";
 	$next .= "<a\nhref=\"../t.mbox.gz\">mbox.gz</a>";
 	$next .= " / follow: <a\nhref=\"../t.atom\">Atom feed</a>";
+	$next .= "\n";
+	$next .= $skel;
 	$fh->write('<hr /><pre>' . $next . "\n\n".
 			$foot .  '</pre></body></html>');
 	$fh->close;
@@ -563,12 +542,6 @@ sub thread_html_head {
 		"</head><body>");
 }
 
-sub pre_anchor_entry {
-	my ($seen, $mime) = @_;
-	my $id = anchor_for(mid_mime($mime));
-	$seen->{$id} = "#$id"; # save the anchor for children, later
-}
-
 sub ghost_parent {
 	my ($upfx, $mid) = @_;
 	# 'subject dummy' is used internally by Mail::Thread
@@ -580,36 +553,9 @@ sub ghost_parent {
 	qq{[parent not found: &lt;<a\nhref="$upfx$href/">$html</a>&gt;]};
 }
 
-sub thread_adj_level {
-	my ($state, $level) = @_;
-
-	my $max = $state->{cur_level};
-	if ($level <= 0) {
-		return '' if $max == 0; # flat output
-
-		# reset existing lists
-		my $x = $max > 1 ? ('</ul></li>' x ($max - 1)) : '';
-		$state->{fh}->write($x . '</ul>');
-		$state->{cur_level} = 0;
-		return '';
-	}
-	if ($level == $max) { # continue existing list
-		$state->{fh}->write('<li>');
-	} elsif ($level < $max) {
-		my $x = $max > 1 ? ('</ul></li>' x ($max - $level)) : '';
-		$state->{fh}->write($x .= '<li>');
-		$state->{cur_level} = $level;
-	} else { # ($level > $max) # start a new level
-		$state->{cur_level} = $level;
-		$state->{fh}->write(($max ? '<li>' : '') . '<ul><li>');
-	}
-	'</li>';
-}
-
 sub ghost_flush {
-	my ($state, $upfx, $mid, $level) = @_;
-	my $end = '<pre>'. ghost_parent($upfx, $mid) . '</pre>';
-	$state->{fh}->write($end .= thread_adj_level($state, $level));
+	my ($state, $upfx, $mid) = @_;
+	$state->{fh}->write('<pre>'. ghost_parent($upfx, $mid) . '</pre>');
 }
 
 sub __thread_entry {
@@ -623,16 +569,7 @@ sub __thread_entry {
 	$mime = Email::MIME->new($mime);
 
 	thread_html_head($mime, $state) if $state->{anchor_idx} == 0;
-	if (my $ghost = delete $state->{ghost}) {
-		# n.b. ghost messages may only be parents, not children
-		foreach my $g (@$ghost) {
-			ghost_flush($state, '../../', @$g);
-		}
-	}
-	my $end = thread_adj_level($state, $level);
 	index_entry($mime, $level, $state);
-	$state->{fh}->write($end) if $end;
-
 	1;
 }
 
@@ -641,23 +578,6 @@ sub indent_for {
 	INDENT x ($level - 1);
 }
 
-sub __ghost_prepare {
-	my ($state, $node, $level) = @_;
-	my $ghost = $state->{ghost} ||= [];
-	push @$ghost, [ $node->messageid, $level ];
-}
-
-sub thread_entry {
-	my ($state, $level, $node) = @_;
-	if (my $mime = $node->message) {
-		unless (__thread_entry($state, $mime, $level)) {
-			__ghost_prepare($state, $node, $level);
-		}
-	} else {
-		__ghost_prepare($state, $node, $level);
-	}
-}
-
 sub load_results {
 	my ($sres) = @_;
 
@@ -738,7 +658,8 @@ sub _skel_header {
 		$s = $s->as_html;
 	}
 	my $m = PublicInbox::Hval->new_msgid($mid);
-	$m = $state->{upfx} . $m->as_href . '/';
+	my $upfx = $state->{upfx};
+	$m = defined $upfx ? $upfx.$m->as_href.'/' : '#'.anchor_for($m->raw);
 	$$dst .= "$pfx<a\nhref=\"$m\">";
 	$$dst .= defined($s) ? "$s</a> $f\n" : "$f</a>\n";
 }
@@ -754,14 +675,17 @@ sub skel_dump {
 		my $dst = $state->{dst};
 		if ($mid eq 'subject dummy') {
 			$$dst .= "\t[no common parent]\n";
-		} else {
-			$$dst .= '     [not found] ';
-			$$dst .= indent_for($level) . th_pfx($level);
-			$mid = PublicInbox::Hval->new_msgid($mid);
-			my $href = $state->{upfx} . $mid->as_href . '/';
-			my $html = $mid->as_html;
-			$$dst .= qq{&lt;<a\nhref="$href">$html</a>&gt;\n};
+			return;
 		}
+		$$dst .= '     [not found] ';
+		$$dst .= indent_for($level) . th_pfx($level);
+		$mid = PublicInbox::Hval->new_msgid($mid);
+		my $href;
+		my $upfx = $state->{upfx};
+		$upfx = '../../' unless defined $upfx; # thread index view
+		my $href = $upfx . $mid->as_href . '/';
+		my $html = $mid->as_html;
+		$$dst .= qq{&lt;<a\nhref="$href">$html</a>&gt;\n};
 	}
 }
 
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2016-08-18  2:16 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-08-18  2:16 UTC (permalink / raw)
  To: spew

---
 MANIFEST                     |   2 +
 lib/PublicInbox/Search.pm    |  30 +++++++-
 lib/PublicInbox/WWW.pm       |  14 ++++
 lib/PublicInbox/WwwStream.pm |   4 +-
 lib/PublicInbox/WwwText.pm   | 160 +++++++++++++++++++++++++++++++++++++++++++
 t/psgi_text.t                |  39 +++++++++++
 6 files changed, 247 insertions(+), 2 deletions(-)
 create mode 100644 lib/PublicInbox/WwwText.pm
 create mode 100644 t/psgi_text.t

diff --git a/MANIFEST b/MANIFEST
index bed6050..306945a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -82,6 +82,7 @@ lib/PublicInbox/WWW.pm
 lib/PublicInbox/WatchMaildir.pm
 lib/PublicInbox/WwwAttach.pm
 lib/PublicInbox/WwwStream.pm
+lib/PublicInbox/WwwText.pm
 sa_config/Makefile
 sa_config/README
 sa_config/root/etc/spamassassin/public-inbox.pre
@@ -141,6 +142,7 @@ t/plack.t
 t/precheck.t
 t/psgi_attach.t
 t/psgi_mount.t
+t/psgi_text.t
 t/qspawn.t
 t/search.t
 t/spamcheck_spamc.t
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 7561ef4..4fff1e4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -51,6 +51,7 @@ my %bool_pfx_internal = (
 	thread => 'G', # newsGroup (or similar entity - e.g. a web forum name)
 );
 
+# do we still need these? probably not..
 my %bool_pfx_external = (
 	path => 'XPATH',
 	mid => 'Q', # uniQue id (Message-ID)
@@ -62,6 +63,26 @@ my %prob_prefix = (
 	m => 'Q', # 'mid' is exact, 'm' can do partial
 );
 
+# not documenting m: and mid:, since using the URLs works w/o Xapian
+our $HELP = <<EOF;
+s: match within Subject only  e.g. s:"a quick brown fox"
+   This is a probabilistic search with support for stemming
+   and wildcards '*'.
+
+d: date range as YYYYMMDD  e.g. d:19931002..20101002
+   Open-ended ranges such as d:19931002.. and d:..20101002
+   are also supported.
+EOF
+# TODO: (from mairix, some of these are maybe)
+# b (body), f (From:), c (Cc:), n (attachment), t (To:)
+# tc (To:+Cc:), bs (body + Subject), tcf (To: +Cc: +From:)
+#
+# Non-mairix:
+# df (filenames from diff)
+# nq (non-quoted body)
+# da (diff a/ removed lines)
+# db (diff b/ added lines)
+
 my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
 
 sub xpfx { $all_pfx{$_[0]} }
@@ -191,7 +212,7 @@ sub qp {
 	# just parse the spec to avoid the extra DB handles for now.
 	if (my $altid = $self->{altid}) {
 		for (@$altid) {
-			# $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3'
+			# $_ = 'serial:gmane:file=/path/to/gmane.msgmap.sqlite3'
 			/\Aserial:(\w+):/ or next;
 			my $pfx = $1;
 			# gmane => XGMANE
@@ -321,4 +342,11 @@ sub enquire {
 	$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
 }
 
+sub help {
+	my ($self) = @_;
+	my $ret = [ $HELP ];
+	# TODO: merge altid prefixes
+	$ret;
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 6f6a003..6d6349a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -96,6 +96,8 @@ sub call {
 
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) {
 		r301($ctx, $1, $2);
+	} elsif ($path_info =~ m!$INBOX_RE/_/text/(.+)\z!o) {
+		get_text($ctx, $1, $2);
 
 	# convenience redirects order matters
 	} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
@@ -238,6 +240,18 @@ sub get_thread {
 	PublicInbox::View::thread_html($ctx);
 }
 
+# /$INBOX/_/text/$KEY/
+# /$INBOX/_/text/$KEY/raw
+# KEY may contain slashes
+sub get_text {
+	my ($ctx, $inbox, $key) = @_;
+	my $r404 = invalid_inbox($ctx, $inbox);
+	return $r404 if $r404;
+
+	require PublicInbox::WwwText;
+	PublicInbox::WwwText::get_text($ctx, $key);
+}
+
 sub ctx_get {
 	my ($ctx, $key) = @_;
 	my $val = $ctx->{$key};
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 9ed25e1..c89e6de 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -31,10 +31,12 @@ sub _html_top ($) {
 	my $desc = ascii_html($obj->description);
 	my $title = $ctx->{-title_html} || $desc;
 	my $upfx = $ctx->{-upfx} || '';
+	my $help = $upfx.'_/text/help';
 	my $atom = $ctx->{-atom} || $upfx.'new.atom';
 	my $tip = $ctx->{-html_tip} || '';
 	my $top = "<b>$desc</b>";
-	my $links = "<a\nhref=\"$atom\">Atom feed</a>";
+	my $links = "<a\nhref=\"$help\">help</a> / ".
+			"<a\nhref=\"$atom\">Atom feed</a>";
 	if ($obj->search) {
 		my $q_val = $ctx->{-q_value_html};
 		if (defined $q_val && $q_val ne '') {
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
new file mode 100644
index 0000000..51a91d1
--- /dev/null
+++ b/lib/PublicInbox/WwwText.pm
@@ -0,0 +1,160 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# serves the /$INBOX/_/* endpoints from :text/* of the git tree
+package PublicInbox::WwwText;
+use strict;
+use warnings;
+use PublicInbox::Linkify;
+use PublicInbox::WwwStream;
+use PublicInbox::Hval qw(ascii_html);
+our $QP_URL = 'https://xapian.org/docs/queryparser.html';
+our $WIKI_URL = 'https://en.wikipedia.org/wiki';
+
+# /$INBOX/_/text/$KEY/ # KEY may contain slashes
+sub get_text {
+	my ($ctx, $key) = @_;
+	my $code = 200;
+
+	# get the raw text the same way we get mboxrds
+	my $raw = ($key =~ s!/raw\z!!);
+	my $have_tslash = ($key =~ s!/\z!!) if !$raw;
+
+	my $txt = '';
+	if (!_default_text($ctx, $key, \$txt)) {
+		$code = 404;
+		$txt = "404 Not Found ($key)\n";
+	}
+	if ($raw) {
+		return [ $code, [ 'Content-Type', 'text/plain',
+				  'Content-Length', bytes::length($txt) ],
+			[ $txt ] ]
+	}
+
+	# enforce trailing slash for "wget -r" compatibility
+	if (!$have_tslash && $code == 200) {
+		my $url = $ctx->{-inbox}->base_url($ctx->{env});
+		$url .= "_/text/$key/";
+
+		return [ 302, [ 'Content-Type', 'text/plain',
+				'Location', $url ],
+			[ "Redirecting to $url\n" ] ];
+	}
+
+	# Follow git commit message conventions,
+	# first line is the Subject/title
+	my ($title) = ($txt =~ /\A([^\n]*)/s);
+	_do_linkify($txt);
+	$ctx->{-title_html} = ascii_html($title);
+
+	my $nslash = ($key =~ tr!/!/!);
+	$ctx->{-upfx} = '../../../' . ('../' x $nslash);
+
+	PublicInbox::WwwStream->response($ctx, $code, sub {
+		my ($nr, undef) = @_;
+		$nr == 1 ? '<pre>'.$txt.'</pre>' : undef
+	});
+}
+
+sub _do_linkify {
+	my $l = PublicInbox::Linkify->new;
+	$_[0] = $l->linkify_2(ascii_html($l->linkify_1($_[0])));
+}
+
+sub _default_text ($$$) {
+	my ($ctx, $key, $txt) = @_;
+	return if $key ne 'help'; # TODO more keys?
+
+	my $ibx = $ctx->{-inbox};
+	my $base_url = $ibx->base_url($ctx->{env});
+	$$txt .= "public-inbox help for $base_url\n";
+	$$txt .= <<EOF;
+
+overview
+--------
+
+    public-inbox uses Message-ID identifiers in URLs.
+    One may look up messages by substituting Message-IDs
+    (without the leading '<' or trailing '>') into the URL.
+    Forward slash ('/') characters in the Message-IDs
+    need to be escaped as "%2F" (without quotes).
+
+    Thus, it is possible to retrieve any message by its
+    Message-ID by going to:
+
+	$base_url<Message-ID>/
+
+	(without the '<' or '>')
+
+    Message-IDs are described at:
+
+	$WIKI_URL/Message-ID
+
+EOF
+
+	# n.b. we use the Xapian DB for any regeneratable,
+	# order-of-arrival-independent data.
+	my $srch = $ibx->search;
+	if ($srch) {
+		$$txt .= <<EOF;
+search
+------
+
+    This public-inbox has search functionality provided by Xapian.
+
+    It supports typical AND, OR, NOT, '+', '-' queries present
+    in other search engines.
+
+    Prefixes supported:
+
+EOF
+		my $help = $srch->help;
+		foreach my $h (@$help) {
+			$h =~ s/^/\t/sgm;
+			$$txt .= $h;
+			$$txt .= "\n";
+		}
+
+		$$txt .= <<EOF;
+    See $QP_URL for more details.
+
+message threading
+-----------------
+
+    Message threading is enabled for this public-inbox,
+    additional endpoints for message threads are available:
+
+    * $base_url<Message-ID>/T/#u
+
+      Loads the thread belonging to the given <Message-ID>
+      in flat chronological order.  The "#u" anchor
+      focuses the browser on the given <Message-ID>.
+
+    * $base_url<Message-ID>/t/#u
+
+      Loads the thread belonging to the given <Message-ID>
+      in threaded order with nesting.  For deep threads,
+      this requires a wide display or horizontal scrolling.
+
+    Both of these HTML endpoints are suitable for offline reading
+    using the thread overview at the bottom of each page.
+
+    Users of feed readers may follow a particular thread using:
+
+    * $base_url<Message-ID>/t.atom
+
+	$WIKI_URL/Atom_(standard)
+
+    Finally, the gzipped mbox for a thread is available for
+    downloading and importing into your favorite mail client:
+
+    * $base_url<Message-ID>/t.mbox.gz
+
+	$WIKI_URL/Mbox
+	(we use the mboxrd variant)
+EOF
+	}
+	1;
+}
+
+1;
diff --git a/t/psgi_text.t b/t/psgi_text.t
new file mode 100644
index 0000000..bf565f8
--- /dev/null
+++ b/t/psgi_text.t
@@ -0,0 +1,39 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use File::Temp qw/tempdir/;
+my $tmpdir = tempdir('psgi-text-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $maindir = "$tmpdir/main.git";
+my $addr = 'test-public@example.com';
+my $cfgpfx = "publicinbox.test";
+my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "$mod missing for psgi_text.t" if $@;
+}
+use_ok $_ foreach @mods;
+use PublicInbox::Import;
+use PublicInbox::Git;
+use PublicInbox::Config;
+use PublicInbox::WWW;
+use_ok 'PublicInbox::WwwText';
+use Plack::Builder;
+my $config = PublicInbox::Config->new({
+	"$cfgpfx.address" => $addr,
+	"$cfgpfx.mainrepo" => $maindir,
+});
+is(0, system(qw(git init -q --bare), $maindir), "git init (main)");
+my $www = PublicInbox::WWW->new($config);
+
+test_psgi(sub { $www->call(@_) }, sub {
+	my ($cb) = @_;
+	my $res;
+	$res = $cb->(GET('/test/_/text/help/'));
+	like($res->content, qr!<title>public-inbox help.*</title>!,
+		'default help');
+});
+
+done_testing();
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2016-08-23 20:07 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2016-08-23 20:07 UTC (permalink / raw)
  To: spew

From: Eric Wong <normalperson@yhbt.net>

---
 .gitignore                            |   1 +
 Documentation/.gitignore              |   2 +-
 Documentation/include.mk              |  15 +++-
 Documentation/public-inbox-config.pod |  90 +++++++++++++++++++
 Documentation/public-inbox-daemon.pod | 165 ++++++++++++++++++++++++++++++++++
 Documentation/public-inbox-httpd.pod  |  36 ++++++++
 Documentation/public-inbox-nntpd.pod  |  31 +++++++
 lib/PublicInbox/WWW.pod               |  40 +++++++++
 8 files changed, 376 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/public-inbox-config.pod
 create mode 100644 Documentation/public-inbox-daemon.pod
 create mode 100644 Documentation/public-inbox-httpd.pod
 create mode 100644 Documentation/public-inbox-nntpd.pod
 create mode 100644 lib/PublicInbox/WWW.pod

diff --git a/.gitignore b/.gitignore
index 3b333a5..6a44471 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,5 +9,6 @@
 *.1
 *.5
 *.7
+*.8
 *.html
 *.gz
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
index 8ba4186..107ad36 100644
--- a/Documentation/.gitignore
+++ b/Documentation/.gitignore
@@ -1 +1 @@
-/public-inbox-mda.txt
+/public-inbox-*.txt
diff --git a/Documentation/include.mk b/Documentation/include.mk
index 5d73028..99f3d43 100644
--- a/Documentation/include.mk
+++ b/Documentation/include.mk
@@ -16,33 +16,42 @@ podtext = $(PODTEXT) $(PODTEXT_OPTS)
 
 m1 =
 m1 += public-inbox-mda
+m1 += public-inbox-httpd
+m1 += public-inbox-nntpd
 m5 =
+m5 += public-inbox-config
 m7 =
+m8 =
+m8 += public-inbox-daemon
 
 man1 := $(addsuffix .1, $(m1))
 man5 := $(addsuffix .5, $(m5))
 man7 := $(addsuffix .7, $(m7))
+man8 := $(addsuffix .8, $(m8))
 
 all:: man html
 
-man: $(man1) $(man5) $(man7)
+man: $(man1) $(man5) $(man7) $(man8)
 
 prefix ?= $(HOME)
 mandir ?= $(prefix)/share/man
 man1dir = $(mandir)/man1
 man5dir = $(mandir)/man5
 man7dir = $(mandir)/man7
+man8dir = $(mandir)/man8
 
 install-man: man
 	test -z "$(man1)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
 	test -z "$(man5)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
 	test -z "$(man7)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+	test -z "$(man8)" || $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
 	test -z "$(man1)" || $(INSTALL) -m 644 $(man1) $(DESTDIR)$(man1dir)
 	test -z "$(man5)" || $(INSTALL) -m 644 $(man5) $(DESTDIR)$(man5dir)
 	test -z "$(man7)" || $(INSTALL) -m 644 $(man7) $(DESTDIR)$(man7dir)
+	test -z "$(man8)" || $(INSTALL) -m 644 $(man8) $(DESTDIR)$(man8dir)
 
-%.1 : Documentation/%.pod
-	$(podman) -s 1 $< $@+ && mv $@+ $@
+%.1 %.5 %.7 %.8 : Documentation/%.pod
+	$(podman) -s $(suffix $@) $< $@+ && mv $@+ $@
 
 mantxt = $(addprefix Documentation/, $(addsuffix .txt, $(m1)))
 docs += $(mantxt)
diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
new file mode 100644
index 0000000..855879d
--- /dev/null
+++ b/Documentation/public-inbox-config.pod
@@ -0,0 +1,90 @@
+=head1 NAME
+
+public-inbox-config - public-inbox config file description
+
+=head1 SYNOPSIS
+
+~/.public-inbox/config
+
+=head1 DESCRIPTION
+
+The public-inbox config file is parseable by L<git-config(1)>.
+This is a global configuration file for mapping/discovering
+all public-inboxes used by a particular user.
+
+=head1 CONFIGURATION FILE
+
+=head2 EXAMPLE
+
+	[publicinbox "test"]
+		mainrepo = /home/user/path/to/test.git
+		; multiple addresses are supported
+		address = test@example.com
+		; address = alternate@example.com
+		url = http://example.com/test
+		newsgroup = inbox.test
+
+=head2 VARIABLES
+
+=over 8
+
+=item publicinbox.<name>.address
+
+The email address of the public-inbox.  May be specified
+more than once for merging multiple mailing lists (migrating
+to new addresses).  This must be specified.
+
+Default: none, required
+
+=item publicinbox.<name>.mainrepo
+
+The path to the git repository which hosts the public-inbox.
+
+Default: none, required
+
+=item publicinbox.<name>.url
+
+The primary URL for hosting the HTTP/HTTPS archives.
+Additional HTTP/HTTPS URLs may be specified via C<$GIT_DIR/cloneurl>
+as documented in L<gitweb(1)>.
+
+Default: none, optional
+
+=item publicinbox.<name>.newsgroup
+
+The NNTP group name for use with L<public-inbox-nntpd(1)>.  This
+may be any newsgroup name with hierarchies delimited by '.'.  For
+example, the newsgroup for L<mailto:meta@public-inbox.org>
+is: inbox.comp.mail.public-inbox.meta
+
+Default: none, optional.
+
+=back
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=item PI_CONFIG
+
+May be used to override the default "~/.public-inbox/config" value.
+
+=back
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>,
+L<public-inbox-mda(1)>
diff --git a/Documentation/public-inbox-daemon.pod b/Documentation/public-inbox-daemon.pod
new file mode 100644
index 0000000..b02db9e
--- /dev/null
+++ b/Documentation/public-inbox-daemon.pod
@@ -0,0 +1,165 @@
+=head1 NAME
+
+public-inbox-daemon - public-inbox network server daemons
+
+=head1 SYNOPSIS
+
+B<public-inbox-httpd>
+B<public-inbox-nntpd>
+
+=head1 DESCRIPTION
+
+Common daemon options and behavior for public-inbox network
+daemons.  Network daemons for public-inbox provide read-only
+NNTP and HTTP access to public-inboxes.  Write access to a
+public-inbox repository will never be required to run these.
+
+These daemons are implemented with a common core using
+non-blocking sockets to scale to thousands of connected clients.
+These daemons may also utilize multiple pre-forked worker
+processes to take advantage of multiple CPUs.
+
+Native TLS (Transport Layer Security) support is planned.
+
+=head1 OPTIONS
+
+=over
+
+=item -l, --listen ADDRESS
+
+This takes an absolute path to a Unix socket or HOST:PORT
+to listen on.  For example, to listen to TCP connections on
+port 119, use: C<-l 0.0.0.0:119>.  This may also point to
+a Unix socket (C<-l /path/to/http.sock>) for a reverse proxy
+like L<nginx(1)> to use.
+
+May be specified multiple times to allow listening on multiple
+sockets.
+
+Default: server-dependent unless socket activation is used with
+L<systemd(1)> or similar (see L<systemd.socket(5)>).
+
+=item -1, --stdout PATH
+
+Specify an appendable path to redirect stdout descriptor (1) to.
+Using this is preferable to setting up the redirect externally
+(e.g. E<gt>E<gt>/path/to/log in shell) since it allows
+SIGUSR1 to be handled (see L</SIGNALS> below).
+
+Default: /dev/null
+
+=item -2, --stderr PATH
+
+Like C<--stdout>, but for the stderr descriptor (2).
+
+=item -W, --worker-processes
+
+Set the number of worker processes.
+
+Normally, this should match the number of CPUs on the system to
+take full advantage of the hardware.  However, users of
+memory-constrained systems may want to lower this.
+
+Setting this to zero (C<-W0>) disables the master/worker split;
+saving some memory but removing the ability to use SIGTTIN
+to increase worker processes or have the worker restarted by
+the master on crashes.
+
+Default: 1
+
+=back
+
+=head1 SIGNALS
+
+Most of our signal handling behavior is copied from L<nginx(1)>
+and/or L<starman(1)>; so it is possible to reuse common scripts
+for them.
+
+=over 8
+
+=item SIGUSR1
+
+Reopens log files pointed to by --stdout and --stderr options.
+
+=item SIGUSR2
+
+Spawn a new process with the intention to replace the running one.
+See L</UPGRADING> below.
+
+=item SIGHUP
+
+Reload config files associated with the process.
+(FIXME: not tested for -httpd, yet)
+
+=item SIGTTIN
+
+Increase the number of running worker processes by one.
+
+=item SIGTTOU
+
+Decrease the number of running worker processes by one.
+
+=item SIGWINCH
+
+Stop all running worker processes.   SIGHUP or SIGTTIN
+may be used to restart workers.
+
+=item SIGQUIT
+
+Gracefully terminate the running process.
+
+=back
+
+SIGTTOU, SIGTTIN, SIGWINCH all have no effect when worker
+processes are disabled with C<-W0> on the command-line.
+
+=head1 ENVIRONMENT
+
+=over 8
+
+=item PI_CONFIG
+
+config file. default: ~/.public-inbox/config
+See L<public-inbox-config(5)>
+
+=item LISTEN_FDS, LISTEN_PID
+
+Used by systemd (and compatible) installations for socket
+activation.  See L<systemd.socket(5)> and L<sd_listen_fds(3)>.
+
+=back
+
+=head1 UPGRADING
+
+There are two ways to upgrade a running process.
+
+Users of process management systems with socket activation
+(L<systemd(1)> or similar) may rely on multiple instances.  For
+systemd, this means using two (or more) '@' instances for each
+service (e.g. C<SERVICENAME@INSTANCE>) as documented in
+L<systemd.unit(5)>.
+
+Users of traditional SysV init may use SIGUSR2 to spawn
+a replacement process and gracefully terminate the old
+process using SIGQUIT.
+
+In either case, the old process will not truncate running
+responses; so responses to expensive requests do not get
+interrupted and lost.
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2013-2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<public-inbox-httpd(1)>, L<public-inbox-nntpd(1)>
diff --git a/Documentation/public-inbox-httpd.pod b/Documentation/public-inbox-httpd.pod
new file mode 100644
index 0000000..8141f7c
--- /dev/null
+++ b/Documentation/public-inbox-httpd.pod
@@ -0,0 +1,36 @@
+=head1 NAME
+
+public-inbox-httpd - PSGI server optimized for public-inbox
+
+=head1 SYNOPSIS
+
+B<public-inbox-httpd> [OPTIONS] [/path/to/myapp.psgi]
+
+=head1 DESCRIPTION
+
+public-inbox-httpd is a PSGI/Plack server supporting HTTP/1.1.
+It uses options and environment variables common to all
+L<public-inbox-daemon(8)> implementations in addition to
+the PSGI file.
+
+This may point to a PSGI file for supporting generic PSGI applications.
+If a PSGI file is not specified, L<PublicInbox::WWW(3pm)> is loaded
+with a default middleware stack.
+
+=head1 CONTACT
+
+Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>
+and L<http://hjrcffqmbrq6wope.onion/meta/>
+
+=head1 COPYRIGHT
+
+Copyright 2013-2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>,
+L<Plack(3pm)>
diff --git a/Documentation/public-inbox-nntpd.pod b/Documentation/public-inbox-nntpd.pod
new file mode 100644
index 0000000..40b6b8b
--- /dev/null
+++ b/Documentation/public-inbox-nntpd.pod
@@ -0,0 +1,31 @@
+=head1 NAME
+
+public-inbox-nntpd - NNTP server for sharing public-inbox
+
+=head1 SYNOPSIS
+
+B<public-inbox-nntpd> [OPTIONS]
+
+=head1 DESCRIPTION
+
+public-inbox-nntpd provides an NNTP daemon for public-inbox.
+It uses options and environment variables common to all
+L<public-inbox-daemon(8)> implementations.
+
+=head1 CONTACT
+
+All feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
+
+The mail archives are hosted at L<https://public-inbox.org/meta/>,
+L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>,
+L<nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta>
+
+=head1 COPYRIGHT
+
+Copyright 2013-2016 all contributors L<mailto:meta@public-inbox.org>
+
+License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
+
+=head1 SEE ALSO
+
+L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>
diff --git a/lib/PublicInbox/WWW.pod b/lib/PublicInbox/WWW.pod
new file mode 100644
index 0000000..cfc5b4b
--- /dev/null
+++ b/lib/PublicInbox/WWW.pod
@@ -0,0 +1,40 @@
+=head1 NAME
+
+PublicInbox::WWW - PSGI interface for public-inbox
+
+=head1 SYNOPSIS
+
+In your .psgi file:
+
+	use PublicInbox::WWW;
+	PublicInbox::WWW->preload; # optional
+
+	my $www = PublicInbox::WWW->new;
+	builder {
+		enable 'Head';
+		mount '/inboxes' => sub { $www->call(@_) };
+	};
+
+
+=head1 DESCRIPTION
+
+This application loads I<.psgi> files (or actually whichever filename
+extensions) from the root directory and run it as a PSGI
+application. Suppose you have a directory containing C<foo.psgi> and
+C<bar.psgi>, map this application to C</app> with
+L<Plack::App::URLMap> and you can access them via the URL:
+
+  http://example.com/app/foo.psgi
+  http://example.com/app/bar.psgi
+
+to load them. You can rename the file to the one without C<.psgi>
+extension to make the URL look nicer, or use the URL rewriting tools
+like L<Plack::Middleware::Rewrite> to do the same thing.
+
+=head1 AUTHOR
+
+Tatsuhiko Miyagawa
+
+=head1 SEE ALSO
+
+L<Plack::App::CGIBin>
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2017-04-05 18:40 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2017-04-05 18:40 UTC (permalink / raw)
  To: spew

---
 lib/yahns/config.rb       |  23 +++++++-
 lib/yahns/http_context.rb |   4 ++
 lib/yahns/rack_proxy.rb   | 143 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/yahns/submaster.rb    |  12 ++++
 4 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 lib/yahns/rack_proxy.rb
 create mode 100644 lib/yahns/submaster.rb

diff --git a/lib/yahns/config.rb b/lib/yahns/config.rb
index e545d59..1f4d17e 100644
--- a/lib/yahns/config.rb
+++ b/lib/yahns/config.rb
@@ -436,6 +436,27 @@ def commit!(server)
       server.__send__("#{var}=", val) if val != :unset
     end
 
-    @app_ctx.each { |app| app.logger ||= server.logger }
+    # count extra workers for rack_proxy (and maybe others)
+    submasters = 0
+    afc = @set[:atfork_child]
+    afc = [] if afc == :unset
+
+    nsm = 0
+    @app_ctx.each do |ctx|
+      ctx.logger ||= server.logger
+      next unless ctx.respond_to?(:submasters)
+      cmd_list = ctx.submasters or next
+      require_relative 'submaster'
+      submasters += cmd_list.size
+      cmd_list.each do |cmd|
+        afc << Yahns::Submaster.new(cmd, nsm += 1)
+      end
+    end
+    if submasters > 0
+      wp = @set[:worker_processes]
+      wp = 1 if wp == :unset # gotta have one worker
+      server.__send__(:worker_processes=, wp + submasters)
+      server.__send__(:atfork_child=, afc)
+    end
   end
 end
diff --git a/lib/yahns/http_context.rb b/lib/yahns/http_context.rb
index 40f2c58..70a75d7 100644
--- a/lib/yahns/http_context.rb
+++ b/lib/yahns/http_context.rb
@@ -91,4 +91,8 @@ def tmpio_for(len, env)
     end
     tmp
   end
+
+  def submasters
+    @yahns_rack.respond_to?(:submasters) ? @yahns_rack.submasters : nil
+  end
 end
diff --git a/lib/yahns/rack_proxy.rb b/lib/yahns/rack_proxy.rb
new file mode 100644
index 0000000..1dee36a
--- /dev/null
+++ b/lib/yahns/rack_proxy.rb
@@ -0,0 +1,143 @@
+# -*- encoding: binary -*-
+# Copyright (C) 2017 all contributors <yahns-public@yhbt.net>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# frozen_string_literal: true
+require_relative 'rack'
+require_relative 'proxy_pass'
+require 'socket'
+
+# Basically, a lazy way to setup ProxyPass to hand off some (or all)
+# requests to any HTTP server backend (e.g. varnish, etc)
+class Yahns::RackProxy < Yahns::Rack # :nodoc:
+
+  # the key is the destination returned by the top-level config.ru
+  # and the value is a splattable array for spawning another process
+  # via Process.exec
+  # {
+  #   # [ key, backend URL, ]  => %w(splattable array for Process.exec),
+  #   [:pass, 'http://127.0.0.1:9292/' ] => %w(rackup /path/to/config.ru)
+  #   [:lsock, 'unix:/path/to/sock' ] => %w(bleh -l /path/to/sock ...)
+  #
+  #   # Users of Ruby 2.3+ can shorten their config when
+  #   # running systemd-aware daemons which will bind to
+  #   # a random TCP port:
+  #   :pri => %w(blah -c conf.rb config.ru),
+  #   :alt => %w(blah -c /path/to/alt.conf.rb alt.ru),
+  #   :psgi => %w(blah foo.psgi),
+  #   ...
+  # }
+
+  # By default, proxy all requests by using the :pass return value
+  # Users can selectively process requests for non-buggy code in
+  # the core yahns processes.
+  PROXY_ALL = lambda { |env| :pass } # :nodoc:
+  attr_reader :submasters
+
+  def initialize(ru = PROXY_ALL, mapping, opts = {})
+    @submasters = []
+    case mapping
+    when Hash # multiple HTTP backends running different commands
+      # nothing to do  { key: splattable array for Process.spawn }
+    when Array # only one backend
+      mapping = { pass: cmd }
+    else
+      raise ArgumentError, "#{cmd.inspect} must be an Array or Hash"
+    end
+    env = nil
+
+    @proxy_pass_map = {}
+    mapping.each do |key, cmd|
+      case key
+      when Array # undocumented for now..
+        key, url, ppopts = *key
+      when Symbol # OK
+        ppopts = {}
+      else
+        raise ArgumentError, "#{key.inspect} is not a symbol"
+      end
+      Array === cmd or raise ArgumentError,
+                "#{cmd.inspect} must be a splattable array for Process.exec"
+      @proxy_pass_map[key] and raise ArgumentError,
+                "#{key.inspect} may not be repeated in mapping"
+
+      cmd = cmd.dup
+      unless url
+        if RUBY_VERSION.to_f < 2.3 && env.nil? # only warn once
+           warn "Ruby < 2.3 may crash when emulating systemd to pass FDs\n",
+" http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-core/69895\n"
+        end
+
+        # nope, no UNIXServer support, maybe not worth it to deal
+        # with FS perms in containers.
+        # Also, we can use TCP Fast Open support under Linux
+        srv = random_tcp_listener(ppopts)
+        addr = srv.addr
+        url = "http://#{addr[3]}:#{addr[1]}/"
+
+        # pretend to be systemd for sd_listen_fds(3) users
+        # submaster will #call any env values before calling Process.exec,
+        # so we can lazy-expand LISTEN_PID, here:
+        env ||= { 'LISTEN_FDS' => '1', 'LISTEN_PID' => Process.method(:pid) }
+        case cmd[0]
+        when Hash
+          cmd[0] = cmd[0].merge(env)
+        else
+          cmd.unshift(env)
+        end
+
+        rdr = { 3 => srv }
+        case cmd[-1]
+        when Hash
+          cmd[-1] = cmd[-1].merge(rdr)
+        else
+          cmd << rdr
+        end
+      end
+
+      @submasters << cmd
+      @proxy_pass_map[key] = Yahns::ProxyPass.new(url, ppopts)
+    end
+    super(ru, opts) # Yahns::Rack#initialize
+  end
+
+  def build_app!
+    super # Yahns::Rack#build_app!
+    proxy_app = @app
+
+    # wrap the (possibly-)user-supplied app
+    @app = lambda do |env|
+      res = proxy_app.call(env)
+
+      # standard Rack responses may be handled in yahns proper:
+      Array === res and return res
+
+      # the response is :pass or another Symbol, not a proper Rack response!
+      # shove the env over to the appropriate Yahns::ProxyPass which
+      # talks to a backend HTTP process:
+      ppass = @proxy_pass_map[res] and return ppass.call(env)
+
+      # oops, user screwed up :<
+      logger = env['rack.logger'] and
+        logger.error("bad response from user-supplied proxy: #{res.inspect}")
+
+      [ 500, [ %w(Content-Type text/plain) ], [] ]
+    end
+  end
+
+  def random_tcp_listener(opts) # TODO: should we support options?
+    srv = TCPServer.new('127.0.0.1', 0) # 0: bind random port
+    srv.setsockopt(:SOL_SOCKET, :SO_KEEPALIVE, 1)
+    srv.setsockopt(:IPPROTO_TCP, :TCP_NODELAY, 1)
+
+    # Deferring accepts slows down core yahns, but it's useful for
+    # less-sophisticated upstream (backend) servers:
+    Socket.const_defined?(:TCP_DEFER_ACCEPT) and
+      srv.setsockopt(:IPPROTO_TCP, :TCP_DEFER_ACCEPT, 1)
+
+    srv.listen(1024)
+    srv
+  end
+end
+
+# register ourselves
+Yahns::Config::APP_CLASS[:rack_proxy] = Yahns::RackProxy
diff --git a/lib/yahns/submaster.rb b/lib/yahns/submaster.rb
new file mode 100644
index 0000000..b86b425
--- /dev/null
+++ b/lib/yahns/submaster.rb
@@ -0,0 +1,12 @@
+# -*- encoding: binary -*-
+# Copyright (C) 2017 all contributors <yahns-public@yhbt.net>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+# frozen_string_literal: true
+class Yahns::Submaster < Struct.new(cmd, idx)
+
+  # atfork_child
+  def call(worker_nr)
+    return if worker_nr != idx
+    exec(*cmd)
+  end
+end
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2017-04-12 20:17 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2017-04-12 20:17 UTC (permalink / raw)
  To: spew

---
 lib/PublicInbox/Search.pm    |  9 ++++++---
 lib/PublicInbox/SearchIdx.pm | 15 ++++++++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 02d1827..7005b6f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -74,6 +74,9 @@ my %prob_prefix = (
 
 	q => 'XQUOT',
 	nq => 'XNQ',
+	df => 'XDF',
+	da => 'XDA',
+	db => 'XDB',
 
 	# default:
 	'' => 'XMID S A XNQ XQUOT XFN',
@@ -97,12 +100,12 @@ EOF
 	'a:' => 'match within the To, Cc, and From headers',
 	'tc:' => 'match within the To and Cc headers',
 	'bs:' => 'match within the Subject and body',
+	'df:' => 'match filenames from diff',
+	'da:' => 'diff pre-image removed (-) lines',
+	'db:' => 'diff post-image added(+) lines',
 );
 chomp @HELP;
 # TODO:
-# df (filenames from diff)
-# da (diff a/ removed lines)
-# db (diff b/ added lines)
 
 my $mail_query = Search::Xapian::Query->new('T' . 'mail');
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 8200b54..c8f5205 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -130,10 +130,15 @@ sub index_users ($$) {
 }
 
 sub index_body ($$$) {
-	my ($tg, $lines, $inc) = @_;
-	$tg->index_text(join("\n", @$lines), $inc, $inc ? 'XNQ' : 'XQUOT');
-	@$lines = ();
+	my ($tg, $lines, $doc) = @_;
+	my $txt = join("\n", @$lines);
+	$tg->index_text($txt, !!$doc, $doc ? 'XNQ' : 'XQUOT');
 	$tg->increase_termpos;
+	if ($doc && $txt =~ /^[\-\+]/ms) { # does it look like a diff?
+		foreach (@$lines) {
+		}
+	}
+	@$lines = ();
 }
 
 sub add_message {
@@ -200,7 +205,7 @@ sub add_message {
 			my @lines = split(/\n/, $body);
 			while (defined(my $l = shift @lines)) {
 				if ($l =~ /^>/) {
-					index_body($tg, \@orig, 1) if @orig;
+					index_body($tg, \@orig, $doc) if @orig;
 					push @quot, $l;
 				} else {
 					index_body($tg, \@quot, 0) if @quot;
@@ -208,7 +213,7 @@ sub add_message {
 				}
 			}
 			index_body($tg, \@quot, 0) if @quot;
-			index_body($tg, \@orig, 1) if @orig;
+			index_body($tg, \@orig, $doc) if @orig;
 		});
 
 		link_message($self, $smsg, $old_tid);
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] WIP
@ 2017-07-15  1:42 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2017-07-15  1:42 UTC (permalink / raw)
  To: spew

---
 Makefile            |   2 +
 cbm_ref.c           | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 cbm_ref.h           |  16 ++
 t/helper/.gitignore |   1 +
 t/helper/test-cbm.c |  45 ++++++
 5 files changed, 502 insertions(+)
 create mode 100644 cbm_ref.c
 create mode 100644 cbm_ref.h
 create mode 100644 t/helper/test-cbm.c

diff --git a/Makefile b/Makefile
index ba4359ef8d..8e60926d4d 100644
--- a/Makefile
+++ b/Makefile
@@ -633,6 +633,7 @@ X =
 
 PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
 
+TEST_PROGRAMS_NEED_X += test-cbm
 TEST_PROGRAMS_NEED_X += test-chmtime
 TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
@@ -748,6 +749,7 @@ LIB_OBJS += branch.o
 LIB_OBJS += bulk-checkin.o
 LIB_OBJS += bundle.o
 LIB_OBJS += cache-tree.o
+LIB_OBJS += cbm_ref.o
 LIB_OBJS += color.o
 LIB_OBJS += column.o
 LIB_OBJS += combine-diff.o
diff --git a/cbm_ref.c b/cbm_ref.c
new file mode 100644
index 0000000000..c2f2dcacb1
--- /dev/null
+++ b/cbm_ref.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) Eric Wong <e@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ *
+ * Byte ordering: Network byte order on filesystem, all in-memory
+ * structures are in native byte order
+ *
+ */
+#include "git-compat-util.h"
+#include "cache.h"
+#include "cbm_ref.h"
+
+/*
+ * align blocks up to the nearest 16 bytes boundary,
+ * it should give mmap compatibility for all 64-bit systems
+ */
+#define CBM_ALIGN 16
+
+/* don't penalize the majority of users who will never need >= 4GB for refs */
+typedef uint32_t cbm_off_t;
+#define CBM_SIZE_MAX UINT32_MAX
+
+/* like "struct list_head" in list.h, but "pointers" are file offsets */
+struct flist_head {
+	cbm_off_t next, prev;
+};
+
+/* the first 28 bytes of the file contain this header: */
+struct cbm_ref_tree {
+	uint8_t magic_bits[16]; /* version, etc .. */
+	struct flist_head free_list;
+	cbm_off_t root; /* 0 == empty */
+};
+/* ... the rest of the file is just made of several cbm_ref, one per ref: */
+struct cbm_ref {
+	uint16_t total_size; /* includes final '\0' in name[] */
+	uint16_t name_byte;
+	uint16_t oid_byte;
+	uint8_t name_otherbits;
+	uint8_t oid_otherbits;
+
+	/* "pointers" are file offsets */
+	cbm_off_t name_child[2];
+	cbm_off_t oid_child[2];
+	union {
+		struct flist_head free_space;
+		struct flist_head oid_alias;
+	} as;
+	struct object_id oid;
+	char name[FLEX_ARRAY];
+};
+
+/* open file, this struct in memory only (malloc-ed) */
+struct cbm {
+	int fd;
+	int flags;
+	/*
+	 * We can add mmap support, later:
+	char *mmptr;
+	size_t mmsize;
+	*/
+	struct cbm_ref_tree t;
+};
+
+static inline size_t size_align(size_t size)
+{
+	return ((size + (CBM_ALIGN - 1)) & ~(CBM_ALIGN - 1));
+}
+
+static cbm_off_t ntoh_off(cbm_off_t off)
+{
+	return sizeof(off) == sizeof(uint32_t)  ? ntohl(off) : ntohll(off);
+}
+
+static cbm_off_t hton_off(cbm_off_t off)
+{
+	return sizeof(off) == sizeof(uint32_t)  ? htonl(off) : htonll(off);
+}
+
+static void pwrite_or_die(int fd, void *buf, size_t len, off_t off)
+{
+	ssize_t w;
+again:
+	w = pwrite(fd, buf, len, off);
+	if (w < 0) {
+		if (errno == EINTR) goto again;
+		die_errno("pwrite failed");
+	}
+	if (w != len)
+		die("short pwrite: %ld < %lu", (long)w, (unsigned long)len);
+}
+
+static void ntoh_flist(struct flist_head *head)
+{
+	head->next = ntoh_off(head->next);
+	head->prev = ntoh_off(head->prev);
+}
+
+static void hton_flist(struct flist_head *head)
+{
+	head->next = hton_off(head->next);
+	head->prev = hton_off(head->prev);
+}
+
+static void xpread_in_full(int fd, void *ptr, size_t len, off_t off)
+{
+	ssize_t r = pread_in_full(fd, ptr, len, off);
+
+	if (r < 0)
+		die_errno("pread failed @ %"PRIuMAX"\n", (uintmax_t)off);
+	if (r != (ssize_t)len)
+		die("pread short %ld/%"PRIuMAX" @ %"PRIuMAX"\n",
+			(long)r, (uintmax_t)len, (uintmax_t)off);
+}
+
+static cbm_off_t ptr_read(const struct cbm *cbm, cbm_off_t off)
+{
+	cbm_off_t ptr;
+	xpread_in_full(cbm->fd, &ptr, sizeof(ptr), off);
+
+	return ntoh_off(ptr);
+}
+
+static void ptr_write(const struct cbm *cbm, cbm_off_t off, cbm_off_t val)
+{
+	cbm_off_t tmp = hton_off(val);
+
+	pwrite_or_die(cbm->fd, &tmp, sizeof(tmp), off);
+}
+
+#define FLIST_INIT(type, ptr, MEMBER) \
+	flist_init(&((ptr)->MEMBER), offsetof(type,MEMBER))
+
+static void flist_init(struct flist_head *head, cbm_off_t base)
+{
+	head->next = head->prev = base;
+}
+
+static void ntoh_cbm_ref(struct cbm_ref *ent)
+{
+	ent->total_size = ntohs(ent->total_size);
+	ent->name_byte = ntohs(ent->name_byte);
+	ent->name_child[0] = ntoh_off(ent->name_child[0]);
+	ent->name_child[1] = ntoh_off(ent->name_child[1]);
+	ent->oid_byte = ntohs(ent->oid_byte);
+	ent->oid_child[0] = ntoh_off(ent->oid_child[0]);
+	ent->oid_child[1] = ntoh_off(ent->oid_child[1]);
+	ntoh_flist(&ent->as.oid_alias);
+}
+
+static void hton_cbm_ref(struct cbm_ref *ent)
+{
+	ent->total_size = htons(ent->total_size);
+	ent->name_byte = htons(ent->name_byte);
+	ent->name_child[0] = hton_off(ent->name_child[0]);
+	ent->name_child[1] = hton_off(ent->name_child[1]);
+	ent->oid_byte = htons(ent->oid_byte);
+	ent->oid_child[0] = hton_off(ent->oid_child[0]);
+	ent->oid_child[1] = hton_off(ent->oid_child[1]);
+	hton_flist(&ent->as.oid_alias);
+}
+
+/* loads a cbm_ref into memory from a given FS offset */
+static void
+cbm_ref_load(const struct cbm *cbm, struct cbm_ref **ent, cbm_off_t off)
+{
+	size_t size = size_align(sizeof(struct cbm_ref));
+	uint16_t total_size;
+	struct cbm_ref *node;
+
+	node = *ent = xrealloc(*ent, size);
+	xpread_in_full(cbm->fd, node, size, off);
+
+	total_size = ntohs(node->total_size);
+	if (total_size > size) {
+		size = total_size;
+		*ent = xrealloc(*ent, size);
+		xpread_in_full(cbm->fd, *ent, size, off);
+	}
+	ntoh_cbm_ref(*ent);
+}
+
+static void
+cbm_ref_dump(struct cbm *cbm, const struct cbm_ref *ent, cbm_off_t off)
+{
+	struct cbm_ref *tmp = alloca(ent->total_size);
+
+	memcpy(tmp, ent, ent->total_size);
+	hton_cbm_ref(tmp);
+	pwrite_or_die(cbm->fd, tmp, ent->total_size, off);
+}
+
+static cbm_off_t
+nearest_by_name(const struct cbm *cbm, struct cbm_ref **ent,
+		const char *name, cbm_off_t p)
+{
+	const uint8_t *const ubytes = (const uint8_t *)name;
+	const size_t name_len = strlen(name);
+
+	while (1 & p) {
+		uint8_t c = 0;
+		int direction;
+		struct cbm_ref *cur;
+
+		cbm_ref_load(cbm, ent, p - 1);
+		cur = *ent;
+
+		if (cur->name_byte < name_len)
+			c = ubytes[cur->name_byte];
+		direction = (1 + (cur->name_otherbits | c)) >> 8;
+		p = cur->name_child[direction];
+	}
+	return p;
+}
+
+/* returns true and populates oid if found, false if not */
+int cbm_fetch(const struct cbm *cbm, const char *name, struct object_id *oid)
+{
+	cbm_off_t p = cbm->t.root;
+	struct cbm_ref *ent = NULL;
+	int ret;
+
+	if (!p)
+		return 0;
+
+	p = nearest_by_name(cbm, &ent, name, p);
+	cbm_ref_load(cbm, &ent, p);
+	ret = 0 == strcmp(name, ent->name);
+	if (ret)
+		oidcpy(oid, &ent->oid);
+	free(ent);
+	return ret;
+}
+
+/* returns the current EOF offset, where ent->total_size bytes may be written */
+static cbm_off_t
+cbm_alloc_begin(struct cbm *cbm, const struct cbm_ref *ent)
+{
+	struct stat sb;
+	/* TODO: scan free_list */
+
+	if (fstat(cbm->fd, &sb) < 0)
+		die_errno("fstat failed");
+
+	if (sb.st_size == 0) /* new file: first entry goes after the header */
+		sb.st_size = sizeof(cbm->t);
+	else if (sb.st_size < sizeof(cbm->t))
+		die("cbm size mismatch %"PRIuMAX, (uintmax_t)sb.st_size);
+
+	if (sb.st_size > CBM_SIZE_MAX || sb.st_size < 0)
+		die("size overflow");
+
+	return (cbm_off_t)sb.st_size;
+}
+
+int cbm_set(struct cbm *cbm, const char *name, const struct object_id *oid)
+{
+	const uint8_t *const ubytes = (const uint8_t *)name;
+	const size_t name_len = strlen(name);
+	cbm_off_t p = cbm->t.root;
+	struct cbm_ref *new_node, *cur = NULL;
+	uint32_t new_byte;
+	uint32_t new_otherbits;
+	uint32_t new_direction;
+	uint8_t c;
+	size_t size = size_align(name_len + 1 + sizeof(*new_node));
+	size_t wherep;
+	const uint8_t *cur_ubytes;
+	cbm_off_t new_off;
+
+	if (size > UINT16_MAX) {
+		warning("too long: %s %"PRIuMAX, name, (uintmax_t)size);
+		return 0;
+	}
+
+	if (!p) {
+		new_node = alloca(size);
+		memset(new_node, 0, sizeof(*new_node));
+		new_node->total_size = (uint16_t)size;
+		memcpy(new_node->name, name, name_len + 1);
+		oidcpy(&new_node->oid, oid);
+
+		cbm->t.root = cbm_alloc_begin(cbm, new_node);
+		cbm_ref_dump(cbm, new_node, cbm->t.root);
+
+		return 2;
+	}
+
+	p = nearest_by_name(cbm, &cur, name, p);
+	if (!cur)
+		cbm_ref_load(cbm, &cur, p);
+
+	fprintf(stderr, "nearest :%u %p %s\n", p, cur, cur->name);
+	cur_ubytes = (void *)cur->name;
+	for (new_byte = 0; new_byte < name_len; new_byte++) {
+		if (cur_ubytes[new_byte] != ubytes[new_byte]) {
+			new_otherbits = cur_ubytes[new_byte] ^ ubytes[new_byte];
+			goto different_byte_found;
+		}
+	}
+
+	if (cur_ubytes[new_byte] != 0) {
+		new_otherbits = cur_ubytes[new_byte];
+		goto different_byte_found;
+	}
+
+	/* overwrite existing ref */
+	fprintf(stderr, "clobber\n");
+	assert(!strcmp(cur->name, name));
+	if (oidcmp(&cur->oid, oid)) {
+		oidcpy(&cur->oid, oid);
+		cbm_ref_dump(cbm, cur, p);
+	}
+	free(cur);
+
+	return 1;
+
+different_byte_found:
+
+	new_otherbits |= new_otherbits >> 1;
+	new_otherbits |= new_otherbits >> 2;
+	new_otherbits |= new_otherbits >> 4;
+	new_otherbits = (new_otherbits & ~(new_otherbits >> 1)) ^ 255;
+
+	c = cur_ubytes[new_byte];
+	new_direction = (1 + (new_otherbits | c)) >> 8;
+
+	new_node = alloca(size);
+	memset(new_node, 0, size);
+	new_node->total_size = (uint16_t)size;
+	memcpy(new_node->name, name, name_len + 1);
+	oidcpy(&new_node->oid, oid);
+
+	cbm_flush(cbm, 0); /* ensure cbm->t.root is synchronized begin: */
+	new_off = cbm_alloc_begin(cbm, new_node);
+
+	new_node->name_byte = new_byte;
+	new_node->name_otherbits = new_otherbits;
+	new_node->name_child[1 - new_direction] = new_off; /* point to self */
+
+	wherep = offsetof(struct cbm_ref_tree, root);
+	p = ptr_read(cbm, wherep);
+	while (1 & p) {
+		int direction;
+
+		p--;
+		cbm_ref_load(cbm, &cur, p);
+
+		if (cur->name_byte > new_byte)
+			break;
+		if (cur->name_byte == new_byte &&
+				cur->name_otherbits > new_otherbits)
+			break;
+		c = 0;
+		if (cur->name_byte < name_len)
+			c = ubytes[cur->name_byte];
+
+		fprintf(stderr, "off: %zu\n", offsetof(struct cbm_ref, name_child));
+		wherep = p + offsetof(struct cbm_ref, name_child);
+		direction = (1 + (cur->name_otherbits | c)) >> 8;
+		if (direction)
+			wherep += sizeof(cbm_off_t);
+		p = ptr_read(cbm, wherep);
+	}
+
+	free(cur);
+
+	new_node->name_child[new_direction] = ptr_read(cbm, wherep);
+	cbm_ref_dump(cbm, new_node, new_off);
+
+	if (wherep == offsetof(struct cbm_ref_tree, root))
+		cbm->t.root = new_off + 1; /* to be flushed, later */
+	else
+		ptr_write(cbm, wherep, new_off + 1);
+
+	return 2;
+}
+
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags)
+{
+	struct cbm_ref_tree t = cbm->t;
+
+	hton_flist(&t.free_list);
+	t.root = hton_off(t.root);
+	pwrite_or_die(cbm->fd, &t, sizeof(t), 0);
+
+	if (flush_flags & CBM_FSYNC)
+		fsync(cbm->fd);
+}
+
+/* open path and load the tree header; an empty file starts an empty tree */
+struct cbm *cbm_open(const char *path, int flags)
+{
+	struct cbm *cbm;
+	ssize_t n;
+	int fd = xopen(path, flags, 0666);
+
+	if (fd < 0)
+		return NULL;
+#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
+	{
+		int flags = fcntl(fd, F_GETFD);
+		if (flags == -1)
+			die_errno("F_GETFD failed");
+		if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1)
+			die_errno("F_SETFD failed");
+	}
+#endif
+
+	/* zeroed so that magic_bits of a new file are not heap garbage */
+	cbm = xcalloc(1, sizeof(*cbm));
+	cbm->fd = fd;
+	cbm->flags = flags;
+	n = pread_in_full(fd, &cbm->t, sizeof(cbm->t), 0);
+	if (n == 0) {
+		cbm->t.root = 0;
+		FLIST_INIT(struct cbm_ref_tree, &cbm->t, free_list);
+	} else if (n == sizeof(cbm->t)) {
+		cbm->t.root = ntoh_off(cbm->t.root);
+		fprintf(stderr, "root: %u\n", cbm->t.root);
+		ntoh_flist(&cbm->t.free_list);
+	} else if (n < 0) {
+		die_errno("pread failed");
+	} else {
+		die("unexpected cbm size: %ld", (long)n);
+	}
+	return cbm;
+}
+
+void cbm_close(struct cbm *cbm, unsigned int close_flags)
+{
+	if (cbm->flags & O_RDWR)
+		cbm_flush(cbm, close_flags);
+	else if (close_flags & CBM_FSYNC)
+		fsync(cbm->fd);
+
+	close(cbm->fd);
+	free(cbm);
+}
diff --git a/cbm_ref.h b/cbm_ref.h
new file mode 100644
index 0000000000..1ecc65052c
--- /dev/null
+++ b/cbm_ref.h
@@ -0,0 +1,16 @@
+#ifndef CBM_REF_H
+#define CBM_REF_H
+#include "cache.h"
+struct cbm;
+
+#define CBM_FSYNC (0x1u)
+
+struct cbm *cbm_open(const char *path, int flags);
+void cbm_flush(struct cbm *cbm, unsigned int flush_flags);
+void cbm_close(struct cbm *, unsigned int flags);
+
+int cbm_fetch(const struct cbm *, const char *name, struct object_id *);
+int cbm_delete(struct cbm *, const char *name, struct object_id *);
+int cbm_set(struct cbm *, const char *name, const struct object_id *);
+
+#endif /* CBM_REF_H */
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 721650256e..71648cc187 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -1,3 +1,4 @@
+/test-cbm
 /test-chmtime
 /test-ctype
 /test-config
diff --git a/t/helper/test-cbm.c b/t/helper/test-cbm.c
new file mode 100644
index 0000000000..f77603ca8f
--- /dev/null
+++ b/t/helper/test-cbm.c
@@ -0,0 +1,45 @@
+#include "cbm_ref.h"
+#include "git-compat-util.h"
+
+static const char usage_str[] = "FILE [REFNAME [HEXVALUE]]";
+
+int cmd_main(int argc, const char **argv)
+{
+	int ret = 1;
+
+	if (argc < 2 || argc > 4) { /* FILE is mandatory, 3 args at most */
+		fprintf(stderr, "usage: %s %s\n", argv[0], usage_str);
+	} else if (argc == 2) {
+		fprintf(stderr, "dump todo\n");
+	} else if (argc >= 3 && argc <= 4) {
+		struct cbm *cbm = cbm_open(argv[1], O_RDWR|O_CREAT);
+		struct object_id oid;
+
+		if (!cbm)
+			die_errno("failed to open: %s", argv[1]);
+
+		if (argc == 3) { /* FILE REFNAME: look the ref up */
+			if (cbm_fetch(cbm, argv[2], &oid)) {
+				printf("%s\n", oid_to_hex(&oid));
+				ret = 0;
+			} else {
+				fprintf(stderr, "not found\n");
+			}
+		} else if (argc == 4) { /* FILE REFNAME HEXVALUE: store it */
+			const char *p = argv[3];
+
+			if (parse_oid_hex(p, &oid, &p))
+				die("invalid hex: %s", argv[3]);
+
+			switch (cbm_set(cbm, argv[2], &oid)) {
+			case 1: fprintf(stderr, "overwritten\n"); break;
+			case 2: fprintf(stderr, "new\n"); break;
+			default: die("cbm_set error");
+			}
+
+			ret = 0;
+		}
+		cbm_close(cbm, 0);
+	}
+	return ret;
+}
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2018-06-24  8:39 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2018-06-24  8:39 UTC (permalink / raw)
  To: spew

From: Eric Wong <normalperson@yhbt.net>

---
 mjit.c    |  11 +++-
 process.c | 160 ++++++++++++++++++++++++++----------------------------
 signal.c  |  30 +++++-----
 thread.c  |   3 +
 4 files changed, 104 insertions(+), 100 deletions(-)

diff --git a/mjit.c b/mjit.c
index 55ff7e21ee..685a57dfb8 100644
--- a/mjit.c
+++ b/mjit.c
@@ -111,7 +111,8 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo
 extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void));
 
 
-pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options);
+pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options,
+                          rb_nativethread_cond_t *cond);
 
 #define RB_CONDATTR_CLOCK_MONOTONIC 1
 
@@ -385,7 +386,9 @@ exec_process(const char *path, char *const argv[])
     int stat, exit_code;
     pid_t pid;
     rb_vm_t *vm = GET_VM();
+    rb_nativethread_cond_t cond;
 
+    rb_native_cond_initialize(&cond);
     rb_nativethread_lock_lock(&vm->waitpid_lock);
     pid = start_process(path, argv);
     if (pid <= 0) {
@@ -393,10 +396,11 @@ exec_process(const char *path, char *const argv[])
         return -2;
     }
     for (;;) {
-        pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0);
+        pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0, &cond);
         if (r == -1) {
-            if (errno == EINTR) continue;
+            if (errno == EINTR) continue; /* should never happen */
             fprintf(stderr, "waitpid: %s\n", strerror(errno));
+            exit_code = -2;
             break;
         }
         else if (r == pid) {
@@ -410,6 +414,7 @@ exec_process(const char *path, char *const argv[])
         }
     }
     rb_nativethread_lock_unlock(&vm->waitpid_lock);
+    rb_native_cond_destroy(&cond);
     return exit_code;
 }
 
diff --git a/process.c b/process.c
index bdb92036b7..3b293b8d6b 100644
--- a/process.c
+++ b/process.c
@@ -899,153 +899,145 @@ do_waitpid(rb_pid_t pid, int *st, int flags)
 
 struct waitpid_state {
     struct list_node wnode;
-    rb_nativethread_cond_t cond;
+    union {
+        rb_nativethread_cond_t *cond; /* non-Ruby thread */
+        rb_execution_context_t *ec; /* normal Ruby execution context */
+    } wake;
+    struct rb_unblock_callback ubf;
     rb_pid_t ret;
     rb_pid_t pid;
     int status;
     int options;
     int errnum;
-    rb_vm_t *vm;
+    unsigned int is_ruby : 1;
 };
 
+void rb_native_mutex_lock(rb_nativethread_lock_t *);
+void rb_native_mutex_unlock(rb_nativethread_lock_t *);
 void rb_native_cond_signal(rb_nativethread_cond_t *);
 void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
-void rb_native_cond_initialize(rb_nativethread_cond_t *);
-void rb_native_cond_destroy(rb_nativethread_cond_t *);
 
-/* only called by vm->main_thread */
+/* called by timer-thread */
 void
-rb_sigchld(rb_vm_t *vm)
+ruby_waitpid_all(rb_vm_t *vm)
 {
     struct waitpid_state *w = 0, *next;
 
-    rb_nativethread_lock_lock(&vm->waitpid_lock);
+    rb_native_mutex_lock(&vm->waitpid_lock);
     list_for_each_safe(&vm->waiting_pids, w, next, wnode) {
         w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+
         if (w->ret == 0) continue;
         if (w->ret == -1) w->errnum = errno;
+
         list_del_init(&w->wnode);
-        rb_native_cond_signal(&w->cond);
+        if (w->is_ruby) {
+            /*
+             * we call this in timer-thread, because vm->main_thread
+             * cannot wake itself up...
+             */
+            rb_thread_wakeup_alive(rb_ec_thread_ptr(w->wake.ec)->self);
+        }
+        else {
+            rb_native_cond_signal(w->wake.cond);
+        }
     }
-    rb_nativethread_lock_unlock(&vm->waitpid_lock);
+    rb_native_mutex_unlock(&vm->waitpid_lock);
 }
 
 static void
-waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options)
+waitpid_state_init(struct waitpid_state *w, pid_t pid, int options)
 {
-    rb_native_cond_initialize(&w->cond);
     w->ret = 0;
     w->pid = pid;
     w->status = 0;
     w->options = options;
-    w->vm = vm;
     list_node_init(&w->wnode);
+    w->is_ruby = ruby_thread_has_gvl_p();
 }
 
-/* must be called with vm->waitpid_lock held, this is not interruptible */
+/*
+ * must be called with vm->waitpid_lock held, this is not interruptible
+ */
 pid_t
-ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options)
+ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options,
+                    rb_nativethread_cond_t *cond)
 {
     struct waitpid_state w;
 
     assert(!ruby_thread_has_gvl_p() && "must not have GVL");
 
-    waitpid_state_init(&w, vm, pid, options);
+    waitpid_state_init(&w, pid, options);
     w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG);
     if (w.ret) {
-        if (w.ret == -1) {
-            w.errnum = errno;
-        }
+        if (w.ret == -1) w.errnum = errno;
     }
     else {
+        w.wake.cond = cond;
         list_add(&vm->waiting_pids, &w.wnode);
-        while (!w.ret) {
-            rb_native_cond_wait(&w.cond, &vm->waitpid_lock);
-        }
+        do {
+            rb_native_cond_wait(w.wake.cond, &vm->waitpid_lock);
+        } while (!w.ret);
         list_del(&w.wnode);
     }
     if (status) {
         *status = w.status;
     }
-    rb_native_cond_destroy(&w.cond);
     errno = w.errnum;
     return w.ret;
 }
 
-static void
-waitpid_ubf(void *x)
-{
-    struct waitpid_state *w = x;
-    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-    if (!w->ret) {
-        w->errnum = EINTR;
-        w->ret = -1;
-    }
-    rb_native_cond_signal(&w->cond);
-    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-}
-
-static void *
-waitpid_nogvl(void *x)
+static VALUE
+waitpid_sleep(VALUE x)
 {
-    struct waitpid_state *w = x;
+    struct waitpid_state *w = (struct waitpid_state *)x;
 
-    /* let rb_sigchld handle it */
-    rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock);
+    do {
+        rb_thread_sleep_forever(); /* handles interrupts for us */
+    } while (!w->ret);
 
-    return 0;
+    return Qfalse;
 }
 
 static VALUE
-waitpid_wait(VALUE x)
+waitpid_ensure(VALUE x)
 {
     struct waitpid_state *w = (struct waitpid_state *)x;
 
-    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-    w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
-    if (w->ret) {
-        if (w->ret == -1) {
-            w->errnum = errno;
-        }
-    }
-    else {
-        rb_execution_context_t *ec = GET_EC();
+    if (w->ret <= 0) {
+        rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
 
-        list_add(&w->vm->waiting_pids, &w->wnode);
-        do {
-            rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w);
-            if (RUBY_VM_INTERRUPTED_ANY(ec) ||
-                    (w->ret == -1 && w->errnum == EINTR)) {
-                rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-
-                RUBY_VM_CHECK_INTS(ec);
-
-                rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-                if (w->ret == -1 && w->errnum == EINTR) {
-                    w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG);
-                    if (w->ret == -1)
-                        w->errnum = errno;
-                }
-            }
-        } while (!w->ret);
+        rb_native_mutex_lock(&vm->waitpid_lock);
+        list_del(&w->wnode);
+        rb_native_mutex_unlock(&vm->waitpid_lock);
     }
-    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
     return Qfalse;
 }
 
-static VALUE
-waitpid_ensure(VALUE x)
+static void
+waitpid_wait(struct waitpid_state *w)
 {
-    struct waitpid_state *w = (struct waitpid_state *)x;
+    rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
 
-    if (w->ret <= 0) {
-        rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-        list_del_init(&w->wnode);
-        rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+    /*
+     * Lock here to prevent do_waitpid from stealing work from the
+     * ruby_waitpid_locked done by mjit workers since mjit works
+     * outside of GVL
+     */
+    rb_native_mutex_lock(&vm->waitpid_lock);
+
+    w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+    if (w->ret) {
+        if (w->ret == -1) w->errnum = errno;
+
+        rb_native_mutex_unlock(&vm->waitpid_lock);
     }
+    else {
+        list_add(&vm->waiting_pids, &w->wnode);
+        rb_native_mutex_unlock(&vm->waitpid_lock);
 
-    rb_native_cond_destroy(&w->cond);
-    return Qfalse;
+        rb_ensure(waitpid_sleep, (VALUE)w, waitpid_ensure, (VALUE)w);
+    }
 }
 
 rb_pid_t
@@ -1059,11 +1051,10 @@ rb_waitpid(rb_pid_t pid, int *st, int flags)
     else {
         struct waitpid_state w;
 
-        waitpid_state_init(&w, GET_VM(), pid, flags);
-        rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w);
-        if (st) {
-            *st = w.status;
-        }
+        waitpid_state_init(&w, pid, flags);
+        w.wake.ec = GET_EC();
+        waitpid_wait(&w);
+        if (st) *st = w.status;
         result = w.ret;
     }
     if (result > 0) {
@@ -1348,6 +1339,9 @@ after_exec_non_async_signal_safe(void)
 {
     rb_thread_reset_timer_thread();
     rb_thread_start_timer_thread();
+    if (rb_signal_buff_size()) {
+        rb_thread_wakeup_timer_thread();
+    }
 }
 
 static void
diff --git a/signal.c b/signal.c
index c20b01ea36..e339434113 100644
--- a/signal.c
+++ b/signal.c
@@ -1052,18 +1052,23 @@ rb_trap_exit(void)
     }
 }
 
-static int
-sig_is_chld(int sig)
-{
 #if defined(SIGCLD)
-    return (sig == SIGCLD);
+#  define RUBY_SIGCHLD    (SIGCLD)
 #elif defined(SIGCHLD)
-    return (sig == SIGCHLD);
+#  define RUBY_SIGCHLD    (SIGCHLD)
+#else
+#  define RUBY_SIGCHLD    (0)
 #endif
-    return 0;
-}
 
-void rb_sigchld(rb_vm_t *); /* process.c */
+void ruby_waitpid_all(rb_vm_t *); /* process.c */
+
+void
+ruby_sigchld_handler(rb_vm_t *vm)
+{
+    if (signal_buff.cnt[RUBY_SIGCHLD]) {
+        ruby_waitpid_all(vm);
+    }
+}
 
 void
 rb_signal_exec(rb_thread_t *th, int sig)
@@ -1072,9 +1077,6 @@ rb_signal_exec(rb_thread_t *th, int sig)
     VALUE cmd = vm->trap_list.cmd[sig];
     int safe = vm->trap_list.safe[sig];
 
-    if (sig_is_chld(sig)) {
-	rb_sigchld(vm);
-    }
     if (cmd == 0) {
 	switch (sig) {
 	  case SIGINT:
@@ -1176,7 +1178,7 @@ trap_handler(VALUE *cmd, int sig)
     VALUE command;
 
     if (NIL_P(*cmd)) {
-	if (sig_is_chld(sig)) {
+	if (sig == RUBY_SIGCHLD) {
 	    goto sig_dfl;
 	}
 	func = SIG_IGN;
@@ -1199,7 +1201,7 @@ trap_handler(VALUE *cmd, int sig)
 		break;
               case 14:
 		if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) {
-		    if (sig_is_chld(sig)) {
+		    if (sig == RUBY_SIGCHLD) {
 			goto sig_dfl;
 		    }
                     func = SIG_DFL;
@@ -1209,7 +1211,7 @@ trap_handler(VALUE *cmd, int sig)
 	      case 7:
 		if (memcmp(cptr, "SIG_IGN", 7) == 0) {
 sig_ign:
-		    if (sig_is_chld(sig)) {
+		    if (sig == RUBY_SIGCHLD) {
 			goto sig_dfl;
 		    }
                     func = SIG_IGN;
diff --git a/thread.c b/thread.c
index 8c9aafe07a..613599eaa6 100644
--- a/thread.c
+++ b/thread.c
@@ -4146,6 +4146,8 @@ rb_threadptr_check_signal(rb_thread_t *mth)
     }
 }
 
+void ruby_sigchld_handler(rb_vm_t *); /* signal.c */
+
 static void
 timer_thread_function(void *arg)
 {
@@ -4164,6 +4166,7 @@ timer_thread_function(void *arg)
     rb_native_mutex_unlock(&vm->thread_destruct_lock);
 
     /* check signal */
+    ruby_sigchld_handler(vm);
     rb_threadptr_check_signal(vm->main_thread);
 
 #if 0
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2018-06-24 11:55 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2018-06-24 11:55 UTC (permalink / raw)
  To: spew

From: Eric Wong <normalperson@yhbt.net>

---
 mjit.c           |  13 ++--
 process.c        | 173 ++++++++++++++++++++++++-----------------------
 signal.c         |  66 ++++++++----------
 thread.c         |  11 +++
 thread_pthread.c |   2 +-
 vm_core.h        |   3 +
 6 files changed, 141 insertions(+), 127 deletions(-)

diff --git a/mjit.c b/mjit.c
index c9c1208355..82c8ae2d7f 100644
--- a/mjit.c
+++ b/mjit.c
@@ -111,7 +111,8 @@ extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lo
 extern int rb_thread_create_mjit_thread(void (*child_hook)(void), void (*worker_func)(void));
 
 
-pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options);
+pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options,
+                          rb_nativethread_cond_t *cond);
 
 #define RB_CONDATTR_CLOCK_MONOTONIC 1
 
@@ -388,6 +389,7 @@ exec_process(const char *path, char *const argv[])
     int stat, exit_code;
     pid_t pid;
     rb_vm_t *vm = GET_VM();
+    rb_nativethread_cond_t cond;
 
     rb_nativethread_lock_lock(&vm->waitpid_lock);
     pid = start_process(path, argv);
@@ -395,11 +397,13 @@ exec_process(const char *path, char *const argv[])
         rb_nativethread_lock_unlock(&vm->waitpid_lock);
         return -2;
     }
+    rb_native_cond_initialize(&cond);
     for (;;) {
-        pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0);
+        pid_t r = ruby_waitpid_locked(vm, pid, &stat, 0, &cond);
         if (r == -1) {
-            if (errno == EINTR) continue;
+            if (errno == EINTR) continue; /* should never happen */
             fprintf(stderr, "waitpid: %s\n", strerror(errno));
+            exit_code = -2;
             break;
         }
         else if (r == pid) {
@@ -413,6 +417,7 @@ exec_process(const char *path, char *const argv[])
         }
     }
     rb_nativethread_lock_unlock(&vm->waitpid_lock);
+    rb_native_cond_destroy(&cond);
     return exit_code;
 }
 
@@ -1563,7 +1568,7 @@ mjit_finish(void)
        absence.  So wait for a clean finish of the threads.  */
     while (pch_status == PCH_NOT_READY) {
         verbose(3, "Waiting wakeup from make_pch");
-	/* release GVL to handle interrupts */
+        /* release GVL to handle interrupts */
         rb_thread_call_without_gvl(wait_pch, 0, ubf_pch, 0);
     }
     CRITICAL_SECTION_FINISH(3, "in mjit_finish to wakeup from pch");
diff --git a/process.c b/process.c
index bdb92036b7..044804338f 100644
--- a/process.c
+++ b/process.c
@@ -899,153 +899,153 @@ do_waitpid(rb_pid_t pid, int *st, int flags)
 
 struct waitpid_state {
     struct list_node wnode;
-    rb_nativethread_cond_t cond;
+    union {
+        rb_nativethread_cond_t *cond; /* non-Ruby thread */
+        rb_execution_context_t *ec; /* normal Ruby execution context */
+    } wake;
     rb_pid_t ret;
     rb_pid_t pid;
     int status;
     int options;
     int errnum;
-    rb_vm_t *vm;
+    unsigned int is_ruby : 1;
 };
 
+void rb_native_mutex_lock(rb_nativethread_lock_t *);
+void rb_native_mutex_unlock(rb_nativethread_lock_t *);
 void rb_native_cond_signal(rb_nativethread_cond_t *);
 void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
-void rb_native_cond_initialize(rb_nativethread_cond_t *);
-void rb_native_cond_destroy(rb_nativethread_cond_t *);
 
-/* only called by vm->main_thread */
+FILE *mstream;
+
+/* called by main thread */
 void
-rb_sigchld(rb_vm_t *vm)
+rb_waitpid_all(rb_vm_t *vm)
 {
     struct waitpid_state *w = 0, *next;
 
-    rb_nativethread_lock_lock(&vm->waitpid_lock);
+    rb_native_mutex_lock(&vm->waitpid_lock);
     list_for_each_safe(&vm->waiting_pids, w, next, wnode) {
         w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+
         if (w->ret == 0) continue;
         if (w->ret == -1) w->errnum = errno;
+
         list_del_init(&w->wnode);
-        rb_native_cond_signal(&w->cond);
+        if (mstream) fprintf(mstream, "%d %d %d\n", getpid(), w->ret, w->pid);
+        if (w->is_ruby) {
+            /*
+             * wake the Ruby thread sleeping in waitpid_sleep so that
+             * it sees w->ret; our caller here is rb_signal_exec
+             */
+            rb_thread_wakeup_alive(rb_ec_thread_ptr(w->wake.ec)->self);
+        }
+        else {
+            rb_native_cond_signal(w->wake.cond);
+        }
     }
-    rb_nativethread_lock_unlock(&vm->waitpid_lock);
+    rb_native_mutex_unlock(&vm->waitpid_lock);
 }
 
 static void
-waitpid_state_init(struct waitpid_state *w, rb_vm_t *vm, pid_t pid, int options)
+waitpid_state_init(struct waitpid_state *w, pid_t pid, int options)
 {
-    rb_native_cond_initialize(&w->cond);
     w->ret = 0;
     w->pid = pid;
-    w->status = 0;
     w->options = options;
-    w->vm = vm;
     list_node_init(&w->wnode);
 }
 
-/* must be called with vm->waitpid_lock held, this is not interruptible */
+/*
+ * must be called with vm->waitpid_lock held, this is not interruptible
+ */
 pid_t
-ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options)
+ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options,
+                    rb_nativethread_cond_t *cond)
 {
     struct waitpid_state w;
 
     assert(!ruby_thread_has_gvl_p() && "must not have GVL");
 
-    waitpid_state_init(&w, vm, pid, options);
+    waitpid_state_init(&w, pid, options);
+    w.is_ruby = 0;
     w.ret = do_waitpid(w.pid, &w.status, w.options | WNOHANG);
     if (w.ret) {
-        if (w.ret == -1) {
-            w.errnum = errno;
-        }
+        if (w.ret == -1) w.errnum = errno;
     }
     else {
+        w.wake.cond = cond;
         list_add(&vm->waiting_pids, &w.wnode);
-        while (!w.ret) {
-            rb_native_cond_wait(&w.cond, &vm->waitpid_lock);
-        }
+        if (mstream) fprintf(mstream, "%d add- %d\n", getpid(), w.pid);
+        do {
+            rb_native_cond_wait(w.wake.cond, &vm->waitpid_lock);
+        } while (!w.ret);
         list_del(&w.wnode);
     }
     if (status) {
         *status = w.status;
     }
-    rb_native_cond_destroy(&w.cond);
     errno = w.errnum;
     return w.ret;
 }
 
-static void
-waitpid_ubf(void *x)
-{
-    struct waitpid_state *w = x;
-    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-    if (!w->ret) {
-        w->errnum = EINTR;
-        w->ret = -1;
-    }
-    rb_native_cond_signal(&w->cond);
-    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-}
+void rb_thread_sleep_interruptible(struct timespec *ts); /* thread.c */
 
-static void *
-waitpid_nogvl(void *x)
+static VALUE
+waitpid_sleep(VALUE x)
 {
-    struct waitpid_state *w = x;
+    struct waitpid_state *w = (struct waitpid_state *)x;
 
-    /* let rb_sigchld handle it */
-    rb_native_cond_wait(&w->cond, &w->vm->waitpid_lock);
+    rb_thread_check_ints();
+    while (!w->ret) {
+        rb_thread_sleep_interruptible(0);
+        rb_thread_check_ints();
+    }
 
-    return 0;
+    return Qfalse;
 }
 
 static VALUE
-waitpid_wait(VALUE x)
+waitpid_ensure(VALUE x)
 {
     struct waitpid_state *w = (struct waitpid_state *)x;
 
-    rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-    w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
-    if (w->ret) {
-        if (w->ret == -1) {
-            w->errnum = errno;
-        }
-    }
-    else {
-        rb_execution_context_t *ec = GET_EC();
+    if (w->ret == 0) {
+        rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
 
-        list_add(&w->vm->waiting_pids, &w->wnode);
-        do {
-            rb_thread_call_without_gvl2(waitpid_nogvl, w, waitpid_ubf, w);
-            if (RUBY_VM_INTERRUPTED_ANY(ec) ||
-                    (w->ret == -1 && w->errnum == EINTR)) {
-                rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
-
-                RUBY_VM_CHECK_INTS(ec);
-
-                rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-                if (w->ret == -1 && w->errnum == EINTR) {
-                    w->ret = do_waitpid(w->pid, &w->status, w->options|WNOHANG);
-                    if (w->ret == -1)
-                        w->errnum = errno;
-                }
-            }
-        } while (!w->ret);
+        if (mstream) fprintf(mstream, "%d ensure del %d\n", getpid(), w->pid);
+        rb_native_mutex_lock(&vm->waitpid_lock);
+        list_del(&w->wnode);
+        rb_native_mutex_unlock(&vm->waitpid_lock);
     }
-    rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
     return Qfalse;
 }
 
-static VALUE
-waitpid_ensure(VALUE x)
+static void
+waitpid_wait(struct waitpid_state *w)
 {
-    struct waitpid_state *w = (struct waitpid_state *)x;
+    rb_vm_t *vm = rb_ec_vm_ptr(w->wake.ec);
 
-    if (w->ret <= 0) {
-        rb_nativethread_lock_lock(&w->vm->waitpid_lock);
-        list_del_init(&w->wnode);
-        rb_nativethread_lock_unlock(&w->vm->waitpid_lock);
+    /*
+     * Lock here to prevent do_waitpid from stealing work from the
+     * ruby_waitpid_locked done by mjit workers since mjit works
+     * outside of GVL
+     */
+    rb_native_mutex_lock(&vm->waitpid_lock);
+
+    w->ret = do_waitpid(w->pid, &w->status, w->options | WNOHANG);
+    if (w->ret) {
+        if (w->ret == -1) w->errnum = errno;
+
+        rb_native_mutex_unlock(&vm->waitpid_lock);
     }
+    else {
+        list_add(&vm->waiting_pids, &w->wnode);
+        if (mstream) fprintf(mstream, "%d add++ %d\n", getpid(), w->pid);
+        rb_native_mutex_unlock(&vm->waitpid_lock);
 
-    rb_native_cond_destroy(&w->cond);
-    return Qfalse;
+        rb_ensure(waitpid_sleep, (VALUE)w, waitpid_ensure, (VALUE)w);
+    }
 }
 
 rb_pid_t
@@ -1059,11 +1059,11 @@ rb_waitpid(rb_pid_t pid, int *st, int flags)
     else {
         struct waitpid_state w;
 
-        waitpid_state_init(&w, GET_VM(), pid, flags);
-        rb_ensure(waitpid_wait, (VALUE)&w, waitpid_ensure, (VALUE)&w);
-        if (st) {
-            *st = w.status;
-        }
+        waitpid_state_init(&w, pid, flags);
+        w.is_ruby = 1;
+        w.wake.ec = GET_EC();
+        waitpid_wait(&w);
+        if (st) *st = w.status;
         result = w.ret;
     }
     if (result > 0) {
@@ -1348,6 +1348,9 @@ after_exec_non_async_signal_safe(void)
 {
     rb_thread_reset_timer_thread();
     rb_thread_start_timer_thread();
+    if (rb_signal_buff_size()) {
+        rb_thread_wakeup_timer_thread();
+    }
 }
 
 static void
@@ -8227,4 +8230,8 @@ Init_process(void)
     id_exception = rb_intern("exception");
 
     InitVM(process);
+    if (0) {
+        mstream = fopen("/tmp/wait.log", "a");
+        setvbuf(mstream, NULL, _IONBF, 0);
+    }
 }
diff --git a/signal.c b/signal.c
index c20b01ea36..22fc4286d9 100644
--- a/signal.c
+++ b/signal.c
@@ -66,6 +66,14 @@ ruby_atomic_compare_and_swap(rb_atomic_t *ptr, rb_atomic_t cmp,
 # define NSIG (_SIGMAX + 1)      /* For QNX */
 #endif
 
+#if defined(SIGCLD)
+#  define RUBY_SIGCHLD    (SIGCLD)
+#elif defined(SIGCHLD)
+#  define RUBY_SIGCHLD    (SIGCHLD)
+#else
+#  define RUBY_SIGCHLD    (0)
+#endif
+
 static const struct signals {
     const char *signm;
     int  signo;
@@ -129,15 +137,9 @@ static const struct signals {
 #ifdef SIGCONT
     {"CONT", SIGCONT},
 #endif
-#ifdef SIGCHLD
-    {"CHLD", SIGCHLD},
-#endif
-#ifdef SIGCLD
-    {"CLD", SIGCLD},
-#else
-# ifdef SIGCHLD
-    {"CLD", SIGCHLD},
-# endif
+#if RUBY_SIGCHLD
+    {"CHLD", RUBY_SIGCHLD },
+    {"CLD", RUBY_SIGCHLD },
 #endif
 #ifdef SIGTTIN
     {"TTIN", SIGTTIN},
@@ -1052,18 +1054,7 @@ rb_trap_exit(void)
     }
 }
 
-static int
-sig_is_chld(int sig)
-{
-#if defined(SIGCLD)
-    return (sig == SIGCLD);
-#elif defined(SIGCHLD)
-    return (sig == SIGCHLD);
-#endif
-    return 0;
-}
-
-void rb_sigchld(rb_vm_t *); /* process.c */
+void rb_waitpid_all(rb_vm_t *); /* process.c */
 
 void
 rb_signal_exec(rb_thread_t *th, int sig)
@@ -1072,9 +1063,10 @@ rb_signal_exec(rb_thread_t *th, int sig)
     VALUE cmd = vm->trap_list.cmd[sig];
     int safe = vm->trap_list.safe[sig];
 
-    if (sig_is_chld(sig)) {
-	rb_sigchld(vm);
+    if (sig == RUBY_SIGCHLD) {
+        rb_waitpid_all(vm);
     }
+
     if (cmd == 0) {
 	switch (sig) {
 	  case SIGINT:
@@ -1134,10 +1126,8 @@ default_handler(int sig)
 #ifdef SIGUSR2
       case SIGUSR2:
 #endif
-#ifdef SIGCLD
-      case SIGCLD:
-#elif defined(SIGCHLD)
-      case SIGCHLD:
+#if RUBY_SIGCHLD
+      case RUBY_SIGCHLD:
 #endif
         func = sighandler;
         break;
@@ -1176,7 +1166,7 @@ trap_handler(VALUE *cmd, int sig)
     VALUE command;
 
     if (NIL_P(*cmd)) {
-	if (sig_is_chld(sig)) {
+	if (sig == RUBY_SIGCHLD) {
 	    goto sig_dfl;
 	}
 	func = SIG_IGN;
@@ -1199,9 +1189,9 @@ trap_handler(VALUE *cmd, int sig)
 		break;
               case 14:
 		if (memcmp(cptr, "SYSTEM_DEFAULT", 14) == 0) {
-		    if (sig_is_chld(sig)) {
-			goto sig_dfl;
-		    }
+                    if (sig == RUBY_SIGCHLD) {
+                        goto sig_dfl;
+                    }
                     func = SIG_DFL;
                     *cmd = 0;
 		}
@@ -1209,9 +1199,9 @@ trap_handler(VALUE *cmd, int sig)
 	      case 7:
 		if (memcmp(cptr, "SIG_IGN", 7) == 0) {
 sig_ign:
-		    if (sig_is_chld(sig)) {
-			goto sig_dfl;
-		    }
+                    if (sig == RUBY_SIGCHLD) {
+                        goto sig_dfl;
+                    }
                     func = SIG_IGN;
                     *cmd = Qtrue;
 		}
@@ -1443,7 +1433,7 @@ install_sighandler(int signum, sighandler_t handler)
 #  define install_sighandler(signum, handler) \
     INSTALL_SIGHANDLER(install_sighandler(signum, handler), #signum, signum)
 
-#if defined(SIGCLD) || defined(SIGCHLD)
+#if RUBY_SIGCHLD
 static int
 init_sigchld(int sig)
 {
@@ -1570,10 +1560,8 @@ Init_signal(void)
     install_sighandler(SIGSYS, sig_do_nothing);
 #endif
 
-#if defined(SIGCLD)
-    init_sigchld(SIGCLD);
-#elif defined(SIGCHLD)
-    init_sigchld(SIGCHLD);
+#if RUBY_SIGCHLD
+    init_sigchld(RUBY_SIGCHLD);
 #endif
 
     rb_enable_interrupt();
diff --git a/thread.c b/thread.c
index 8c9aafe07a..ab27c60632 100644
--- a/thread.c
+++ b/thread.c
@@ -1287,6 +1287,17 @@ rb_thread_sleep_forever(void)
     sleep_forever(GET_THREAD(), SLEEP_SPURIOUS_CHECK);
 }
 
+void
+rb_thread_sleep_interruptible(struct timespec *ts)
+{
+    rb_thread_t *th = GET_THREAD();
+    enum rb_thread_status prev_status = th->status;
+
+    th->status = THREAD_STOPPED;
+    native_sleep(th, ts);
+    th->status = prev_status;
+}
+
 void
 rb_thread_sleep_deadly(void)
 {
diff --git a/thread_pthread.c b/thread_pthread.c
index 1a1a6fc0c6..4d6afac3fb 100644
--- a/thread_pthread.c
+++ b/thread_pthread.c
@@ -1548,7 +1548,7 @@ rb_thread_create_timer_thread(void)
 	    };
 	    stack_size = stack_min;
 	    if (stack_size < min_size) stack_size = min_size;
-	    if (needs_more_stack) {
+	    if (1 || needs_more_stack) {
 		stack_size += +((BUFSIZ - 1) / stack_min + 1) * stack_min;
 	    }
 	    err = pthread_attr_setstacksize(&attr, stack_size);
diff --git a/vm_core.h b/vm_core.h
index d0689e5ba6..29f12abbf0 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -1559,11 +1559,14 @@ void rb_thread_stop_timer_thread(void);
 void rb_thread_reset_timer_thread(void);
 void rb_thread_wakeup_timer_thread(void);
 
+extern FILE *mstream;
+
 static inline void
 rb_vm_living_threads_init(rb_vm_t *vm)
 {
     list_head_init(&vm->waiting_fds);
     list_head_init(&vm->waiting_pids);
+    if (mstream) fprintf(mstream, "clobber: %d\n", getpid());
     list_head_init(&vm->living_threads);
     vm->living_thread_num = 0;
 }
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2018-07-06 21:31 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2018-07-06 21:31 UTC (permalink / raw)
  To: spew

---
 configure.ac     |  5 ++++
 thread_pthread.c | 60 ++++++++++++++++++++++++++++++------------------
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/configure.ac b/configure.ac
index 074a3fd394f..899f2437467 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1998,6 +1998,11 @@ AS_IF([test x"$ac_cv_func_clock_gettime" != xyes], [
     ])
 ])
 AC_CHECK_FUNCS(clock_getres) # clock_getres should be tested after clock_gettime test including librt test.
+AC_CHECK_LIB([rt], [timer_create], [
+  AC_CHECK_LIB([rt], [timer_settime], [
+    AC_CHECK_LIB([rt], [timer_gettime], [
+      AC_CHECK_LIB([rt], [timer_delete], [AC_DEFINE(HAVE_POSIX_TIMERS)])
+])])])
 
 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
diff --git a/thread_pthread.c b/thread_pthread.c
index 722ce444879..c4670efc9e3 100644
--- a/thread_pthread.c
+++ b/thread_pthread.c
@@ -34,6 +34,9 @@
 #if defined(__HAIKU__)
 #include <kernel/OS.h>
 #endif
+#if defined(HAVE_TIME_H)
+#include <time.h>
+#endif
 
 void rb_native_mutex_lock(rb_nativethread_lock_t *lock);
 void rb_native_mutex_unlock(rb_nativethread_lock_t *lock);
@@ -46,6 +49,18 @@ void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *m
 void rb_native_cond_initialize(rb_nativethread_cond_t *cond);
 void rb_native_cond_destroy(rb_nativethread_cond_t *cond);
 static void rb_thread_wakeup_timer_thread_low(void);
+
+#define TIMER_THREAD_MASK    (1)
+#define TIMER_THREAD_SLEEPY  (2|TIMER_THREAD_MASK)
+#define TIMER_THREAD_BUSY    (4|TIMER_THREAD_MASK)
+
+#if defined(HAVE_POLL) && defined(HAVE_FCNTL) && defined(F_GETFL) && defined(F_SETFL) && defined(O_NONBLOCK)
+/* The timer thread sleeps while only one Ruby thread is running. */
+# define TIMER_IMPL TIMER_THREAD_SLEEPY
+#else
+# define TIMER_IMPL TIMER_THREAD_BUSY
+#endif
+
 static struct {
     pthread_t id;
     int created;
@@ -61,13 +76,6 @@ static pthread_condattr_t *condattr_monotonic = &condattr_mono;
 static const void *const condattr_monotonic = NULL;
 #endif
 
-#if defined(HAVE_POLL) && defined(HAVE_FCNTL) && defined(F_GETFL) && defined(F_SETFL) && defined(O_NONBLOCK)
-/* The timer thread sleeps while only one Ruby thread is running. */
-# define USE_SLEEPY_TIMER_THREAD 1
-#else
-# define USE_SLEEPY_TIMER_THREAD 0
-#endif
-
 static void
 gvl_acquire_common(rb_vm_t *vm)
 {
@@ -982,6 +990,7 @@ native_thread_create(rb_thread_t *th)
     return err;
 }
 
+#if (TIMER_IMPL & TIMER_THREAD_MASK)
 static void
 native_thread_join(pthread_t th)
 {
@@ -990,6 +999,7 @@ native_thread_join(pthread_t th)
 	rb_raise(rb_eThreadError, "native_thread_join() failed (%d)", err);
     }
 }
+#endif /* TIMER_THREAD_MASK */
 
 #if USE_NATIVE_THREAD_PRIORITY
 
@@ -1138,8 +1148,10 @@ ubf_select(void *ptr)
      * In the other hands, we shouldn't call rb_thread_wakeup_timer_thread()
      * if running on timer thread because it may make endless wakeups.
      */
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
     if (!pthread_equal(pthread_self(), timer_thread.id))
 	rb_thread_wakeup_timer_thread();
+#endif
     ubf_wakeup_thread(th);
 }
 
@@ -1181,7 +1193,7 @@ static int ubf_threads_empty(void) { return 1; }
  */
 #define TIME_QUANTUM_USEC (100 * 1000)
 
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
 static struct {
     /*
      * Read end of each pipe is closed inside timer thread for shutdown
@@ -1419,8 +1431,9 @@ timer_thread_sleep(rb_vm_t *vm)
 	}
     }
 }
+#endif /* TIMER_THREAD_SLEEPY */
 
-#else /* USE_SLEEPY_TIMER_THREAD */
+#if TIMER_IMPL == TIMER_THREAD_BUSY
 # define PER_NANO 1000000000
 void rb_thread_wakeup_timer_thread(void) {}
 static void rb_thread_wakeup_timer_thread_low(void) {}
@@ -1438,7 +1451,7 @@ timer_thread_sleep(rb_vm_t *unused)
 
     native_cond_timedwait(&timer_thread_cond, &timer_thread_lock, &ts);
 }
-#endif /* USE_SLEEPY_TIMER_THREAD */
+#endif /* TIMER_IMPL == TIMER_THREAD_BUSY */
 
 #if !defined(SET_CURRENT_THREAD_NAME) && defined(__linux__) && defined(PR_SET_NAME)
 # define SET_CURRENT_THREAD_NAME(name) prctl(PR_SET_NAME, name)
@@ -1508,7 +1521,7 @@ thread_timer(void *p)
     SET_CURRENT_THREAD_NAME("ruby-timer-thr");
 #endif
 
-#if !USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_BUSY
     rb_native_mutex_initialize(&timer_thread_lock);
     rb_native_cond_initialize(&timer_thread_cond);
     rb_native_mutex_lock(&timer_thread_lock);
@@ -1524,10 +1537,11 @@ thread_timer(void *p)
         /* wait */
 	timer_thread_sleep(vm);
     }
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
     CLOSE_INVALIDATE(normal[0]);
     CLOSE_INVALIDATE(low[0]);
-#else
+#endif
+#if TIMER_IMPL == TIMER_THREAD_BUSY
     rb_native_mutex_unlock(&timer_thread_lock);
     rb_native_cond_destroy(&timer_thread_cond);
     rb_native_mutex_destroy(&timer_thread_lock);
@@ -1537,6 +1551,7 @@ thread_timer(void *p)
     return NULL;
 }
 
+#if (TIMER_IMPL & TIMER_THREAD_MASK)
 static void
 rb_thread_create_timer_thread(void)
 {
@@ -1581,14 +1596,14 @@ rb_thread_create_timer_thread(void)
 	}
 # endif
 
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
 	err = setup_communication_pipe();
 	if (err != 0) {
 	    rb_warn("pipe creation failed for timer: %s, scheduling broken",
 		    strerror(err));
 	    return;
 	}
-#endif /* USE_SLEEPY_TIMER_THREAD */
+#endif /* TIMER_THREAD_SLEEPY */
 
 	/* create timer thread */
 	if (timer_thread.created) {
@@ -1617,21 +1632,22 @@ rb_thread_create_timer_thread(void)
 		rb_warn("timer thread stack size: system default");
 	    }
 	    VM_ASSERT(err == 0);
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
 	    CLOSE_INVALIDATE(normal[0]);
 	    CLOSE_INVALIDATE(normal[1]);
 	    CLOSE_INVALIDATE(low[0]);
 	    CLOSE_INVALIDATE(low[1]);
-#endif
+#endif /* TIMER_THREAD_SLEEPY */
 	    return;
 	}
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
 	/* validate pipe on this process */
 	timer_thread_pipe.owner_process = getpid();
-#endif
+#endif /* TIMER_THREAD_SLEEPY */
 	timer_thread.created = 1;
     }
 }
+#endif /* TIMER_IMPL & TIMER_THREAD_MASK */
 
 static int
 native_stop_timer_thread(void)
@@ -1641,7 +1657,7 @@ native_stop_timer_thread(void)
 
     if (TT_DEBUG) fprintf(stderr, "stop timer thread\n");
     if (stopped) {
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
 	/* prevent wakeups from signal handler ASAP */
 	timer_thread_pipe.owner_process = 0;
 
@@ -1662,7 +1678,7 @@ native_stop_timer_thread(void)
 	/* timer thread will stop looping when system_working <= 0: */
 	native_thread_join(timer_thread.id);
 
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
 	/* timer thread will close the read end on exit: */
 	VM_ASSERT(timer_thread_pipe.normal[0] == -1);
 	VM_ASSERT(timer_thread_pipe.low[0] == -1);
@@ -1727,7 +1743,7 @@ ruby_stack_overflowed_p(const rb_thread_t *th, const void *addr)
 int
 rb_reserved_fd_p(int fd)
 {
-#if USE_SLEEPY_TIMER_THREAD
+#if TIMER_IMPL == TIMER_THREAD_SLEEPY
     if ((fd == timer_thread_pipe.normal[0] ||
 	 fd == timer_thread_pipe.normal[1] ||
 	 fd == timer_thread_pipe.low[0] ||
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2019-01-02  9:21 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2019-01-02  9:21 UTC (permalink / raw)
  To: spew

---
 ci/GNUmakefile | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 ci/GNUmakefile

diff --git a/ci/GNUmakefile b/ci/GNUmakefile
new file mode 100644
index 0000000..362bd34
--- /dev/null
+++ b/ci/GNUmakefile
@@ -0,0 +1,63 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+PERL = perl
+SUDO = sudo
+GIT = git
+GIT_URL = https://public-inbox.org/
+TMPDIR = /tmp
+BUILDDIR = $(TMPDIR)/public-inbox-$@
+
+cur_git_dir := $(shell $(GIT) rev-parse --git-dir)
+ifneq ($(cur_git_dir),)
+	git_reference = --reference $(cur_git_dir)
+endif
+
+all ::
+	@echo usage:
+	@echo $(MAKE) sid-all
+	@echo $(MAKE) sid-min
+
+sid_all :=
+sid_min :=
+
+sid_min += libdatetime-perl
+sid_min += libemail-mime-perl
+sid_min += libplack-perl
+sid_min += liburi-perl
+
+# Plack depends on this:
+sid_min += libfilesys-notify-simple-perl
+
+sid_all += libdanga-socket-perl
+sid_all += libdbd-sqlite3-perl
+sid_all += libdbi-perl
+sid_all += libplack-middleware-deflater-perl
+sid_all += libplack-middleware-reverseproxy-perl
+sid_all += libsearch-xapian-perl
+sid_all += libsocket6-perl
+sid_all += libipc-run-perl
+sid_all += xapian-tools
+sid_all += spamassassin
+sid_all += liblinux-inotify2-perl
+
+sid_remove = $(addsuffix -,$(filter-out $(sid_min),$(sid_all)))
+
+install-debs-sid-all ::
+	$(SUDO) apt-get install -yqq $(sid_all)
+
+install-debs-sid-min ::
+	$(SUDO) apt-get install --purge -yqq $(sid_min) $(sid_remove)
+	$(SUDO) apt-get autoremove --purge -yqq
+
+sid-min sid-all ::
+	$(MAKE) install-debs-$@ build-and-test BUILDDIR=$(TMPDIR)/$@
+
+build-and-test ::
+	$(RM) -r $(BUILDDIR)
+	mkdir -p $(BUILDDIR)
+	$(GIT) clone -q $(git_reference) $(GIT_URL) $(BUILDDIR)
+	cd $(BUILDDIR) && $(PERL) Makefile.PL && $(MAKE) && $(MAKE) check
+	$(RM) -r $(BUILDDIR)
+
+.NOTPARALLEL:
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] WIP
@ 2019-05-11 22:55 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2019-05-11 22:55 UTC (permalink / raw)
  To: spew

---
 lib/PublicInbox/ViewDiff.pm | 367 +++++++++++++++++++++++++++++++++++-
 lib/PublicInbox/ViewVCS.pm  |  19 +-
 2 files changed, 367 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 85b5314..6d732dd 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -10,10 +10,11 @@ package PublicInbox::ViewDiff;
 use strict;
 use warnings;
 use base qw(Exporter);
-our @EXPORT_OK = qw(flush_diff);
+our @EXPORT_OK = qw(flush_diff); # for emails, OO API is for git(1) output
 use URI::Escape qw(uri_escape_utf8);
 use PublicInbox::Hval qw(ascii_html to_attr from_attr);
 use PublicInbox::Git qw(git_unquote);
+use PublicInbox::WwwStream;
 
 # keep track of state so we can avoid redundant HTML tags for
 # identically-classed lines
@@ -41,6 +42,17 @@ package PublicInbox::ViewDiff;
 my $PATH_A = '"?a/.+|/dev/null';
 my $PATH_B = '"?b/.+|/dev/null';
 
+my $CMT_FMT = '--pretty=format:'.join('%n',
+		'%H', '%s', '%an <%ae>', '%ai', '%cn <%ce>', '%ci',
+		'%t', '%p', '%D', '%b%x00');
+
+sub CC_EMPTY () { " This is a merge, and the combined diff is empty.\n" }
+sub CC_MERGE () { " This is a merge, showing combined diff:\n\n" }
+
+# used for "git show" (on commits) and "git diff"
+my @DIFF_OPT = qw(-z --numstat -p --encoding=UTF-8 -C -B -D
+		  --no-color --no-abbrev);
+
 sub to_html ($$) {
 	$_[0]->linkify_1($_[1]);
 	$_[0]->linkify_2(ascii_html($_[1]));
@@ -143,6 +155,16 @@ ($$$$$)
 	undef
 }
 
+sub dquery ($$) {
+	my ($pa, $pb) = @_;
+	my $q = '?b=' . uri_escape_utf8($pb, UNSAFE);
+	$q .= '&amp;a=' . uri_escape_utf8($pa, UNSAFE) if $pa ne $pb;
+	$q
+}
+
+# API for emails.  public-inbox assumes any email fits entirely in memory
+# (because we rely on Email::MIME anyway).
+# This interface makes the same assumption.
 sub flush_diff ($$$) {
 	my ($dst, $ctx, $linkify) = @_;
 	my $diff = $ctx->{-diff};
@@ -174,13 +196,7 @@ ($$$)
 			}
 			$pa = (split('/', git_unquote($pa), 2))[1];
 			$pb = (split('/', git_unquote($pb), 2))[1];
-			$dctx = {
-				Q => "?b=".uri_escape_utf8($pb, UNSAFE),
-			};
-			if ($pa ne $pb) {
-				$dctx->{Q} .= '&amp;a='.
-					uri_escape_utf8($pa, UNSAFE);
-			}
+			$dctx = { Q => dquery($pa, $pb) };
 			anchor1($dst, $ctx, $linkify, $pb, $s) and next;
 			$$dst .= to_html($linkify, $s);
 		} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
@@ -196,7 +212,7 @@ ($$$)
 			$$dst .= to_html($linkify, $s);
 		} elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
 			$$dst .= '</span>' if $state2class[$state];
-			$$dst .= qq(<span\nclass="hunk">);
+			$$dst .= qq(<span\nclass="hunk">); # XHTML
 			$$dst .= diff_hunk($dctx, $spfx, $1, $2);
 			$$dst .= '</span>';
 			$state = DSTATE_CTX;
@@ -235,4 +251,337 @@ ($$$)
 	undef;
 }
 
+# OO interface
+sub _cmd ($$) {
+	my ($self, $cmd) = @_;
+	$self->{git_cmd} = join(' ', @$cmd);
+	$cmd;
+}
+
+sub diff_cmd ($$$) {
+	my ($self, $oid_a, $oid_b) = @_;
+	_cmd($self, [ 'diff', @DIFF_OPT, $oid_a, $oid_b, '--' ]);
+}
+
+sub commit_cmd ($$) {
+	my ($self, $oid) = @_;
+	_cmd($self, [ qw(show -c), @DIFF_OPT, $CMT_FMT, $oid, '--' ]);
+}
+
+# OO API for parsing output of git-diff(1), git-show(1), etc...
+# We try to do as much as possible by streaming, so we act as
+# a stream editor (e.g. 'sed')
+sub new {
+	my ($class) = @_;
+	my $self = {
+		dstate => DSTATE_INIT,
+		dbuf => '',
+		# mhelp => merge help
+		# diff_tree => 1 (true if comparing tree-ish)
+	};
+	$self->{ndiff} = $self->{nchg} = $self->{nadd} = $self->{ndel} = 0;
+	bless $self, $class;
+}
+
+# diffstat links to anchors within the same HTML page
+sub git_diffstat_rename ($$$) {
+	my ($self, $from, $to) = @_;
+	my $anchor = to_attr(git_unquote($to));
+	$self->{anchors}->{$anchor} = $to;
+	my @from = split('/', $from);
+	my @to = split('/', $to);
+	my ($base, @base);
+
+	# only show differing path components
+	while (@to && @from && $to[0] eq $from[0]) {
+		push @base, shift(@to);
+		shift @from;
+	}
+
+	$base = ascii_html(join('/', @base)) if @base;
+	$from = ascii_html(join('/', @from));
+	$to = ascii_html(join('/', @to));
+	$to = qq(<a\nhref="#$anchor">$to</a>);
+	@base ? "$base/{$from =&gt; $to}" : "$from =&gt; $to";
+}
+
+sub git_diff_sed_stat ($$) {
+	my ($self, $dst) = @_;
+	my @stat = split(/\0/, delete $self->{dbuf}, -1);
+	my $end; # end-of-stat
+	my $nchg = \($self->{nchg});
+	my $nadd = \($self->{nadd});
+	my $ndel = \($self->{ndel});
+	if (!$self->{dstat_started}) {
+		$self->{dstat_started} = 1;
+
+		# merges start with an extra '\0' before the diffstat
+		# non-merge commits start with an extra '\n', instead
+		if ($self->{mhelp}) {
+			if ($stat[0] eq '') {
+				shift @stat;
+			} else {
+				warn
+'initial merge diffstat line was not empty';
+			}
+		} else {
+			# for commits, only (not diff-tree)
+			$stat[0] =~ s/\A\n//s;
+		}
+	}
+	while (defined(my $l = shift @stat)) {
+		if ($l eq '') {
+			$end = 1 if $stat[0] && $stat[0] =~ /\Ad/; # "diff --"
+			last;
+		} elsif ($l =~ /\Adiff /) {
+			unshift @stat, $l;
+			$end = 1;
+			last;
+		}
+		utf8::upgrade($l);
+		$l =~ /\A(\S+)\t+(\S+)\t+(.*)/ or next;
+		my ($add, $del, $fn) = ($1, $2, $3);
+		if ($fn ne '') { # normal modification
+			# TODO: discard diffs if they are too big
+			# gigantic changes with many files may still OOM us
+			my $anchor = to_attr(git_unquote($fn));
+			$self->{anchors}->{$anchor} = $fn;
+			$l = qq(<a\nhref="#$anchor">).ascii_html($fn).'</a>';
+		} else { # rename
+			# incomplete...
+			if (scalar(@stat) < 2) {
+				unshift @stat, $l;
+				last;
+			}
+			my $from = shift @stat;
+			my $to = shift @stat;
+			utf8::upgrade($from);
+			utf8::upgrade($to);
+			$l = git_diffstat_rename($self, $from, $to);
+		}
+
+		# text changes show numerically, Binary does not
+		if ($add =~ /\A\d+\z/) {
+			$$nadd += $add;
+			$$ndel += $del;
+			$add = "+$add";
+			$del = "-$del";
+		}
+		++$$nchg;
+		my $num = sprintf('% 6s/%-6s', $del, $add);
+		$$dst .= " $num\t$l\n";
+	}
+
+	# the rest of the diff:
+	$self->{dbuf} = join("\0", @stat);
+	return unless $end;
+
+	$self->{dstate} = DSTATE_HEAD;
+	$$dst .= "\n $$nchg ";
+	$$dst .= $$nchg  == 1 ? 'file changed, ' : 'files changed, ';
+	$$dst .= $$nadd;
+	$$dst .= $$nadd == 1 ? ' insertion(+), ' : ' insertions(+), ';
+	$$dst .= $$ndel;
+	$$dst .= $$ndel == 1 ? " deletion(-)\n\n" : " deletions(-)\n\n";
+}
+
+# index abcdef89..01234567 100644
+sub git_diff_ab_index ($$$$) {
+	my ($self, $oid_a, $oid_b, $mode) = @_;
+	$self->{oid_a} = $oid_a;
+	$self->{oid_b} = $oid_b;
+	my $range = "$oid_a..$oid_b";
+	if ($self->{diff_tree}) {
+		$range = qq(<a\nhref="../../$range/s/$self->{Q}">$range</a>);
+	}
+	'index ' . $range . ascii_html($mode);
+}
+
+# diff --git a/foo.c b/bar.c
+sub git_diff_ab_hdr ($$$) {
+	my ($self, $pa, $pb) = @_;
+	my $rv = '';
+	if ($self->{dstate} != DSTATE_HEAD) {
+		to_state(\$rv, $self->{dstate}, DSTATE_HEAD);
+	}
+	$pa = (split('/', git_unquote($pa), 2))[1];
+	$pb = (split('/', git_unquote($pb), 2))[1];
+	$self->{Q} = dquery($pa, $pb) if $self->{diff_tree};
+	my $anchor = to_attr($pb);
+	delete $self->{anchors}->{$anchor};
+
+	# not wasting bandwidth on links here
+	# links in hunk headers are far more useful with line offsets
+	$rv .= qq(<a\nid="$anchor">diff</a> --git ) .
+		ascii_html($pa) . ' ' . ascii_html($pb)
+}
+
+# diff (--cc|--combined)
+sub git_diff_cc_hdr {
+	my ($self, $combined, $path) = @_;
+	$path = git_unquote($path);
+	$self->{Q} = dquery($path, $path);
+	my $anchor = to_attr($path);
+	delete $self->{anchors}->{$anchor};
+	qq(<a\nid="$anchor">diff</a> --$combined ) . ascii_html($path);
+}
+
+sub offset_link ($$$) {
+	my ($qs, $oid, $offset) = @_;
+	my ($n) = ($offset =~ /\A[\-\+](\d+)/);
+	if (defined $n && $n == 0) {
+		# new or deleted file, don't link it
+		$offset;
+	} else {
+		$n = defined $n ? "#n$n" : '';
+		qq(<a href="../../$oid/s/$qs$n">$offset</a>)
+	}
+}
+
+# @@ -1,2 +3,4 @@ (regular diff)
+sub git_diff_ab_hunk ($$$$) {
+	my ($self, $ca, $cb, $func_ctx) = @_;
+	my $qs = $self->{Q};
+
+	qq(<span\nclass=hunk>@@  ) . # HTML
+	offset_link($qs, $self->{oid_a}, $ca) .
+	' ' . offset_link($qs, $self->{oid_b}, $cb) .
+	' @@' . ascii_html($func_ctx) . '</span>';
+}
+
+# index abcdef09,01234567..76543210
+sub git_diff_cc_index {
+	my ($self, $before, $last, $end) = @_;
+	$self->{oids_cc} = [ split(',', $before), $last ];
+
+	# not wasting bandwidth on links here, yet
+	# links in hunk headers are far more useful with line offsets
+	"index $before..$last" . ascii_html($end);
+}
+
+# @@@ -1,2 -3,4 +5,6 @@@ (combined diff)
+sub git_diff_cc_hunk ($$$$) {
+	my ($self, $at_signs, $offs, $func_ctx) = @_;
+	my $pobj = $self->{oids_cc};
+	my $i = 0;
+	my $qs = $self->{Q};
+	qq(<span\nclass=hunk>@@  ) . # HTML
+		join(' ', $at_signs, map {
+				offset_link($qs, $pobj->[$i++], $_);
+			} split(' ', $offs),
+		$at_signs) . ascii_html($func_ctx) . '</span>';
+}
+
+# the rest of the diff (beyond diffstat)
+sub git_diff_sed_lines ($$) {
+	my ($self, $dst) = @_;
+
+	my @dlines = split(/\n/, delete $self->{dbuf}, -1);
+
+	# don't touch the last line, it may not be terminated
+	$self->{dbuf} = pop @dlines;
+
+	if (my $help = delete $self->{mhelp}) {
+		$$dst .= $help; # CC_MERGE
+	}
+
+	# reminder: this is stricter than similar code in flush_diff,
+	# this is for git output (including --cc/--combined) we generate,
+	# while flush_diff parses mail
+	my $ndiff = \($self->{ndiff});
+	my $linkify = PublicInbox::Linkify->new;
+	while (defined(my $s = shift @dlines)) {
+		utf8::upgrade($s);
+		if ($s =~ m{\Adiff --git ("?a/.+) ("?b/.+)\z}) { # regular
+			$$dst .= git_diff_ab_hdr($self, $1, $2);
+		} elsif ($s =~ m{\Adiff --(cc|combined) (.+)\z}) {
+			$$dst .= git_diff_cc_hdr($self, $1, $2);
+		} elsif ($s =~ /\Aindex ($OID_BLOB)\.\.($OID_BLOB)(.*)\z/o) {
+			# regular diff
+			$$dst .= git_diff_ab_index($self, $1, $2, $3);
+		} elsif ($s =~
+			 /\Aindex ($OID_BLOB,[^\.]+)\.\.($OID_BLOB)(.*)\z/o) {
+			# --cc diff
+			$$dst .= git_diff_cc_index($self, $1, $2, $3);
+		} elsif ($s =~ /\A@@ (\S+) (\S+) @@(.*)\z/) { # regular
+			$$dst .= '</span>' if $state2class[$self->{dstate}];
+			$$dst .= git_diff_ab_hunk($self, $1, $2, $3);
+			$self->{dstate} = DSTATE_CTX;
+		} elsif ($s =~ /\A(@@@+) (\S+.*\S+) @@@+(.*)\z/) { # --cc
+			$$dst .= '</span>' if $state2class[$self->{dstate}];
+			$$dst .= git_diff_cc_hunk($self, $1, $2, $3);
+		} elsif ($s =~ /^ /) {
+			# works for common cases, but not weird/long filenames
+			if ($self->{dstate} == DSTATE_STAT &&
+					$s =~ /^ (.+)( +\| .*\z)/s) {
+				anchor0(\$dst, $self, $linkify, $1, $2) and next;
+			} elsif ($state2class[$self->{dstate}]) {
+				to_state($dst, $self->{dstate}, DSTATE_CTX);
+			}
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ m!^--- ! || $s =~ m!^\+{3} !)  {
+			# color only (no oid link) if missing dctx->{oid_*}
+			$self->{dstate} <= DSTATE_STAT and
+				to_state($dst, $state, DSTATE_HEAD);
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ /^\+/) {
+			if ($self->{dstate} != DSTATE_ADD) {
+				to_state($dst, $state, DSTATE_ADD);
+			}
+			$$dst .= to_html($linkify, $s);
+		} elsif ($s =~ /^-/) {
+			if ($$state != DSTATE_DEL && $$state > DSTATE_STAT) {
+				to_state($dst, $state, DSTATE_DEL);
+			}
+			$$dst .= to_html($linkify, $s);
+		# ignore the following lines in headers:
+		} elsif ($s =~ /^(?:dis)similarity index/ ||
+			 $s =~ /^(?:old|new) mode/ ||
+			 $s =~ /^(?:deleted|new) file mode/ ||
+			 $s =~ /^(?:copy|rename) (?:from|to) / ||
+			 $s =~ /^(?:dis)?similarity index /) {
+			$$dst .= to_html($linkify, $s);
+		} else {
+			$$dst .= to_html($linkify, $s);
+		}
+		++$$ndiff;
+	}
+}
+
+sub git_diff_sed_run ($$) {
+	my ($self, $dst) = @_;
+	$self->{dstate} == DSTATE_STAT and git_diff_sed_stat($self, $dst);
+	$self->{dstate} > DSTATE_STAT and git_diff_sed_lines($self, $dst);
+	undef;
+}
+
+sub git_diff_sed_close ($$) {
+	my ($self, $dst) = @_;
+	my $tmp = delete $self->{dbuf};
+	utf8::upgrade($tmp);
+	$$dst .= $tmp;
+	undef;
+}
+
+sub git_diff_sed {
+	my ($self, $ctx) = @_;
+	my $ws = { ctx => $ctx };
+	my @first = PublicInbox::WwwStream::html_top($ws) . '<pre>';
+	$ctx->{-html_tip} = "<pre>Output of: git $self->{git_cmd}\n";
+	$self->{dstate} = DSTATE_STAT;
+
+	# this filters for $fh->write or $body->getline (see Qspawn)
+	sub {
+		my $dst = shift @first || '';
+		if (defined $_[0]) { # $_[0] == scalar buffer
+			$self->{dbuf} .= $_[0];
+			git_diff_sed_run($self, \$dst);
+		} else { # undef means EOF from "git show", flush the last bit
+			git_diff_sed_close($self, \$dst);
+			$dst .= '</pre>'.PublicInbox::WwwStream::html_end($ws);
+		}
+		$dst;
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index c693fcf..2ba09a8 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -20,7 +20,7 @@ package PublicInbox::ViewVCS;
 use PublicInbox::SolverGit;
 use PublicInbox::WwwStream;
 use PublicInbox::Linkify;
-use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::ViewDiff;
 use PublicInbox::Hval qw(ascii_html to_filename);
 my $hl = eval {
 	require PublicInbox::HlMod;
@@ -181,23 +181,22 @@ ($$$$$$)
 		return html_page($ctx, 500, \'seek error');
 	}
 	$log = do { local $/; <$log> };
-	warn "log: $log\n";
+	my $vdiff = PublicInbox::ViewDiff->new;
 	my $git_b = $res_b->[0];
-	my $cmd = ['git', "--git-dir=$git_b->{git_dir}", 'diff',
-			$res_a->[1], $res_b->[1] ];
+	my $gcmd = $vdiff->diff_cmd($res_a->[1], $res_b->[1]);
+	my $cmd = ['git', "--git-dir=$git_b->{git_dir}", @$gcmd ];
 	my $qsp = PublicInbox::Qspawn->new($cmd);
 	my $env = $ctx->{env};
 	$env->{'qspawn.wcb'} = delete $ctx->{-wcb};
-	$qsp->psgi_return($env, undef, sub {
+	$qsp->psgi_return($env, undef, sub { # parse header
 		my ($r, $bref) = @_;
 		if (!defined $r) { # error
 			html_page($ctx, 500, $log);
-		} elsif (index($$bref, "\0") >= 0) {
-			my $ct = 'application/octet-stream';
-			[200, ['Content-Type', $ct ] ];
+		} elsif ($r == 0) {
+			PublicInbox::WwwStream::r($ctx, 200, 'empty diff');
 		} else {
-			my $ct = 'text/plain; charset=UTF-8';
-			[200, ['Content-Type', $ct] ];
+			$env->{'qspawn.filter'} = $vdiff->git_diff_sed($ctx);
+			PublicInbox::WwwStream::r($ctx, 200);
 		}
 	});
 }
-- 
EW


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] WIP
@ 2020-01-13  9:24 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-01-13  9:24 UTC (permalink / raw)
  To: spew

---
 lib/PublicInbox/SearchView.pm |  4 +--
 lib/PublicInbox/View.pm       | 52 +++++++++++++++++++----------------
 lib/PublicInbox/WwwStream.pm  |  1 +
 3 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 97233069..14a4aeaa 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -300,8 +300,8 @@ sub mset_thread_i {
 	my $msgs = $ctx->{msgs} or return;
 	while (my $smsg = pop @$msgs) {
 		$ctx->{-inbox}->smsg_mime($smsg) or next;
-		return PublicInbox::View::index_entry($smsg, $ctx,
-							scalar @$msgs);
+		return ${PublicInbox::View::index_entry($smsg, $ctx,
+							scalar @$msgs)};
 	}
 	my ($skel) = delete @$ctx{qw(skel msgs)};
 	$$skel .= "\n</pre>";
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index c9e6b3c5..be42c1be 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -208,12 +208,13 @@ sub index_entry {
 	my $obfs_ibx = $ctx->{-obfs_ibx};
 
 	$subj = '(no subject)' if $subj eq '';
-	my $rv = "<a\nhref=#e$id\nid=m$id>*</a> ";
+	my $obuf = $ctx->{obuf};
+	$$obuf .= "<a\nhref=#e$id\nid=m$id>*</a> ";
 	$subj = '<b>'.ascii_html($subj).'</b>';
 	obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
 	$subj = "<u\nid=u>$subj</u>" if $root_anchor eq $id_m;
-	$rv .= $subj . "\n";
-	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
+	$$obuf .= $subj . "\n";
+	$$obuf .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
 	my @tocc;
 	my $ds = $smsg->ds; # for v1 non-Xapian/SQLite users
 	# deleting {mime} is critical to memory use,
@@ -223,11 +224,11 @@ sub index_entry {
 	my $hdr = $mime->header_obj;
 	my $from = _hdr_names_html($hdr, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
-	$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
+	$$obuf .= "From: $from @ ".fmt_ts($ds)." UTC";
 	my $upfx = $ctx->{-upfx};
 	my $mhref = $upfx . mid_escape($mid_raw) . '/';
-	$rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
-	$rv .= qq{<a\nhref="${mhref}raw">raw</a>)\n};
+	$$obuf .= qq{ (<a\nhref="$mhref">permalink</a> / };
+	$$obuf .= qq{<a\nhref="${mhref}raw">raw</a>)\n};
 	my $to = fold_addresses(_hdr_names_html($hdr, 'To'));
 	my $cc = fold_addresses(_hdr_names_html($hdr, 'Cc'));
 	my ($tlen, $clen) = (length($to), length($cc));
@@ -245,32 +246,30 @@ sub index_entry {
 		$to_cc .= "\n";
 	}
 	obfuscate_addrs($obfs_ibx, $to_cc) if $obfs_ibx;
-	$rv .= $to_cc;
+	$$obuf .= $to_cc;
 
 	my $mapping = $ctx->{mapping};
 	if (!$mapping && (defined($irt) || defined($irt = in_reply_to($hdr)))) {
 		my $mirt = PublicInbox::Hval->new_msgid($irt);
 		my $href = $upfx . $mirt->{href}. '/';
 		my $html = $mirt->as_html;
-		$rv .= qq(In-Reply-To: &lt;<a\nhref="$href">$html</a>&gt;\n)
+		$$obuf .= qq(In-Reply-To: &lt;<a\nhref="$href">$html</a>&gt;\n)
 	}
-	$rv .= "\n";
+	$$obuf .= "\n";
 
 	# scan through all parts, looking for displayable text
 	$ctx->{mhref} = $mhref;
-	$ctx->{obuf} = \$rv;
 	msg_iter($mime, \&add_text_body, $ctx, 1);
-	delete $ctx->{obuf};
 
 	# add the footer
-	$rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
+	$$obuf .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
 		"<a\nhref=\"$mhref\">permalink</a>" .
 		" <a\nhref=\"${mhref}raw\">raw</a>" .
 		" <a\nhref=\"${mhref}#R\">reply</a>";
 
 	my $hr;
 	if (defined(my $pct = $smsg->{pct})) { # used by SearchView.pm
-		$rv .= "\t[relevance $pct%]";
+		$$obuf .= "\t[relevance $pct%]";
 		$hr = 1;
 	} elsif ($mapping) {
 		my $nested = 'nested';
@@ -282,15 +281,15 @@ sub index_entry {
 		} else {
 			$nested = "<b>$nested</b>";
 		}
-		$rv .= "\t[<a\nhref=\"${mhref}T/#u\">$flat</a>";
-		$rv .= "|<a\nhref=\"${mhref}t/#u\">$nested</a>]";
-		$rv .= " <a\nhref=#r$id>$ctx->{s_nr}</a>";
+		$$obuf .= "\t[<a\nhref=\"${mhref}T/#u\">$flat</a>";
+		$$obuf .= "|<a\nhref=\"${mhref}t/#u\">$nested</a>]";
+		$$obuf .= " <a\nhref=#r$id>$ctx->{s_nr}</a>";
 	} else {
 		$hr = $ctx->{-hr};
 	}
 
-	$rv .= $more ? '</pre><hr><pre>' : '</pre>' if $hr;
-	$rv;
+	$$obuf .= $more ? '</pre><hr><pre>' : '</pre>' if $hr;
+	delete $ctx->{obuf};
 }
 
 sub pad_link ($$;$) {
@@ -392,7 +391,10 @@ sub pre_thread  { # walk_thread callback
 sub thread_index_entry {
 	my ($ctx, $level, $smsg) = @_;
 	my ($beg, $end) = thread_adj_level($ctx, $level);
-	$beg . '<pre>' . index_entry($smsg, $ctx, 0) . '</pre>' . $end;
+	my $obuf = $ctx->{obuf};
+	$$obuf .= $beg . '<pre>';
+	index_entry($smsg, $ctx, 0);
+	$$obuf .= '</pre>' . $end;
 }
 
 sub stream_thread_i { # PublicInbox::WwwStream::getline callback
@@ -475,20 +477,24 @@ sub thread_html {
 	}
 	return missing_thread($ctx) unless $smsg;
 	$ctx->{-title_html} = ascii_html($smsg->subject);
-	$ctx->{-html_tip} = '<pre>'.index_entry($smsg, $ctx, scalar @$msgs);
+	${$ctx->{obuf}} .= '<pre>';
+	index_entry($smsg, $ctx, scalar @$msgs);
+	$ctx->{-html_tip} = $obuf;
 	$ctx->{msgs} = $msgs;
 	PublicInbox::WwwStream->response($ctx, 200, \&thread_html_i);
 }
 
 sub thread_html_i { # PublicInbox::WwwStream::getline callback
 	my ($nr, $ctx) = @_;
-	my $msgs = $ctx->{msgs} or return;
+	my $msgs = $ctx->{msgs} or return; # final return value
+
 	while (my $smsg = shift @$msgs) {
 		$ctx->{-inbox}->smsg_mime($smsg) or next;
-		return index_entry($smsg, $ctx, scalar @$msgs);
+		return ${index_entry($smsg, $ctx, scalar @$msgs)};
 	}
+
 	my ($skel) = delete @$ctx{qw(skel msgs)};
-	$$skel;
+	$$skel; # penultimate return value
 }
 
 sub multipart_text_as_html {
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index f0a57d6a..fd2cbcff 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -160,6 +160,7 @@ sub getline {
 	return _html_top($self) if $nr == 0;
 
 	if (my $middle = $self->{cb}) {
+		$self->{ctx}->{obuf} = \(my $obuf = '');
 		$middle = $middle->($nr, $self->{ctx}) and return $middle;
 	}
 

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2020-04-20  7:14 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-04-20  7:14 UTC (permalink / raw)
  To: spew

---
 lib/PublicInbox/MsgIter.pm |  6 ++--
 lib/PublicInbox/View.pm    | 64 +++++++++++++++++++++++++++++++++++---
 t/message_subparts.t       | 46 +++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 8 deletions(-)
 create mode 100644 t/message_subparts.t

diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index fa25564a..e33e2b2b 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -12,8 +12,8 @@ use PublicInbox::MIME;
 # Like Email::MIME::walk_parts, but this is:
 # * non-recursive
 # * passes depth and indices to the iterator callback
-sub msg_iter ($$;$$) {
-	my ($mime, $cb, $cb_arg, $do_undef) = @_;
+sub msg_iter ($$;$$$) {
+	my ($mime, $cb, $cb_arg, $do_undef, $sub_hdr_cb) = @_;
 	my @parts = $mime->subparts;
 	if (@parts) {
 		$mime = $_[0] = undef if $do_undef; # saves some memory
@@ -28,7 +28,7 @@ sub msg_iter ($$;$$) {
 				@sub = map { [ $_, $depth, @idx, ++$i ] } @sub;
 				@parts = (@sub, @parts);
 			} else {
-				$cb->($p, $cb_arg);
+				$cb->($p, $cb_arg, \$depth, \@parts);
 			}
 		}
 	} else {
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 9b62ed3c..527b8316 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -515,14 +515,67 @@ EOF
 	undef;
 }
 
+# show headers on message/rfc822 attachments
+sub submsg_hdr {
+	my ($hdr, $ctx) = @_;
+	my $obfs_ibx = $ctx->{-obfs_ibx};
+	my $rv = $ctx->{obuf};
+	$$rv .= "\n";
+	for my $h (qw(From To Cc)) {
+		for my $v ($hdr->header($h)) {
+			fold_addresses($v);
+			$v = ascii_html($v);
+			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+			$$rv .= "$h: $v\n" if $v ne '';
+		}
+	}
+	for my $h (qw(Subject Date)) {
+		for my $v ($hdr->header($h)) {
+			$v = ascii_html($v);
+			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+			$$rv .= "$h: $v\n" if $v ne '';
+		}
+	}
+	# my $lnk = PublicInbox::Linkify->new;
+	for my $h (qw(Message-ID X-Alt-Message-ID In-Reply-To References)) {
+		my $s = '';
+		$s .= "$h: $_\n" for ($hdr->header_raw($h));
+		# $lnk->linkify_mids('..', \$s, 1);
+		$$rv .= $s;
+	}
+	$$rv .= _parent_headers($hdr);
+	$$rv .= "\n";
+	undef;
+}
+
 sub add_text_body { # callback for msg_iter
-	my ($p, $ctx) = @_;
-	my $upfx = $ctx->{mhref};
-	my $ibx = $ctx->{-inbox};
+	my ($p, $ctx, $iter_depth, $iter_parts) = @_;
 	# $p - from msg_iter: [ Email::MIME, depth, @idx ]
 	my ($part, $depth, @idx) = @$p;
 	my $ct = $part->content_type || 'text/plain';
 	my $fn = $part->filename;
+
+	if ($ct =~ m!\Amessage/(?:rfc822|news)\b!i) {
+		# required for compatibility with old URLs
+		attach_link($ctx, $ct, $p, $fn);
+
+		my $submsg = PublicInbox::MIME->new(\($part->body));
+		submsg_hdr($submsg->header_obj, $ctx);
+		my @sub = $submsg->subparts;
+		@sub = $submsg if !@sub;
+		my $d = ++$$iter_depth;
+		my $i = 0;
+		@sub = map { [ $_, $d, @idx, ++$i ] } @sub;
+		@$iter_parts = (@sub, @$iter_parts);
+		return;
+		# }
+		# $p->[0] = $part = $submsg;
+		# $p->[1] = ++$depth;
+		# $ct = $part->content_type || 'text/plain';
+		# $fn = $part->filename;
+		# fall-through:
+	}
+
 	my ($s, $err) = msg_part_text($part, $ct);
 	return attach_link($ctx, $ct, $p, $fn) unless defined $s;
 
@@ -530,6 +583,7 @@ sub add_text_body { # callback for msg_iter
 	# link generation in diffs with the extra '%0D'
 	$s =~ s/\r\n/\n/sg;
 
+	my $ibx = $ctx->{-inbox};
 	# will be escaped to `&#8226;' in HTML
 	obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};
 
@@ -537,6 +591,7 @@ sub add_text_body { # callback for msg_iter
 	# headers for solver unless some coderepo are configured:
 	my $diff;
 	if ($s =~ /^(?:diff|---|\+{3}) /ms) {
+		my $upfx = $ctx->{mhref};
 		# diffstat anchors do not link across attachments or messages:
 		$idx[0] = $upfx . $idx[0] if $upfx ne '';
 		$ctx->{-apfx} = join('/', @idx);
@@ -718,12 +773,11 @@ sub thread_skel ($$$) {
 }
 
 sub _parent_headers {
-	my ($hdr, $over) = @_;
+	my ($hdr, $over, $lnk) = @_;
 	my $rv = '';
 	my @irt = $hdr->header_raw('In-Reply-To');
 	my $refs;
 	if (@irt) {
-		my $lnk = PublicInbox::Linkify->new;
 		$rv .= "In-Reply-To: $_\n" for @irt;
 		$lnk->linkify_mids('..', \$rv);
 	} else {
diff --git a/t/message_subparts.t b/t/message_subparts.t
new file mode 100644
index 00000000..55ca8f4e
--- /dev/null
+++ b/t/message_subparts.t
@@ -0,0 +1,46 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use Test::More;
+use PublicInbox::MIME;
+use PublicInbox::MsgIter;
+
+my $rfc822 = PublicInbox::MIME->new(<<'EOF');
+From: a@example.com
+To: a@example.com
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Subject: message/rfc822 embedded
+Content-Type: multipart/mixed; boundary="x"
+MIME-Version: 1.0
+
+--x
+Content-Transfer-Encoding: 7bit
+Content-Type: message/rfc822
+
+From: a@example.com
+To: a@example.com
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+Subject: lost
+Content-Type: multipart/mixed; boundary="y"
+MIME-Version: 1.0
+
+--y
+Content-Transfer-Encoding: 7bit
+Content-Type: text/plain
+
+my leg
+
+--y--
+
+--x--
+EOF
+
+use Data::Dumper;
+msg_iter($rfc822, sub {
+	my ($part, $level, @idx) = @{$_[0]};
+	my ($s, $err) = msg_part_text($part, $part->content_type);
+	diag "S[$level,[".join(',', @idx)."]: $s";
+});
+
+ok 1;
+done_testing();

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2020-04-23  4:27 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-04-23  4:27 UTC (permalink / raw)
  To: spew

---
 MANIFEST                      |   1 -
 lib/PublicInbox/GzipFilter.pm |  17 ++--
 lib/PublicInbox/Mbox.pm       | 155 ++++++++++++++++++++--------------
 lib/PublicInbox/MboxGz.pm     |  49 -----------
 4 files changed, 99 insertions(+), 123 deletions(-)
 delete mode 100644 lib/PublicInbox/MboxGz.pm

diff --git a/MANIFEST b/MANIFEST
index b0f08567..857fab4c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -131,7 +131,6 @@ lib/PublicInbox/MDA.pm
 lib/PublicInbox/MID.pm
 lib/PublicInbox/MIME.pm
 lib/PublicInbox/Mbox.pm
-lib/PublicInbox/MboxGz.pm
 lib/PublicInbox/MsgIter.pm
 lib/PublicInbox/MsgTime.pm
 lib/PublicInbox/Msgmap.pm
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index d2eb4e66..3a3c4a6b 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -10,7 +10,15 @@ our @EXPORT_OK = qw(gzf_maybe);
 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
 my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
 
-sub new { bless {}, shift }
+sub gzip_or_die () {
+	my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
+	$err == Z_OK or die "Deflate->new failed: $err";
+	$gz;
+}
+
+sub new { bless { gz => $_[1] ? undef : gzip_or_die() }, $_[0] }
+
+# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
 
 # for Qspawn if using $env->{'pi-httpd.async'}
 sub attach {
@@ -32,13 +40,6 @@ sub gzf_maybe ($$) {
 	bless { gz => $gz }, __PACKAGE__;
 }
 
-sub gzip_or_die () {
-	my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
-	$err == Z_OK or die "Deflate->new failed: $err";
-	$gz;
-}
-
-# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
 # Also used for ->getline callbacks
 sub translate ($$) {
 	my $self = $_[0]; # $_[1] => input
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 97bec5e7..36f7aea8 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -9,17 +9,16 @@
 # more common "push" model)
 package PublicInbox::Mbox;
 use strict;
-use warnings;
+use PublicInbox::GzipFilter;
+use PublicInbox::NoopFilter;
 use PublicInbox::MID qw/mid_escape/;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Smsg;
-use PublicInbox::WwwStream qw(html_oneshot);
 use Email::Simple;
 use Email::MIME::Encode;
 
 sub subject_fn ($) {
-	my ($hdr) = @_;
-	my $fn = $hdr->header('Subject');
+	my ($fn) = @_;
 	return 'no-subject' if (!defined($fn) || $fn eq '');
 
 	# no need for full Email::MIME, here
@@ -31,63 +30,6 @@ sub subject_fn ($) {
 	$fn eq '' ? 'no-subject' : to_filename($fn);
 }
 
-sub mb_stream {
-	my ($more) = @_;
-	bless $more, 'PublicInbox::Mbox';
-}
-
-# called by PSGI server as body response
-# this gets called twice for every message, once to return the header,
-# once to retrieve the body
-sub getline {
-	my ($more) = @_; # self
-	my ($ctx, $id, $prev, $next, $mref, $hdr) = @$more;
-	if ($hdr) { # first message hits this, only
-		pop @$more; # $hdr
-		pop @$more; # $mref
-		return msg_hdr($ctx, $hdr) . msg_body($$mref);
-	}
-	my $cur = $next or return;
-	my $ibx = $ctx->{-inbox};
-	$next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev);
-	$mref = $ibx->msg_by_smsg($cur) or return;
-	$hdr = Email::Simple->new($mref)->header_obj;
-	@$more = ($ctx, $id, $prev, $next); # $next may be undef, here
-	msg_hdr($ctx, $hdr) . msg_body($$mref);
-}
-
-sub close {} # noop
-
-# /$INBOX/$MESSAGE_ID/raw
-sub emit_raw {
-	my ($ctx) = @_;
-	my $mid = $ctx->{mid};
-	my $ibx = $ctx->{-inbox};
-	$ctx->{base_url} = $ibx->base_url($ctx->{env});
-	my ($mref, $more, $id, $prev, $next);
-	if (my $over = $ibx->over) {
-		my $smsg = $over->next_by_mid($mid, \$id, \$prev) or return;
-		$mref = $ibx->msg_by_smsg($smsg) or return;
-		$next = $over->next_by_mid($mid, \$id, \$prev);
-	} else {
-		$mref = $ibx->msg_by_mid($mid) or return;
-	}
-	my $hdr = Email::Simple->new($mref)->header_obj;
-	$more = [ $ctx, $id, $prev, $next, $mref, $hdr ]; # for ->getline
-	my $fn = subject_fn($hdr);
-	my @hdr = ('Content-Type');
-	if ($ibx->{obfuscate}) {
-		# obfuscation is stupid, but maybe scrapers are, too...
-		push @hdr, 'application/mbox';
-		$fn .= '.mbox';
-	} else {
-		push @hdr, 'text/plain';
-		$fn .= '.txt';
-	}
-	push @hdr, 'Content-Disposition', "inline; filename=$fn";
-	[ 200, \@hdr, mb_stream($more) ];
-}
-
 sub msg_hdr ($$;$) {
 	my ($ctx, $header_obj, $mid) = @_;
 
@@ -135,6 +77,56 @@ sub msg_body ($) {
 	$_[0] .= "\n";
 }
 
+sub emit_raw_cb {
+	my ($self, $ctx) = @_;
+	delete($self->{first}) // $ctx->{-inbox}->over->next_by_mid(
+						$ctx->{mid},
+						\($self->{cur_id}),
+						\($self->{prev}))
+}
+
+# /$INBOX/$MESSAGE_ID/raw
+sub emit_raw {
+	my ($ctx) = @_;
+	my $mid = $ctx->{mid};
+	my $ibx = $ctx->{-inbox};
+	$ctx->{base_url} = $ibx->base_url($ctx->{env});
+	my $h = [ 'Content-Type' => undef, 'Content-Disposition' => undef ];
+	my ($fn, $body);
+	my $gzf = gzf_maybe($h, $ctx->{env}) || PublicInbox::NoopFilter->new;
+	if (my $over = $ibx->over) {
+		$body = bless {
+			ctx => $ctx,
+			cb => \&emit_raw_cb,
+			gzf => $gzf,
+			cur_id => undef,
+			prev => undef
+		}, __PACKAGE__;
+		my $smsg = $over->next_by_mid($ctx->{mid},
+						\($body->{cur_id}),
+						\($body->{prev})) or return;
+		$body->{first} = $smsg;
+		$fn = $smsg->{subject};
+	} else {
+		my $mref = $ibx->msg_by_mid($ctx->{mid}) or return;
+		my $hdr = Email::Simple->new($mref)->header_obj;
+		$fn = $hdr->header('Subject');
+		$gzf->zmore(msg_hdr($ctx, $hdr, $ctx->{mid}));
+		$gzf->zflush(msg_body($$mref));
+		$body = [ $hdr ];
+	}
+	if ($ibx->{obfuscate}) {
+		# obfuscation is stupid, but maybe scrapers are, too...
+		$h->[1] = 'application/mbox';
+		$fn .= '.mbox';
+	} else {
+		$h->[1] = 'text/plain';
+		$fn .= '.txt';
+	}
+	$h->[3] = 'inline; filename='.subject_fn($fn);
+	[ 200, $h, $body ];
+}
+
 sub thread_cb {
 	my ($ctx) = @_;
 	my $msgs = $ctx->{msgs};
@@ -150,6 +142,22 @@ sub thread_cb {
 	}
 }
 
+sub mboxgz ($$$) {
+	my ($ctx, $cb, $fn) = @_;
+	$ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
+	my $body = bless {
+		gzf => PublicInbox::GzipFilter->new(1),
+		cb => $cb,
+		ctx => $ctx,
+	}, __PACKAGE__;
+	# http://www.iana.org/assignments/media-types/application/gzip
+	$fn = defined($fn) && $fn ne '' ? to_filename($fn) : 'no-subject';
+	my $h = [ qw(Content-Type application/gzip),
+		'Content-Disposition', "inline; filename=$fn.mbox.gz" ];
+	[ 200, $h, $body ];
+}
+
+
 sub thread_mbox {
 	my ($ctx, $over, $sfx) = @_;
 	require PublicInbox::MboxGz;
@@ -157,7 +165,7 @@ sub thread_mbox {
 	return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
 	$ctx->{prev} = $msgs->[-1];
 	$ctx->{over} = $over; # bump refcnt
-	PublicInbox::MboxGz->response($ctx, \&thread_cb, $msgs->[0]->subject);
+	mboxgz($ctx, \&thread_cb, $msgs->[0]->subject);
 }
 
 sub emit_range {
@@ -195,7 +203,7 @@ sub mbox_all_ids {
 		return PublicInbox::WWW::need($ctx, 'Overview');
 	$ctx->{ids} = $ids;
 	$ctx->{prev} = $prev;
-	return PublicInbox::MboxGz->response($ctx, \&all_ids_cb, 'all');
+	mboxgz($ctx, \&all_ids_cb, 'all');
 }
 
 sub results_cb {
@@ -220,7 +228,6 @@ sub results_cb {
 sub mbox_all {
 	my ($ctx, $query) = @_;
 
-	require PublicInbox::MboxGz;
 	return mbox_all_ids($ctx) if $query eq '';
 	my $qopts = $ctx->{qopts} = { mset => 2 };
 	my $srch = $ctx->{srch} = $ctx->{-inbox}->search or
@@ -231,7 +238,25 @@ sub mbox_all {
 				["No results found\n"]];
 	$ctx->{iter} = 0;
 	$ctx->{query} = $query;
-	PublicInbox::MboxGz->response($ctx, \&results_cb, 'results-'.$query);
+	mboxgz($ctx, \&results_cb, 'results-'.$query);
 }
 
+# called by Plack::Util::foreach or similar
+sub getline {
+	my ($self) = @_;
+	my $gzf = $self->{gzf} or return;
+	my $ctx = $self->{ctx};
+	while (my $smsg = $self->{cb}->($ctx)) {
+		my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
+		my $h = Email::Simple->new($mref)->header_obj;
+		$gzf->zmore(msg_hdr($ctx, $h, $smsg->{mid}));
+		return $gzf->translate(msg_body($$mref));
+	}
+	# signal that we're done and can return undef next call:
+	delete $self->{gzf};
+	$gzf->zflush;
+}
+
+sub close {} # noop
+
 1;
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
deleted file mode 100644
index 7deabf54..00000000
--- a/lib/PublicInbox/MboxGz.pm
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-package PublicInbox::MboxGz;
-use strict;
-use parent 'PublicInbox::GzipFilter';
-use Email::Simple;
-use PublicInbox::Hval qw/to_filename/;
-use PublicInbox::Mbox;
-
-sub new {
-	my ($class, $ctx, $cb) = @_;
-	$ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
-	bless {
-		gz => PublicInbox::GzipFilter::gzip_or_die(),
-		cb => $cb,
-		ctx => $ctx
-	}, $class;
-}
-
-sub response {
-	my ($class, $ctx, $cb, $fn) = @_;
-	my $body = $class->new($ctx, $cb);
-	# http://www.iana.org/assignments/media-types/application/gzip
-	$fn = defined($fn) && $fn ne '' ? to_filename($fn) : 'no-subject';
-	my $h = [ qw(Content-Type application/gzip),
-		'Content-Disposition', "inline; filename=$fn.mbox.gz" ];
-	[ 200, $h, $body ];
-}
-
-# called by Plack::Util::foreach or similar
-sub getline {
-	my ($self) = @_;
-	my $ctx = $self->{ctx} or return;
-	while (my $smsg = $self->{cb}->($ctx)) {
-		my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
-		my $h = Email::Simple->new($mref)->header_obj;
-		$self->zmore(
-			PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid})
-		);
-		return $self->translate(PublicInbox::Mbox::msg_body($$mref));
-	}
-	# signal that we're done and can return undef next call:
-	delete $self->{ctx};
-	$self->zflush;
-}
-
-sub close {} # noop
-
-1;

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2020-11-15  7:35 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-11-15  7:35 UTC (permalink / raw)
  To: spew

---
 t/eml.t | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/t/eml.t b/t/eml.t
index 8d131b14..3012a035 100644
--- a/t/eml.t
+++ b/t/eml.t
@@ -269,7 +269,7 @@ Object-Id: ab0440d8cd6d843bee9a27709a459ce3b2bdb94d (lore/kvm)
 EOF
 	my $eml = $cls->new(\$s);
 	my ($str, $err) = msg_part_text($eml, $eml->content_type);
-	is($str, "\x{100}\n", "got wide character by assuming utf-8");
+	is($str, "\x{100}\n", "got wide character by assuming utf-8 ($cls)");
 }
 
 if ('we differ from Email::MIME with final "\n" on missing epilogue') {
@@ -383,8 +383,12 @@ SKIP: {
 		$msg->parts_set([$old[-1]]);
 		is(scalar $msg->subparts, 1, 'only last remains');
 	}
-	is($eml->as_string, $mime->as_string,
-		'as_string matches after parts_set');
+
+	# some versions of Email::MIME or Email::MIME::* will drop
+	# unnecessary ", while PublicInbox::Eml will preserve the original
+	my $exp = $mime->as_string;
+	$exp =~ s/; boundary=b\b/; boundary="b"/;
+	is($eml->as_string, $exp, 'as_string matches after parts_set');
 }
 
 for my $cls (@classes) {
@@ -395,7 +399,8 @@ Content-Disposition: attachment; filename="=?utf-8?q?vtpm-makefile.patch?="
 EOF
 	is($cls->new($s)->filename, 'vtpm-makefile.patch', 'filename decoded');
 	$s =~ s/^Content-Disposition:.*$//sm;
-	is($cls->new($s)->filename, 'vtpm-fakefile.patch', 'filename fallback');
+	is($cls->new($s)->filename, 'vtpm-fakefile.patch',
+		"filename fallback ($cls)") if $cls ne 'PublicInbox::MIME';
 	is($cls->new($s)->content_type,
 		'text/x-patch; name="vtpm-fakefile.patch"',
 		'matches Email::MIME output, "correct" or not');
@@ -416,7 +421,7 @@ EOF
 	my @tmp;
 	$cls->new($s)->each_part(sub { push @tmp, $_[0]->[0]->filename });
 	is_deeply(['vtpm-makefile.patch', 'vtpm-fakefile.patch'], \@tmp,
-		'got filename for both attachments');
+		"got filename for both attachments ($cls)");
 }
 
 done_testing;

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] WIP
@ 2020-12-27 11:36 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2020-12-27 11:36 UTC (permalink / raw)
  To: spew

---
 MANIFEST                      |  2 ++
 lib/PublicInbox/LeiSearch.pm  | 14 ++++-----
 lib/PublicInbox/LeiXSearch.pm | 57 +++++++++++++++++++++++++++++++++++
 lib/PublicInbox/Search.pm     | 19 +++++-------
 t/lei_xsearch.t               | 47 +++++++++++++++++++++++++++++
 5 files changed, 119 insertions(+), 20 deletions(-)
 create mode 100644 lib/PublicInbox/LeiXSearch.pm
 create mode 100644 t/lei_xsearch.t

diff --git a/MANIFEST b/MANIFEST
index 656c707e..a5ff81cf 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -165,6 +165,7 @@ lib/PublicInbox/LEI.pm
 lib/PublicInbox/LeiExtinbox.pm
 lib/PublicInbox/LeiSearch.pm
 lib/PublicInbox/LeiStore.pm
+lib/PublicInbox/LeiXSearch.pm
 lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/Lock.pm
@@ -327,6 +328,7 @@ t/kqnotify.t
 t/lei-oneshot.t
 t/lei.t
 t/lei_store.t
+t/lei_xsearch.t
 t/linkify.t
 t/main-bin/spamc
 t/mda-mime.eml
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 66c16e04..0b962b11 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -7,20 +7,18 @@ use v5.10.1;
 use parent qw(PublicInbox::ExtSearch);
 use PublicInbox::Search;
 
-sub combined_docid ($$) {
+# get combined docid from over.num:
+# (not generic Xapian, only works with our sharding scheme)
+sub num2docid ($$) {
 	my ($self, $num) = @_;
-	($num - 1) * $self->{nshard} + 1;
+	my $nshard = $self->{nshard};
+	($num - 1) * $nshard + $num % $nshard + 1;
 }
 
 sub msg_keywords {
 	my ($self, $num) = @_; # num_or_mitem
 	my $xdb = $self->xdb; # set {nshard};
-	my $docid = ref($num) ? $num->get_docid : do {
-		# get combined docid from over.num:
-		# (not generic Xapian, only works with our sharding scheme)
-		my $nshard = $self->{nshard};
-		($num - 1) * $nshard + $num % $nshard + 1;
-	};
+	my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
 	my %kw;
 	eval {
 		my $end = $xdb->termlist_end($docid);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
new file mode 100644
index 00000000..07ceb84e
--- /dev/null
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -0,0 +1,57 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Combine any combination of PublicInbox::Search,
+# PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects
+# into one Xapian DB
+package PublicInbox::LeiXSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiSearch);
+
+sub new {
+	my ($class) = @_;
+	PublicInbox::Search::load_xapian();
+	bless {}, $class
+}
+
+sub attach_extinbox {
+	my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
+	if (delete $self->{xdb}) {
+		# clobber existing {xdb} if amending
+		my $expect = delete $self->{nshard};
+		my $shards = delete $self->{shards_flat};
+		scalar(@$shards) == $expect or die
+			"BUG: {nshard}$expect != shards=".scalar(@$shards);
+
+		my $prev = {};
+		for my $old_ibxish (@{$self->{shard2ibx}}) {
+			next if $prev == $old_ibxish;
+			$prev = $old_ibxish;
+			my @shards = $old_ibxish->search->xdb_shards_flat;
+			push @{$self->{shards_flat}}, @shards;
+		}
+		my $nr = scalar(@{$self->{shards_flat}});
+		$nr == $expect or die
+			"BUG: reloaded $nr shards, expected $expect"
+	}
+	my @shards = $ibxish->search->xdb_shards_flat;
+	push @{$self->{shards_flat}}, @shards;
+	push(@{$self->{shard2ibx}}, $ibxish) for (@shards);
+}
+
+# called by PublicInbox::Search::xdb
+sub xdb_shards_flat { @{$_[0]->{shards_flat}} }
+
+# like over->get_art
+sub smsg_for {
+	my ($self, $mitem) = @_;
+	# cf. https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
+	my $nshard = $self->{nshard};
+	my $docid = $mitem->get_docid;
+	my $shard = ($docid - 1) % $nshard;
+	my $num = int(($docid - 1) / $nshard) + 1;
+	$self->{shard2ibx}->[$shard]->over->get_art($num);
+}
+
+1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 498b6632..73926b1c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -197,6 +197,7 @@ sub xdb_shards_flat ($) {
 	my ($self) = @_;
 	my $xpfx = $self->{xpfx};
 	my (@xdb, $slow_phrase);
+	load_xapian();
 	if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
 		@xdb = ($X{Database}->new($xpfx));
 		$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
@@ -215,16 +216,6 @@ sub xdb_shards_flat ($) {
 	@xdb;
 }
 
-sub _xdb {
-	my ($self) = @_;
-	$self->{qp_flags} //= $QP_FLAGS;
-	my @xdb = xdb_shards_flat($self) or return;
-	$self->{nshard} = scalar(@xdb);
-	my $xdb = shift @xdb;
-	$xdb->add_database($_) for @xdb;
-	$xdb;
-}
-
 # v2 Xapian docids don't conflict, so they're identical to
 # NNTP article numbers and IMAP UIDs.
 # https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID
@@ -243,8 +234,12 @@ sub mset_to_artnums {
 sub xdb ($) {
 	my ($self) = @_;
 	$self->{xdb} //= do {
-		load_xapian();
-		$self->_xdb;
+		$self->{qp_flags} //= $QP_FLAGS;
+		my @xdb = $self->xdb_shards_flat or return;
+		$self->{nshard} = scalar(@xdb);
+		my $xdb = shift @xdb;
+		$xdb->add_database($_) for @xdb;
+		$xdb;
 	};
 }
 
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
new file mode 100644
index 00000000..70e3affd
--- /dev/null
+++ b/t/lei_xsearch.t
@@ -0,0 +1,47 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Eml;
+use PublicInbox::InboxWritable;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiXSearch';
+my ($home, $for_destroy) = tmpdir();
+my @ibx;
+for my $V (1..2) {
+	for my $i (3..6) {
+		my $ibx = PublicInbox::InboxWritable->new({
+			inboxdir => "$home/v$V-$i",
+			name => "test-v$V-$i",
+			version => $V,
+			indexlevel => 'medium',
+			-primary_address => "v$V-$i\@example.com",
+		}, { nproc => int(rand(8)) + 1 });
+		push @ibx, $ibx;
+		my $im = $ibx->importer(0);
+		my $eml = PublicInbox::Eml->new(<<EOF);
+From: x\@example.com
+To: $ibx->{-primary_address}
+Date: Fri, 02 Oct 1993 0$V:0$i:00 +0000
+Subject: v${V}i${i}
+
+${V}er ${i}on
+EOF
+		$im->add($eml);
+		$im->done;
+	}
+}
+my $first = shift @ibx; is($first->{name}, 'test-v1-3', 'first plucked');
+my $last = pop @ibx; is($last->{name}, 'test-v2-6', 'last plucked');
+my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx");
+$eidx->attach_inbox($first);
+$eidx->attach_inbox($last);
+$eidx->eidx_sync({fsync => 0});
+my $lxs = PublicInbox::LeiXSearch->new;
+
+done_testing;

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2021-01-03 22:57 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-01-03 22:57 UTC (permalink / raw)
  To: spew

---
 lib/PublicInbox/LEI.pm         |  7 ++-----
 lib/PublicInbox/LeiExternal.pm | 33 ++++++++++++++++++++++-----------
 lib/PublicInbox/LeiStore.pm    |  2 +-
 lib/PublicInbox/LeiXSearch.pm  | 16 ++++++++++++++++
 t/lei.t                        | 12 +++++++++---
 5 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index a0ea793b..6b141a37 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -8,7 +8,8 @@
 package PublicInbox::LEI;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::DS PublicInbox::LeiExternal);
+use parent qw(PublicInbox::DS PublicInbox::LeiExternal
+	PublicInbox::LeiQuery);
 use Getopt::Long ();
 use Socket qw(AF_UNIX SOCK_STREAM pack_sockaddr_un);
 use Errno qw(EAGAIN ECONNREFUSED ENOENT);
@@ -475,10 +476,6 @@ sub lei_show {
 	my ($self, @argv) = @_;
 }
 
-sub lei_query {
-	my ($self, @argv) = @_;
-}
-
 sub lei_mark {
 	my ($self, @argv) = @_;
 }
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index 4facd451..64faf5a0 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -8,24 +8,35 @@ use v5.10.1;
 use parent qw(Exporter);
 our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external);
 
-sub lei_ls_external {
-	my ($self, @argv) = @_;
-	my $stor = $self->_lei_store(0);
+sub _externals_each {
+	my ($self, $cb, @arg) = @_;
 	my $cfg = $self->_lei_cfg(0);
-	my $out = $self->{1};
-	my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
-	my (%boost, @loc);
+	my %boost;
 	for my $sec (grep(/\Aexternal\./, @{$cfg->{-section_order}})) {
 		my $loc = substr($sec, length('external.'));
 		$boost{$loc} = $cfg->{"$sec.boost"};
-		push @loc, $loc;
 	}
-	use sort 'stable';
+	return \%boost if !wantarray && !$cb;
+
 	# highest boost first, but stable for alphabetic tie break
-	for (sort { $boost{$b} <=> $boost{$a} } sort keys %boost) {
-		# TODO: use miscidx and show docid so forget/set is easier
-		print $out $_, $OFS, 'boost=', $boost{$_}, $ORS;
+	use sort 'stable';
+	my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
+	return @order if !$cb;
+	for my $loc (@order) {
+		$cb->(@arg, $loc, $boost{$loc});
 	}
+	@order; # scalar or array
+}
+
+sub lei_ls_external {
+	my ($self, @argv) = @_;
+	my $stor = $self->_lei_store(0);
+	my $out = $self->{1};
+	my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+	$self->_externals_each(sub {
+		my ($loc, $boost_val) = @_;
+		print $out $loc, $OFS, 'boost=', $boost_val, $ORS;
+	});
 }
 
 sub lei_add_external {
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 07a3198a..a7267a64 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -24,7 +24,7 @@ sub new {
 	my (undef, $dir, $opt) = @_;
 	my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt);
 	my $self = bless { priv_eidx => $eidx }, __PACKAGE__;
-	eidx_init($self) if $opt->{creat};
+	eidx_init($self)->done if $opt->{creat};
 	$self;
 }
 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 33e9c413..68be0c97 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -20,6 +20,21 @@ sub new {
 
 sub attach_external {
 	my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox
+	if (!ref($ibxish) && -d $ibxish) {
+		if (-f "$ibxish/ei.lock") {
+			require PublicInbox::ExtSearch;
+			$ibxish = PublicInbox::ExtSearch->new($ibxish);
+		} elsif (-f "$ibxish/inbox.lock" || # v2
+				-f "$ibxish/public-inbox") { # v1 indexed
+			require PublicInbox::Inbox;
+			$ibxish = bless { inboxdir => $ibxish },
+					'PublicInbox::Inbox';
+		} else {
+			warn "W: ignoring $ibxish, unable to determine type\n";
+			return;
+		}
+	}
+
 	if (!$ibxish->can('over')) {
 		push @{$self->{remotes}}, $ibxish
 	}
@@ -44,6 +59,7 @@ sub attach_external {
 	my @shards = $ibxish->search->xdb_shards_flat;
 	push @{$self->{shards_flat}}, @shards;
 	push(@{$self->{shard2ibx}}, $ibxish) for (@shards);
+	$ibxish;
 }
 
 # called by PublicInbox::Search::xdb
diff --git a/t/lei.t b/t/lei.t
index 41638950..befbe723 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -177,12 +177,18 @@ my $test_external = sub {
 	});
 	$lei->('ls-external');
 	like($out, qr/boost=0\n/s, 'ls-external has output');
+	diag 'ls-external '.$out;
+
+	$lei->('q', 's:"use boolean prefix"', '-o', "mboxrd:$home/rd.mbox",
+		'--external');
+	diag $out;
+	diag $err;
 };
 
 my $test_lei_common = sub {
-	$test_help->();
-	$test_config->();
-	$test_init->();
+	# $test_help->();
+	# $test_config->();
+	# $test_init->();
 	$test_external->();
 };
 

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] WIP
@ 2021-01-21  4:24 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-01-21  4:24 UTC (permalink / raw)
  To: spew

---
 perl/dtas-graph | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/perl/dtas-graph b/perl/dtas-graph
index a41e2e0..cabe5de 100755
--- a/perl/dtas-graph
+++ b/perl/dtas-graph
@@ -82,21 +82,24 @@ while (my $pid = shift @to_scan) {
 #
 foreach my $pid (keys %pids) {
 	my @out = `lsof -p $pid`;
-	# output is like this:
-	# play    12739   ew    0r  FIFO    0,7      0t0 36924019 pipe
+# output is like this:
+# play    12739  no1  0r  FIFO        0,7  0t0 36924019 pipe
+# daemon  10742  no1  3u  unix 0xdc6aea87  0t0 19554016 /path type=SEQPACKET
 	foreach my $l (@out) {
 		my @l = split(/\s+/, $l);
-		$l[4] eq "FIFO" or next;
-
 		my $fd = $l[3];
-		my $pipe_ino = $l[7];
-		my $info = $pipes{$pipe_ino} ||= { r => [], w => [] };
-		if ($fd =~ s/r\z//) {
-			push @{$info->{r}}, [ $pid, $fd ];
-		} elsif ($fd =~ s/w\z//) {
-			push @{$info->{w}}, [ $pid, $fd ];
+		my $type = $l[4];
+		my $ino = $l[7];
+		if ($type eq 'FIFO') {
+			my $info = $pipes{$ino} ||= { r => [], w => [] };
+			if ($fd =~ s/r\z//) {
+				push @{$info->{r}}, [ $pid, $fd ];
+			} elsif ($fd =~ s/w\z//) {
+				push @{$info->{w}}, [ $pid, $fd ];
+			}
+		} elsif ($type eq 'unix') {
+			my $info = $unix{$ino}
 		}
-
 	}
 }
 

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2021-03-08  7:11 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-03-08  7:11 UTC (permalink / raw)
  To: spew

From: Eric Wong <normalperson@yhbt.net>

---
 lib/PublicInbox/Search.pm | 8 ++++++++
 t/search.t                | 1 +
 2 files changed, 9 insertions(+)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 209969c5..3cd71ae6 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -242,6 +242,14 @@ sub xdb ($) {
 	};
 }
 
+sub sessionstate {
+	my ($self) = @_;
+	my $xdb = $self->xdb;
+	my @cb = map { $xdb->can($_) } qw(get_uuid get_revision);
+	scalar(@cb) == 2 ? join('-', map { $_->($xdb) }) :
+		$xdb->get_metadata('xs_sessionstate');
+}
+
 sub new {
 	my ($class, $ibx) = @_;
 	ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
diff --git a/t/search.t b/t/search.t
index 124c9acf..c67d8352 100644
--- a/t/search.t
+++ b/t/search.t
@@ -479,6 +479,7 @@ EOF
 			"searching chopped($i) digit yielded result $wild ");
 	}
 	is($ibx->search->mset('m:Pine m:LNX m:10010260936330')->size, 1);
+	diag $ibx->search->sessionstate;
 });
 
 { # List-Id searching

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2021-04-05  7:42 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-04-05  7:42 UTC (permalink / raw)
  To: spew

---
 script/lei | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/script/lei b/script/lei
index 78a7dab9..5b9b2e34 100755
--- a/script/lei
+++ b/script/lei
@@ -14,12 +14,9 @@ my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do {
 	PublicInbox::Spawn->can('send_cmd4');
 };
 
-my %pids;
 my $sigchld = sub {
 	my $flags = scalar(@_) ? POSIX::WNOHANG() : 0;
-	for my $pid (keys %pids) {
-		delete($pids{$pid}) if waitpid($pid, $flags) == $pid;
-	}
+	undef while ((waitpid(-1, $flags) // -1) > 0);
 };
 my @parent;
 my $exec_cmd = sub {
@@ -48,7 +45,7 @@ my $exec_cmd = sub {
 		$do_exec->() if scalar(@$fds); # git-credential, pager
 
 		# parent backgrounds on MUA
-		POSIX::setsid() > 0 or die "setsid: $!";
+		#POSIX::setsid() > 0 or die "setsid: $!";
 		@parent = ($parent);
 		return; # continue $recv_cmd in background
 	}

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2021-06-05 19:58 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-06-05 19:58 UTC (permalink / raw)
  To: spew

---
 MANIFEST                       |  1 +
 lib/PublicInbox/LEI.pm         |  7 ++--
 lib/PublicInbox/LeiImport.pm   | 33 ++++++++++++-------
 lib/PublicInbox/LeiInput.pm    | 29 +++++++++++------
 lib/PublicInbox/LeiMailSync.pm | 10 ++++++
 lib/PublicInbox/LeiPmdir.pm    | 59 ++++++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiTag.pm      |  2 +-
 lib/PublicInbox/MdirReader.pm  | 22 +++++++------
 t/lei-import-maildir.t         |  2 +-
 9 files changed, 130 insertions(+), 35 deletions(-)
 create mode 100644 lib/PublicInbox/LeiPmdir.pm

diff --git a/MANIFEST b/MANIFEST
index 5a70a144..7bdbf252 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -221,6 +221,7 @@ lib/PublicInbox/LeiMailSync.pm
 lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
 lib/PublicInbox/LeiP2q.pm
+lib/PublicInbox/LeiPmdir.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiRediff.pm
 lib/PublicInbox/LeiRemote.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 4b1a6da7..ee9f1f80 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -421,7 +421,7 @@ my %CONFIG_KEYS = (
 	'leistore.dir' => 'top-level storage location',
 );
 
-my @WQ_KEYS = qw(lxs l2m wq1 ikw); # internal workers
+my @WQ_KEYS = qw(lxs l2m wq1 ikw pmd); # internal workers
 
 sub _drop_wq {
 	my ($self) = @_;
@@ -565,7 +565,7 @@ sub pkt_op_pair {
 }
 
 sub workers_start {
-	my ($lei, $wq, $jobs, $ops) = @_;
+	my ($lei, $wq, $jobs, $ops, $flds) = @_;
 	$ops = {
 		'!' => [ \&fail_handler, $lei ],
 		'|' => [ \&sigpipe_handler, $lei ],
@@ -576,7 +576,8 @@ sub workers_start {
 	$ops->{''} //= [ $wq->can('_lei_wq_eof') || \&wq_eof, $lei ];
 	my $end = $lei->pkt_op_pair;
 	my $ident = $wq->{-wq_ident} // "lei-$lei->{cmd} worker";
-	$wq->wq_workers_start($ident, $jobs, $lei->oldset, { lei => $lei });
+	$flds->{lei} = $lei;
+	$wq->wq_workers_start($ident, $jobs, $lei->oldset, $flds);
 	delete $lei->{pkt_op_p};
 	my $op_c = delete $lei->{pkt_op_c};
 	@$end = ();
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 60f3241b..14b98480 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -6,6 +6,7 @@ package PublicInbox::LeiImport;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use PublicInbox::InboxWritable qw(eml_from_path);
 
 # /^input_/ subs are used by (or override) PublicInbox::LeiInput superclass
 
@@ -28,17 +29,26 @@ sub input_mbox_cb { # MboxReader callback
 	input_eml_cb($self, $eml, $vmd);
 }
 
-sub input_maildir_cb { # maildir_each_eml cb
-	my ($f, $kw, $eml, $self) = @_;
+sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
+	my ($self, $f, @args) = @_;
+	my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
+		die "BUG: $f was not from a Maildir?\n";
+	my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
+	return if index($fl, 'T') >= 0; # no Trashed messages
+	my $kw = PublicInbox::MdirReader::flags2kw($fl);
+	substr($folder, 0, 0) = 'maildir:'; # add prefix
+	my $lms = $self->{-lms_ro};
+	my $oidbin = $lms ? $lms->name_oidbin($folder, $bn) : undef;
+	my @docids = defined($oidbin) ?
+			$self->{over}->oidbin_exists($oidbin) : ();
 	my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
-	if ($self->{-mail_sync}) {
-		if ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) { # ugh...
-			$vmd->{sync_info} = [ "maildir:$1", \(my $n = $2) ];
-		} else {
-			warn "E: $f was not from a Maildir?\n";
-		}
+	if (scalar @docids) {
+		$self->{lse}->kw_changed(undef, $kw, \@docids) or return;
+		$self->{sto}->ipc_do('set_eml_vmd', undef, $vmd, \@docids);
+	} elsif (my $eml = eml_from_path($f)) {
+		$vmd->{sync_info} = [ $folder, $bn ] if $self->{-mail_sync};
+		$self->input_eml_cb($eml, $vmd);
 	}
-	$self->input_eml_cb($eml, $vmd);
 }
 
 sub input_net_cb { # imap_each / nntp_each
@@ -62,10 +72,10 @@ sub do_import_index ($$@) {
 	my $vmd_mod = $self->vmd_mod_extract(\@inputs);
 	return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
 	$self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
-	$self->prepare_inputs($lei, \@inputs) or return;
+	$lei->ale; # initialize for workers to read (before LeiPmdir->new)
 	$self->{-mail_sync} = $lei->{opt}->{'mail-sync'} // 1;
+	$self->prepare_inputs($lei, \@inputs) or return;
 
-	$lei->ale; # initialize for workers to read
 	my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
 	my $ikw;
 	if (my $net = $lei->{net}) {
@@ -93,6 +103,7 @@ sub do_import_index ($$@) {
 	$lei->{wq1} = $self;
 	$lei->{-err_type} = 'non-fatal';
 	$ikw->wq_close(1) if $ikw;
+	$lei->{pmd}->wq_close(1) if $lei->{pmd};
 	net_merge_all_done($self) unless $lei->{auth};
 	$op_c->op_wait_event($ops);
 }
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 4ff7a379..f0c141c8 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -151,9 +151,16 @@ sub input_path_url {
 		return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
 $input appears to be a maildir, not $ifmt
 EOM
-		PublicInbox::MdirReader->new->maildir_each_eml($input,
-					$self->can('input_maildir_cb'),
-					$self, @args);
+		my $mdr = PublicInbox::MdirReader->new;
+		if (my $pmd = $self->{pmd}) {
+			$mdr->maildir_each_file($input,
+						$pmd->can('each_mdir_fn'),
+						$pmd, @args);
+		} else {
+			$mdr->maildir_each_eml($input,
+						$self->can('input_maildir_cb'),
+						$self, @args);
+		}
 	} else {
 		$lei->fail("$input unsupported (TODO)");
 	}
@@ -215,7 +222,7 @@ sub prepare_inputs { # returns undef on error
 		push @{$sync->{no}}, '/dev/stdin' if $sync;
 	}
 	my $net = $lei->{net}; # NetWriter may be created by l2m
-	my (@f, @d);
+	my (@f, @md);
 	# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
 	for my $input (@$inputs) {
 		my $input_path = $input;
@@ -247,11 +254,11 @@ sub prepare_inputs { # returns undef on error
 				PublicInbox::MboxReader->reads($ifmt) or return
 					$lei->fail("$ifmt not supported");
 			} elsif (-d $input_path) {
-				require PublicInbox::MdirReader;
 				$ifmt eq 'maildir' or return
 					$lei->fail("$ifmt not supported");
 				$sync and $input = 'maildir:'.
 						$lei->abs_path($input_path);
+				push @md, $input;
 			} else {
 				return $lei->fail("Unable to handle $input");
 			}
@@ -266,21 +273,18 @@ $input is `eml', not --in-format=$in_fmt
 			if ($devfd >= 0 || -f $input || -p _) {
 				push @{$sync->{no}}, $input if $sync;
 				push @f, $input;
-			} elsif (-d $input) {
+			} elsif (-d "$input/new" && -d "$input/cur") {
 				if ($sync) {
 					$input = $lei->abs_path($input);
 					push @{$sync->{ok}}, $input;
 				}
-				push @d, $input;
+				push @md, $input;
 			} else {
 				return $lei->fail("Unable to handle $input")
 			}
 		}
 	}
 	if (@f) { check_input_format($lei, \@f) or return }
-	if (@d) { # TODO: check for MH vs Maildir, here
-		require PublicInbox::MdirReader;
-	}
 	if ($sync && $sync->{no}) {
 		return $lei->fail(<<"") if !$sync->{ok};
 --mail-sync specified but no inputs support it
@@ -299,6 +303,11 @@ $input is `eml', not --in-format=$in_fmt
 		$lei->{auth} //= PublicInbox::LeiAuth->new;
 		$lei->{net} //= $net;
 	}
+	if (scalar(@md)) {
+		require PublicInbox::MdirReader;
+		require PublicInbox::LeiPmdir;
+		$self->{pmd} = PublicInbox::LeiPmdir->new($lei, $self);
+	}
 	$self->{inputs} = $inputs;
 }
 
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 75603d89..c87685a0 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -375,6 +375,16 @@ EOM
 	$sth->fetchrow_array;
 }
 
+sub name_oidbin ($$$) {
+	my ($self, $mdir, $nm) = @_;
+	my $fid = $self->{fmap}->{$mdir} //= fid_for($self, $mdir) // return;
+	my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
+SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
+EOM
+	$sth->execute($fid, $nm);
+	$sth->fetchrow_array;
+}
+
 sub imap_oid {
 	my ($self, $lei, $uid_uri) = @_;
 	my $mailbox_uri = $uid_uri->clone;
diff --git a/lib/PublicInbox/LeiPmdir.pm b/lib/PublicInbox/LeiPmdir.pm
new file mode 100644
index 00000000..3f9c9e4a
--- /dev/null
+++ b/lib/PublicInbox/LeiPmdir.pm
@@ -0,0 +1,59 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# WQ worker for dealing with parallel Maildir reads;
+# this does NOT use the {shard_info} field of LeiToMail
+# (and we may remove {shard_info})
+# WQ key: {pmd}
+package PublicInbox::LeiPmdir;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use PublicInbox::MdirReader;
+
+sub new {
+	my ($cls, $lei, $ipt) = @_;
+	my $self = bless { -wq_ident => 'lei Maildir worker' }, $cls;
+	my ($op_c, $ops) = $lei->workers_start($self, $self->detect_nproc,
+		undef, { ipt => $ipt }); # LeiInput subclass
+	$op_c->{ops} = $ops; # for PktOp->event_step
+	$lei->{pmd} = $self;
+}
+
+sub ipc_atfork_child {
+	my ($self) = @_;
+	my $lei = $self->{lei};
+	$lei->_lei_atfork_child;
+	my $ipt = $self->{ipt} // die 'BUG: no self->{ipt}';
+	$ipt->{lei} = $lei;
+	$ipt->{sto} = $lei->{sto} // die 'BUG: no lei->{sto}';
+	$ipt->{lse} = $ipt->{sto}->search;
+	$ipt->{over} = $ipt->{lse}->over;
+	$ipt->{-lms_ro} //= $ipt->{lse}->lms; # may be undef or '0'
+	$self->SUPER::ipc_atfork_child;
+}
+
+sub each_mdir_fn { # maildir_each_file callback
+	my ($f, $self, @args) = @_;
+	$self->wq_io_do('mdir_iter', [], $f, @args);
+}
+
+sub mdir_iter { # via wq_io_do
+	my ($self, $f, @args) = @_;
+	$self->{ipt}->pmdir_cb($f, @args);
+}
+
+sub pmd_done_wait {
+	my ($arg, $pid) = @_;
+	my ($self, $lei) = @$arg;
+	my $wait = $lei->{sto}->ipc_do('done');
+	$lei->can('wq_done_wait')->($arg, $pid);
+}
+
+sub _lei_wq_eof { # EOF callback for main lei daemon
+	my ($lei) = @_;
+	my $pmd = delete $lei->{pmd} or return $lei->fail;
+	$pmd->wq_wait_old(\&pmd_done_wait, $lei);
+}
+
+1;
diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm
index b6abd533..0a8644be 100644
--- a/lib/PublicInbox/LeiTag.pm
+++ b/lib/PublicInbox/LeiTag.pm
@@ -117,6 +117,6 @@ sub _complete_tag {
 
 no warnings 'once'; # the following works even when LeiAuth is lazy-loaded
 *net_merge_all = \&PublicInbox::LeiAuth::net_merge_all;
-*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+*net_merge_all_done = \&PublicInbox::LeiInput::parallel_net_merge_all_done;
 
 1;
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index 304be63d..484bf0a8 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -87,17 +87,21 @@ sub maildir_each_eml {
 sub new { bless {}, __PACKAGE__ }
 
 sub flags2kw ($) {
-	my @unknown;
-	my %kw;
-	for (split(//, $_[0])) {
-		my $k = $c2kw{$_};
-		if (defined($k)) {
-			$kw{$k} = 1;
-		} else {
-			push @unknown, $_;
+	if (wantarray) {
+		my @unknown;
+		my %kw;
+		for (split(//, $_[0])) {
+			my $k = $c2kw{$_};
+			if (defined($k)) {
+				$kw{$k} = 1;
+			} else {
+				push @unknown, $_;
+			}
 		}
+		(\%kw, \@unknown);
+	} else {
+		[ sort(map { $c2kw{$_} // () } split(//, $_[0])) ];
 	}
-	(\%kw, \@unknown);
 }
 
 1;
diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t
index 688b10ce..c81e7805 100644
--- a/t/lei-import-maildir.t
+++ b/t/lei-import-maildir.t
@@ -28,7 +28,7 @@ test_lei(sub {
 	is(scalar(keys %v), 1, 'inspect handles relative and absolute paths');
 	my $inspect = json_utf8->decode([ keys %v ]->[0]);
 	is_deeply($inspect, {"maildir:$md" => { 'name.count' => 1 }},
-		'inspect maildir: path had expected output');
+		'inspect maildir: path had expected output') or xbail($inspect);
 
 	lei_ok(qw(q s:boolean));
 	my $res = json_utf8->decode($lei_out);

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH] wip
@ 2021-10-27 20:16 Eric Wong
  0 siblings, 0 replies; 23+ messages in thread
From: Eric Wong @ 2021-10-27 20:16 UTC (permalink / raw)
  To: spew

---
 lib/PublicInbox/LeiXSearch.pm | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index acc36897..2a037f2b 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -283,20 +283,22 @@ sub each_remote_eml { # callback for MboxReader->mboxrd
 	my $smsg = bless {}, 'PublicInbox::Smsg';
 	if ($self->{import_sto} && !$xoids) {
 		my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml);
-		$smsg = $res if ref($res) eq ref($smsg); # totally new message
+		if (ref($res) eq ref($smsg)) { # totally new message
+			$smsg = $res;
+			$self->{-imported} = 1;
+		}
 		$smsg->{kw} = $kw; # short-circuit xsmsg_vmd
 	}
 	$smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
 				: $lei->git_oid($eml)->hexdigest;
 	_smsg_fill($smsg, $eml);
 	wait_startq($lei);
+	my $nr = ++$lei->{-nr_remote_eml}; # needed for lss->cfg_set
 	if ($lei->{-progress}) {
-		++$lei->{-nr_remote_eml};
 		my $now = now();
 		my $next = $lei->{-next_progress} //= ($now + 1);
 		if ($now > $next) {
 			$lei->{-next_progress} = $now + 1;
-			my $nr = $lei->{-nr_remote_eml};
 			mset_progress($lei, $lei->{-current_url}, $nr, '?');
 		}
 	}
@@ -374,13 +376,14 @@ sub query_remote_mboxrd {
 		$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
 		PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
 						$lei, $each_smsg);
-		my $nr = $lei->{-nr_remote_eml};
-		my $wait = $lei->{sto}->wq_do('done') if $nr && $lei->{sto};
+		if ($self->{import_sto} && delete($self->{-imported})) {
+			my $wait = $self->{import_sto}->wq_do('done');
+		}
 		$reap_curl->join;
 		if ($? == 0) {
 			# don't update if no results, maybe MTA is down
-			$key && $nr and
-				$lei->{lss}->cfg_set($key, $start);
+			my $nr = $lei->{-nr_remote_eml};
+			$lei->{lss}->cfg_set($key, $start) if $key && $nr;
 			mset_progress($lei, $lei->{-current_url}, $nr, $nr);
 			next;
 		}

^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2021-10-27 20:16 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-05 18:40 [PATCH] wip Eric Wong
  -- strict thread matches above, loose matches on Subject: below --
2021-10-27 20:16 Eric Wong
2021-06-05 19:58 Eric Wong
2021-04-05  7:42 Eric Wong
2021-03-08  7:11 Eric Wong
2021-01-21  4:24 [PATCH] WIP Eric Wong
2021-01-03 22:57 [PATCH] wip Eric Wong
2020-12-27 11:36 [PATCH] WIP Eric Wong
2020-11-15  7:35 [PATCH] wip Eric Wong
2020-04-23  4:27 Eric Wong
2020-04-20  7:14 Eric Wong
2020-01-13  9:24 [PATCH] WIP Eric Wong
2019-05-11 22:55 Eric Wong
2019-01-02  9:21 [PATCH] wip Eric Wong
2018-07-06 21:31 Eric Wong
2018-06-24 11:55 Eric Wong
2018-06-24  8:39 Eric Wong
2017-07-15  1:42 [PATCH] WIP Eric Wong
2017-04-12 20:17 [PATCH] wip Eric Wong
2016-08-23 20:07 Eric Wong
2016-08-18  2:16 Eric Wong
2016-06-26  3:46 Eric Wong
2015-12-22  0:15 Eric Wong

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).