dumping ground for random patches and texts
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] cindex cxx shard roots
Date: Wed, 23 Aug 2023 11:50:42 +0000	[thread overview]
Message-ID: <20230823115042.3890073-1-e@80x24.org> (raw)

---
 lib/PublicInbox/CidxXapHelperAux.pm |   2 +-
 lib/PublicInbox/CodeSearchIdx.pm    |   5 +-
 lib/PublicInbox/XapHelper.pm        |  31 ++-
 lib/PublicInbox/xap_helper.h        | 331 ++++++++++++++++++++++++----
 t/xap_helper.t                      |  44 +++-
 5 files changed, 356 insertions(+), 57 deletions(-)

diff --git a/lib/PublicInbox/CidxXapHelperAux.pm b/lib/PublicInbox/CidxXapHelperAux.pm
index a35c38f7..f402bde0 100644
--- a/lib/PublicInbox/CidxXapHelperAux.pm
+++ b/lib/PublicInbox/CidxXapHelperAux.pm
@@ -32,7 +32,7 @@ sub event_step {
 	my @lines = split(/^/m, $buf);
 	$self->{buf} = pop @lines if substr($lines[-1], -1) ne "\n";
 	for my $l (@lines) {
-		if ($l =~ /\Amset\.size=[0-9]+\n\z/) {
+		if ($l =~ /\Amset\.size=[0-9]+ nr_out=[0-9]+\n\z/) {
 			delete $self->{cidx}->{PENDING}->{$pfx};
 			$self->{cidx}->index_next;
 		}
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 96bbf6f4..f8e11761 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -518,9 +518,11 @@ sub dump_roots_once {
 	progress($self, 'dumping IDs from coderepos');
 	local $self->{xdb};
 	@ID2ROOT = $self->all_terms('G');
+	die 'too many roots' if scalar(@ID2ROOT) > 0xffffffff;
 	my $root2id = "$TMPDIR/root2id";
 	open my $fh, '>', $root2id or die "open($root2id): $!";
-	say $fh $_ for @ID2ROOT;
+	my $nr = -1;
+	for (@ID2ROOT) { print $fh $_, "\0", ++$nr, "\0" } # mmap-friendly
 	close $fh or die "close: $!";
 	# dump_roots | sort -k1,1 | OFS=' ' uniq_fold >to_root_id
 	pipe(my ($sort_r, $sort_w)) or die "pipe: $!";
@@ -876,6 +878,7 @@ sub associate {
 	my ($self) = @_;
 	return if $DO_QUIT;
 	@IDX_SHARDS or return warn("# aborting on no shards\n");
+	unlink("$TMPDIR/root2id");
 	my @pending = keys %{$self->{PENDING}};
 	die "E: pending=@pending jobs not done\n" if @pending;
 	progress($self, 'associating...');
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index e62a5c1c..c80be810 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -46,15 +46,19 @@ sub cmd_dump_ibx {
 	my $mset = $req->{srch}->mset($qry_str, $opt);
 	my $out = $req->{0};
 	$out->autoflush(1);
+	my $nr = 0;
 	for my $it ($mset->items) {
 		my $doc = $it->get_document;
 		for my $p (@pfx) {
 			for (xap_terms($p, $doc)) {
 				print $out "$_ $ibx_id\n" or die "print: $!";
+				++$nr;
 			}
 		}
 	}
-	if (my $err = $req->{1}) { say $err 'mset.size=', $mset->size }
+	if (my $err = $req->{1}) {
+		say $err 'mset.size='.$mset->size.' nr_out='.$nr
+	}
 }
 
 sub cmd_dump_roots {
@@ -63,23 +67,28 @@ sub cmd_dump_roots {
 		warn('usage: dump_roots [OPTIONS] ROOT2ID_FILE QRY_STR');
 	my @pfx = @{$req->{A}} or return warn('dump_roots requires -A PREFIX');
 	open my $fh, '<', $root2id_file or die "open($root2id_file): $!";
-	my @lines = <$fh> or die "read($root2id_file): $!";
-	chomp(@lines);
-	my $nr = -1;
-	my %root2id = map { $_ => ++$nr } @lines;
-	undef @lines;
-	my $opt = { relevance => -2, limit => $req->{'m'},
+	my %root2id; # record format: $OIDHEX "\0" uint32_t
+	my @x = split(/\0/, do { local $/; <$fh> } // die "readline: $!");
+	while (@x) {
+		my $oidhex = shift @x;
+		$root2id{$oidhex} = shift @x;
+	}
+	my $opt = { relevance => -1, limit => $req->{'m'},
 			offset => $req->{o} // 0 };
 	my $mset = $req->{srch}->mset($qry_str, $opt);
 	$req->{0}->autoflush(1);
 	my $buf = '';
+	my $nr = 0;
 	for my $it ($mset->items) {
 		my $doc = $it->get_document;
 		my $G = join(' ', map { $root2id{$_} } xap_terms('G', $doc));
 		for my $p (@pfx) {
-			$buf .= "$_ $G\n" for (xap_terms($p, $doc));
+			for (xap_terms($p, $doc)) {
+				$buf .= "$_ $G\n";
+				++$nr;
+			}
 		}
-		if (length($buf) > 8192) {
+		if (!($nr & 0x3fff)) {
 			flock($fh, LOCK_EX) or die "flock: $!";
 			print { $req->{0} } $buf or die "print: $!";
 			flock($fh, LOCK_UN) or die "flock: $!";
@@ -91,7 +100,9 @@ sub cmd_dump_roots {
 		print { $req->{0} } $buf or die "print: $!";
 		flock($fh, LOCK_UN) or die "flock: $!";
 	}
-	if (my $err = $req->{1}) { say $err 'mset.size=', $mset->size }
+	if (my $err = $req->{1}) {
+		say $err 'mset.size='.$mset->size.' nr_out='.$nr
+	}
 }
 
 sub dispatch {
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 52db92b7..558053cc 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -6,12 +6,16 @@
  * this is not linked to Perl in any way.
  * C (not C++) is used as much as possible to lower the contribution
  * barrier for hackers who mainly know C (this includes the maintainer).
+ * Yes, that means we use C stdlib stuff like hsearch and open_memstream
+ * instead of their equivalents in the C++ stdlib :P
  * Everything here is an unstable internal API of public-inbox and
  * NOT intended for ordinary users; only public-inbox hackers
  */
 #ifndef _ALL_SOURCE
 #	define _ALL_SOURCE
 #endif
+#include <sys/file.h>
+#include <sys/mman.h>
 #include <sys/resource.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
@@ -80,6 +84,7 @@ struct req { // argv and pfxv point into global rbuf
 	unsigned long long max;
 	unsigned long long off;
 	unsigned long timeout_sec;
+	size_t nr_out;
 	long sort_col; // value column, negative means BoolWeight
 	int argc;
 	int pfxc;
@@ -96,6 +101,28 @@ struct worker {
 	unsigned nr;
 };
 
+#define SPLIT2ARGV(dst,buf,len) split2argv(dst,buf,len,MY_ARRAY_SIZE(dst))
+static size_t split2argv(char **dst, char *buf, size_t len, size_t limit)
+{
+	if (buf[0] == 0 || len == 0 || buf[len - 1] != 0) {
+		warnx("bogus argument given");
+		return 0;
+	}
+	size_t nr = 0;
+	char *c = buf;
+	for (size_t i = 1; i < len; i++) {
+		if (!buf[i]) {
+			dst[nr++] = c;
+			c = buf + i + 1;
+		}
+		if (nr == limit) {
+			warnx("too many args: %zu", nr);
+			return 0;
+		}
+	}
+	return (long)nr;
+}
+
 static bool has_threadid(const struct srch *srch)
 {
 	return srch->db->get_metadata("has_threadid") == "1";
@@ -118,8 +145,12 @@ static Xapian::Enquire prep_enquire(const struct req *req)
 
 static Xapian::MSet enquire_mset(struct req *req, Xapian::Enquire *enq)
 {
-	if (!req->max)
-		req->max = 50;
+	if (!req->max) {
+		switch (sizeof(Xapian::doccount)) {
+		case 4: req->max = UINT_MAX; break;
+		default: req->max = ULLONG_MAX;
+		}
+	}
 	for (int i = 0; i < 9; i++) {
 		try {
 			Xapian::MSet mset = enq->get_mset(req->off, req->max);
@@ -131,6 +162,7 @@ static Xapian::MSet enquire_mset(struct req *req, Xapian::Enquire *enq)
 	return enq->get_mset(req->off, req->max);
 }
 
+// for v1, v2, and extindex
 static Xapian::MSet mail_mset(struct req *req, const char *qry_str)
 {
 	struct srch *srch = req->srch;
@@ -150,6 +182,21 @@ static Xapian::MSet mail_mset(struct req *req, const char *qry_str)
 	return enquire_mset(req, &enq);
 }
 
+// for cindex
+static Xapian::MSet commit_mset(struct req *req, const char *qry_str)
+{
+	struct srch *srch = req->srch;
+	Xapian::Query qry = srch->qp->parse_query(qry_str, srch->qp_flags);
+	// TODO: git_dir + roots_filter
+
+	// we only want commits:
+	qry = Xapian::Query(Xapian::Query::OP_FILTER, qry,
+				Xapian::Query("T" "c"));
+	Xapian::Enquire enq = prep_enquire(req);
+	enq.set_query(qry);
+	return enquire_mset(req, &enq);
+}
+
 static bool starts_with(const std::string *s, const char *pfx, size_t pfx_len)
 {
 	return s->size() >= pfx_len && !memcmp(pfx, s->c_str(), pfx_len);
@@ -165,9 +212,11 @@ static void dump_ibx_term(struct req *req, const char *pfx,
 	for (cur.skip_to(pfx); cur != end; cur++) {
 		std::string tn = *cur;
 
-		if (starts_with(&tn, pfx, pfx_len))
+		if (starts_with(&tn, pfx, pfx_len)) {
 			fprintf(req->fp[0], "%s %s\n",
 				tn.c_str() + pfx_len, ibx_id);
+			++req->nr_out;
+		}
 	}
 }
 
@@ -194,7 +243,6 @@ static bool cmd_dump_ibx(struct req *req)
 	}
 	req->asc = true;
 	req->sort_col = -1;
-	req->max = (unsigned long long)req->srch->db->get_doccount();
 	Xapian::MSet mset = mail_mset(req, req->argv[optind + 1]);
 	for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
 		try {
@@ -208,8 +256,244 @@ static bool cmd_dump_ibx(struct req *req)
 		}
 	}
 	if (req->fp[1])
-		fprintf(req->fp[1], "mset.size=%llu\n",
-			(unsigned long long)mset.size());
+		fprintf(req->fp[1], "mset.size=%llu nr_out=%zu\n",
+			(unsigned long long)mset.size(), req->nr_out);
+	return true;
+}
+
+struct fbuf {
+	FILE *fp;
+	char *ptr;
+	size_t len;
+};
+
+struct dump_roots_tmp {
+	struct stat sb;
+	void *mm_ptr;
+	char **entries;
+	struct fbuf wbuf;
+	int root2id_fd;
+};
+
+#define CLEANUP_FBUF __attribute__((__cleanup__(fbuf_ensure)))
+static void fbuf_ensure(void *ptr)
+{
+	struct fbuf *fbuf = (struct fbuf *)ptr;
+	if (fbuf->fp && fclose(fbuf->fp))
+		perror("fclose(fbuf->fp)");
+	fbuf->fp = NULL;
+	free(fbuf->ptr);
+}
+
+static bool fbuf_init(struct fbuf *fbuf)
+{
+	assert(!fbuf->ptr);
+	fbuf->fp = open_memstream(&fbuf->ptr, &fbuf->len);
+	if (fbuf->fp) return true;
+	perror("open_memstream(fbuf)");
+	return false;
+}
+
+static void xclose(int fd)
+{
+	if (close(fd) < 0 && errno != EINTR)
+		err(EXIT_FAILURE, "BUG: close");
+}
+
+#define CLEANUP_DUMP_ROOTS __attribute__((__cleanup__(dump_roots_ensure)))
+static void dump_roots_ensure(void *ptr)
+{
+	struct dump_roots_tmp *drt = (struct dump_roots_tmp *)ptr;
+	if (drt->root2id_fd >= 0)
+		xclose(drt->root2id_fd);
+	hdestroy(); // idempotent
+	if (drt->mm_ptr && munmap(drt->mm_ptr, drt->sb.st_size))
+		err(EXIT_FAILURE, "BUG: munmap");
+	free(drt->entries);
+	fbuf_ensure(&drt->wbuf);
+}
+
+static bool root2ids_str(struct fbuf *root_ids, struct dump_roots_tmp *drt,
+			Xapian::Document *doc)
+{
+	if (!fbuf_init(root_ids)) return false;
+
+	bool ok = true;
+	Xapian::TermIterator cur = doc->termlist_begin();
+	Xapian::TermIterator end = doc->termlist_end();
+	ENTRY e, *ep;
+	for (cur.skip_to("G"); cur != end; cur++) {
+		std::string tn = *cur;
+		if (!starts_with(&tn, "G", 1))
+			continue;
+		union { const char *in; char *out; } u;
+		u.in = tn.c_str() + 1;
+		e.key = u.out;
+		ep = hsearch(e, FIND);
+		if (!ep) {
+			warnx("hsearch miss `%s'", e.key);
+			return false;
+		}
+		// ep->data is a NUL-terminated string matching /[0-9]+/
+		fputc(' ', root_ids->fp);
+		fputs((const char *)ep->data, root_ids->fp);
+	}
+	fputc('\n', root_ids->fp);
+	if (ferror(root_ids->fp) | fclose(root_ids->fp)) {
+		perror("ferror|fclose(root_ids)");
+		ok = false;
+	}
+	root_ids->fp = NULL;
+	return ok;
+}
+
+// writes term values matching @pfx for a given @doc, ending the line
+// with the contents of @root_ids
+static void dump_roots_term(struct req *req, const char *pfx,
+				struct dump_roots_tmp *drt,
+				struct fbuf *root_ids,
+				Xapian::Document *doc)
+{
+	Xapian::TermIterator cur = doc->termlist_begin();
+	Xapian::TermIterator end = doc->termlist_end();
+	size_t pfx_len = strlen(pfx);
+
+	for (cur.skip_to(pfx); cur != end; cur++) {
+		std::string tn = *cur;
+		if (!starts_with(&tn, pfx, pfx_len))
+			continue;
+		fputs(tn.c_str() + pfx_len, drt->wbuf.fp);
+		fwrite(root_ids->ptr, root_ids->len, 1, drt->wbuf.fp);
+		++req->nr_out;
+	}
+}
+
+// we may have lines which exceed PIPE_BUF, so we do our own
+// buffering and rely on flock(2), here
+static bool dump_roots_flush(struct req *req, struct dump_roots_tmp *drt)
+{
+	char *p;
+	int fd = fileno(req->fp[0]);
+	bool ok = true;
+
+	if (!drt->wbuf.fp) return true;
+	if (fd < 0) err(EXIT_FAILURE, "BUG: fileno");
+	if (fclose(drt->wbuf.fp)) {
+		warn("fclose(drt->wbuf.fp)"); // malloc failure?
+		return false;
+	}
+	drt->wbuf.fp = NULL;
+	if (!drt->wbuf.len) goto done_free;
+	if (flock(drt->root2id_fd, LOCK_EX)) {
+		perror("LOCK_EX");
+		return false;
+	}
+	p = drt->wbuf.ptr;
+	do {
+		ssize_t n = write(fd, p, drt->wbuf.len);
+		if (n > 0) {
+			drt->wbuf.len -= n;
+			p += n;
+		} else {
+			perror(n ? "write" : "write (zero bytes)");
+			return false;
+		}
+	} while (drt->wbuf.len);
+	if (flock(drt->root2id_fd, LOCK_UN)) {
+		perror("LOCK_UN");
+		return false;
+	}
+done_free:
+	free(drt->wbuf.ptr);
+	drt->wbuf.ptr = NULL;
+	return ok;
+}
+
+static bool cmd_dump_roots(struct req *req)
+{
+	CLEANUP_DUMP_ROOTS struct dump_roots_tmp drt { .root2id_fd = -1 };
+	if ((optind + 1) >= req->argc) {
+		warnx("usage: dump_roots [OPTIONS] ROOT2ID_FILE QRY_STR");
+		return false; // need file + qry_str
+	}
+	if (!req->pfxc) {
+		warnx("dump_roots requires -A PREFIX");
+		return false;
+	}
+	const char *root2id_file = req->argv[optind];
+	drt.root2id_fd = open(root2id_file, O_RDONLY);
+	if (drt.root2id_fd < 0) {
+		warn("open(%s)", root2id_file);
+		return false;
+	}
+	if (fstat(drt.root2id_fd, &drt.sb)) {
+		warn("fstat(%s)", root2id_file);
+		return false;
+	}
+	// each entry is at least 43 bytes ({OIDHEX}\0{INT}\0),
+	// so /32 overestimates the number of expected entries by
+	// ~25% (as recommended by Linux hcreate(3) manpage)
+	size_t est = (drt.sb.st_size / 32) + 1;
+	if ((uint64_t)drt.sb.st_size > (uint64_t)SIZE_MAX) {
+		warnx("%s size too big (%lld bytes > %zu)", root2id_file,
+			(long long)drt.sb.st_size, SIZE_MAX);
+		return false;
+	}
+	drt.mm_ptr = mmap(NULL, drt.sb.st_size, PROT_READ,
+				MAP_PRIVATE, drt.root2id_fd, 0);
+	if (drt.mm_ptr == MAP_FAILED) {
+		warn("mmap(%s)", root2id_file);
+		return false;
+	}
+	drt.entries = (char **)calloc(est * 2, sizeof(char *));
+	if (!drt.entries) {
+		warn("calloc(%zu * 2, %zu)", est, sizeof(char *));
+		return false;
+	}
+	size_t tot = split2argv(drt.entries, (char *)drt.mm_ptr,
+				drt.sb.st_size, est * 2);
+	if (tot <= 0) return false; // split2argv already warned on error
+	if (!hcreate(est)) {
+		warn("hcreate(%zu)", est);
+		return false;
+	}
+	for (size_t i = 0; i < tot; ) {
+		ENTRY e;
+		e.key = drt.entries[i++];
+		e.data = drt.entries[i++];
+		if (!hsearch(e, ENTER)) {
+			warn("hsearch(%s => %s, ENTER)", e.key,
+				(const char *)e.data);
+			return false;
+		}
+	}
+	req->asc = true;
+	req->sort_col = -1;
+	Xapian::MSet mset = commit_mset(req, req->argv[optind + 1]);
+	for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); i++) {
+		CLEANUP_FBUF struct fbuf root_ids = { 0 };
+		if (!drt.wbuf.fp && !fbuf_init(&drt.wbuf))
+			return false;
+		try {
+			Xapian::Document doc = i.get_document();
+			if (!root2ids_str(&root_ids, &drt, &doc))
+				return false;
+			for (int p = 0; p < req->pfxc; p++)
+				dump_roots_term(req, req->pfxv[p], &drt,
+						&root_ids, &doc);
+		} catch (const Xapian::Error & e) {
+			fprintf(orig_err, "W: %s (#%ld)\n",
+				e.get_description().c_str(), (long)(*i));
+			continue;
+		}
+		if (!(req->nr_out & 0x3fff) && !dump_roots_flush(req, &drt))
+			return false;
+	}
+	if (!dump_roots_flush(req, &drt))
+		return false;
+	if (req->fp[1])
+		fprintf(req->fp[1], "mset.size=%llu nr_out=%zu\n",
+			(unsigned long long)mset.size(), req->nr_out);
 	return true;
 }
 
@@ -228,7 +512,8 @@ static const struct cmd_entry {
 	cmd fn;
 } cmds[] = { // should be small enough to not need bsearch || gperf
 	// most common commands first
-	CMD(dump_ibx),
+	CMD(dump_ibx), // many inboxes
+	CMD(dump_roots), // per-cidx shard
 	CMD(test_inspect), // least common commands last
 };
 
@@ -240,12 +525,6 @@ union my_cmsg {
 	char pad[sizeof(struct cmsghdr) + 16 + RECV_FD_SPACE];
 };
 
-static void xclose(int fd)
-{
-	if (close(fd) < 0 && errno != EINTR)
-		err(EXIT_FAILURE, "BUG: close");
-}
-
 static bool recv_req(struct req *req, char *rbuf, size_t *len)
 {
 	union my_cmsg cmsg = { 0 };
@@ -306,28 +585,6 @@ static bool recv_req(struct req *req, char *rbuf, size_t *len)
 	return false;
 }
 
-#define SPLIT2ARGV(dst,buf,len) split2argv(dst,buf,len,MY_ARRAY_SIZE(dst))
-static int split2argv(char **dst, char *buf, size_t len, size_t limit)
-{
-	if (buf[0] == 0 || len == 0 || buf[len - 1] != 0) {
-		warnx("bogus argument given");
-		return 0;
-	}
-	size_t nr = 0;
-	char *c = buf;
-	for (size_t i = 1; i < len; i++) {
-		if (!buf[i]) {
-			dst[nr++] = c;
-			c = buf + i + 1;
-		}
-		if (nr == limit) {
-			warnx("too many args: %zu", nr);
-			return 0;
-		}
-	}
-	return (int)nr;
-}
-
 static int srch_cmp(const void *pa, const void *pb) // for tfind|tsearch
 {
 	const struct srch *a = (const struct srch *)pa;
@@ -355,7 +612,7 @@ static bool srch_init(struct req *req)
 	char *dirv[MY_ARG_MAX];
 	int i;
 	struct srch *srch = req->srch;
-	int dirc = SPLIT2ARGV(dirv, srch->paths, (size_t)srch->paths_len);
+	int dirc = (int)SPLIT2ARGV(dirv, srch->paths, (size_t)srch->paths_len);
 	const unsigned FLAG_PHRASE = Xapian::QueryParser::FLAG_PHRASE;
 	srch->qp_flags = FLAG_PHRASE |
 			Xapian::QueryParser::FLAG_BOOLEAN |
@@ -538,7 +795,7 @@ static void recv_loop(void) // worker process loop
 				perror("W: setlinebuf(req.fp[1])");
 			stderr = req.fp[1];
 		}
-		req.argc = SPLIT2ARGV(req.argv, rbuf, len);
+		req.argc = (int)SPLIT2ARGV(req.argv, rbuf, len);
 		if (req.argc > 0)
 			dispatch(&req);
 		if (ferror(req.fp[0]) | fclose(req.fp[0]))
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 11791e1d..410bf263 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -91,7 +91,7 @@ my $test = sub {
 	my $res = do { local $/; <$r> };
 	is(join('', @res), $res, 'got identical response w/ error pipe');
 	my $stats = do { local $/; <$err_rd> };
-	is($stats, "mset.size=6\n", 'mset.size reported');
+	is($stats, "mset.size=6 nr_out=6\n", 'mset.size reported');
 
 	if ($arg[-1] !~ /\('-j0'\)/) {
 		kill('KILL', $cinfo{pid});
@@ -105,12 +105,14 @@ my $test = sub {
 };
 my $ar;
 
-$ar = $test->(qw[-MPublicInbox::XapHelper -e
-		PublicInbox::XapHelper::start('-j0')]);
-$ar = $test->(qw[-MPublicInbox::XapHelper -e
-		PublicInbox::XapHelper::start('-j1')]);
-
-my @NO_CXX = (0);
+my @NO_CXX;
+if (!$ENV{TEST_XH_CXX_ONLY}) {
+	$ar = $test->(qw[-MPublicInbox::XapHelper -e
+			PublicInbox::XapHelper::start('-j0')]);
+	$ar = $test->(qw[-MPublicInbox::XapHelper -e
+			PublicInbox::XapHelper::start('-j1')]);
+	push @NO_CXX, 0;
+}
 SKIP: {
 	require PublicInbox::XapHelperCxx;
 	eval { PublicInbox::XapHelperCxx::check_build() };
@@ -123,6 +125,20 @@ SKIP: {
 			PublicInbox::XapHelperCxx::start('-j1')]);
 };
 
+require PublicInbox::CodeSearch;
+my $cs_int = PublicInbox::CodeSearch->new("$crepo/public-inbox-cindex");
+my $root2id_file = "$tmp/root2id";
+my @id2root;
+{
+	open my $fh, '>', $root2id_file;
+	my $i = -1;
+	for ($cs_int->all_terms('G')) {
+		print $fh $_, "\0", ++$i, "\0";
+		$id2root[$i] = $_;
+	}
+	close $fh;
+}
+
 for my $n (@NO_CXX) {
 	local $ENV{PI_NO_CXX} = $n;
 	my ($xhc, $pid) = PublicInbox::XapClient::start_helper('-j0');
@@ -139,7 +155,19 @@ for my $n (@NO_CXX) {
 	my $res = do { local $/; <$r> };
 	is($res, "$dfid 9\n$mid 9\n", "got expected result ($xhc->{impl})");
 	my $err = do { local $/; <$err_r> };
-	is($err, "mset.size=1\n", "got expected status ($xhc->{impl})");
+	is($err, "mset.size=1 nr_out=2\n", "got expected status ($xhc->{impl})");
+
+	pipe($err_r, $err_w);
+	$r = $xhc->mkreq([ undef, $err_w ], qw(dump_roots -c -A XDFID),
+			(map { ('-d', $_) } @int),
+			$root2id_file, 'dt:19700101'.'000000..');
+	close $err_w;
+	my @res = <$r>;
+	is(scalar(@res), 5, 'got expected rows');
+	is(scalar(@res), scalar(grep(/\A[0-9a-f]{40,} [0-9]+\n\z/, @res)),
+		'entries match format');
+	$err = do { local $/; <$err_r> };
+	is($err, "mset.size=6 nr_out=5\n", "got expected status ($xhc->{impl})");
 }
 
 done_testing;

                 reply	other threads:[~2023-08-23 11:50 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230823115042.3890073-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=spew@80x24.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).