# Provenance: this module was introduced by commit
# b18ecb7707e83cb8cb38c3736aecd984999ca0a7
# From: Eric Wong
# Date: Thu, 24 Aug 2023 01:22:33 +0000
# Subject: introduce optional C++ xap_helper
#
# This allows us to perform the expensive "dump_ibx" operations in
# native C++ code using the Xapian C++ library.  This provides the
# majority of the speedup with the -cindex --associate switch.
#
# Eventually this may be expanded to cover all uses of Xapian within
# the project to ensure we have access to Xapian APIs which aren't
# available in XS|SWIG bindings; and also for ease-of-installation on
# systems which don't provide pre-packaged Perl Xapian bindings
# (e.g. OpenBSD 7.3) but do provide Xapian development libraries.
#
# Most of the C++ code is still C, as I'm not remotely familiar with
# C++ compared to C.  I suspect many users and potential hackers being
# from git, Linux kernel, and glibc world are in the same boat.
#
# That commit created lib/PublicInbox/CidxXapHelperAux.pm
# (1 file changed, 48 insertions).

# Copyright (C) all contributors
# License: AGPL-3.0+

# Intended for PublicInbox::DS::event_loop for -cindex --associate,
# this reports auxiliary status while dumping
package PublicInbox::CidxXapHelperAux;
use v5.12;
use parent qw(PublicInbox::DS);
use PublicInbox::Syscall qw(EPOLLIN);

# Constructor.
#   $rpipe     - read end of a pipe; it connects to req->fp[1] in
#                xap_helper.h.  It is switched to non-blocking mode here.
#   $cidx      - object which receives ->progress and ->index_next calls
#                and owns the {PENDING} hash (see event_step)
#   $pfx       - string prepended to every progress message; also the
#                key removed from $cidx->{PENDING} once a dump finishes
#   $associate - stored under {associate}; not read inside this package
# Returns whatever PublicInbox::DS::new returns (presumably the watcher
# object itself -- confirm against PublicInbox::DS).
sub new {
	my ($cls, $rpipe, $cidx, $pfx, $associate) = @_;
	my $self = bless {
		cidx => $cidx,
		pfx => $pfx,
		associate => $associate
	}, $cls;
	$rpipe->blocking(0);
	$self->SUPER::new($rpipe, EPOLLIN);
}

# Readiness callback: drains whatever is currently readable from the
# pipe and forwards each complete line to $cidx->progress, prefixed
# with $pfx.  A trailing partial line is kept in {buf} until the rest
# of it arrives.
#
# A line of the form "mset.size=<digits>" additionally removes $pfx
# from $cidx->{PENDING} and calls $cidx->index_next before the line is
# reported.
#
# At EOF any unterminated remainder is reported as-is and the watcher
# is closed.  Dies on any sysread error other than EAGAIN/EINTR.
sub event_step {
	my ($self) = @_; # xap_helper.h is line-buffered
	my $buf = delete($self->{buf}) // '';
	my $n = sysread($self->{sock}, $buf, 65536, length($buf));
	if (!defined($n)) {
		if ($!{EAGAIN} || $!{EINTR}) {
			# Transient failure: nothing was appended to $buf, but
			# it may still hold a partial line from an earlier
			# call.  Put it back, otherwise that data is lost.
			# Returning relies on event_step being invoked again
			# once the pipe is readable, exactly as the EAGAIN
			# path always did.
			$self->{buf} = $buf if $buf ne '';
			return;
		}
		die "sysread: $!";
	}
	my $pfx = $self->{pfx};
	if ($n == 0) { # EOF: flush any unterminated final line
		$self->{cidx}->progress("$pfx $buf") if $buf ne '';
		return $self->close;
	}
	# $n > 0 guarantees $buf is non-empty, so @lines has >= 1 element
	my @lines = split(/^/m, $buf);
	# hold back an incomplete last line until its newline shows up
	$self->{buf} = pop @lines if substr($lines[-1], -1) ne "\n";
	for my $line (@lines) {
		if ($line =~ /\Amset\.size=[0-9]+\n\z/) {
			# this prefix is no longer pending; start the next job
			delete $self->{cidx}->{PENDING}->{$pfx};
			$self->{cidx}->index_next;
		}
		chomp $line;
		$self->{cidx}->progress("$pfx $line");
	}
}

1;