diff options
author | Eric Wong <e@80x24.org> | 2022-06-20 19:27:29 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2022-06-21 10:39:10 +0000 |
commit | d0079a334fe2b769e4f81d6bd3c7e4346a8fa2b2 (patch) | |
tree | 952a2f606d24b9a81bb447cbb06f73a16088973e | |
parent | 913b8e2f5c3c85eab5d10112c130db44ee2893c3 (diff) | |
download | public-inbox-d0079a334fe2b769e4f81d6bd3c7e4346a8fa2b2.tar.gz |
This allows easy searching via patch-id from a git commit. Currently, abbreviations are not supported, and it seems needless to support them since, AFAIK, git neither generates nor resolves abbreviated patch-ids anywhere.
-rw-r--r-- | TODO | 3 | ||||
-rw-r--r-- | lib/PublicInbox/Search.pm | 5 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 15 | ||||
-rw-r--r-- | t/extsearch.t | 7 | ||||
-rw-r--r-- | t/v2mda.t | 10 |
5 files changed, 32 insertions, 8 deletions
@@ -137,9 +137,6 @@ all need to be considered for everything we introduce) * make "git cat-file --batch" detect unlinked packfiles so we don't have to restart processes (very long-term) -* support searching based on `git-patch-id --stable` to improve - bidirectional mapping of commits <=> emails - * linter to check validity of config file * linter option and WWW endpoint to graph relationships and flows diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 523003b3..6f9fdde1 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # based on notmuch, but with no concept of folders, files or flags # @@ -118,9 +118,10 @@ my %bool_pfx_external = ( dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', + patchid => 'XDFID', ); -my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; +my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID'; my %prob_prefix = ( # for mairix compatibility s => 'S', diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 50e26050..53ec23a5 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -18,6 +18,7 @@ use PublicInbox::MsgIter; use PublicInbox::IdxStack; use Carp qw(croak carp); use POSIX qw(strftime); +use Fcntl qw(SEEK_SET); use Time::Local qw(timegm); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); @@ -349,6 +350,20 @@ sub index_xapian { # msg_iter callback defined $s or return; $_[0]->[0] = $part = undef; # free memory + if ($s =~ /^(?:diff|---|\+\+\+) /ms) { + open(my $fh, '+>:utf8', undef) or die "open: $!"; + open(my $eh, '+>', undef) or die "open: $!"; + $fh->autoflush(1); + print $fh $s or die "print: $!"; + sysseek($fh, 0, SEEK_SET) or die "sysseek: $!"; + my $id = ($self->{ibx} 
// $self->{eidx})->git->qx( + [qw(patch-id --stable)], + {}, { 0 => $fh, 2 => $eh }); + $id =~ /\A([a-f0-9]{40,})/ and $doc->add_term('XDFID'.$1); + seek($eh, 0, SEEK_SET) or die "seek: $!"; + while (<$eh>) { warn $_ } + } + # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); undef $s; # free memory diff --git a/t/extsearch.t b/t/extsearch.t index 09cbdabe..2d7375d6 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -314,7 +314,12 @@ if ('reindex catches missed messages') { is($new->{subject}, $eml->header('Subject'), 'new message added'); $es->{xdb}->reopen; - my $mset = $es->mset("mid:$new->{mid}"); + # git patch-id --stable <t/data/0001.patch | awk '{print $1}' + my $patchid = '91ee6b761fc7f47cad9f2b09b10489f313eb5b71'; + my $mset = $es->search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); + + $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 1, 'previously unseen, now indexed in Xapian'); ok($im->remove($eml), 'remove new message from v2 inbox'); @@ -1,7 +1,8 @@ -# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.10.1; use strict; -use warnings; use Test::More; use Fcntl qw(SEEK_SET); use Cwd; @@ -88,6 +89,11 @@ is($eml->as_string, $mime->as_string, 'injected message'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); + + # git patch-id --stable <t/data/0001.patch | awk '{print $1}' + my $patchid = '91ee6b761fc7f47cad9f2b09b10489f313eb5b71'; + my $mset = $ibx->search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); } done_testing(); |