about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2022-06-20 19:27:29 +0000
committer Eric Wong <e@80x24.org> 2022-06-21 10:39:10 +0000
commit d0079a334fe2b769e4f81d6bd3c7e4346a8fa2b2 (patch)
tree 952a2f606d24b9a81bb447cbb06f73a16088973e
parent 913b8e2f5c3c85eab5d10112c130db44ee2893c3 (diff)
download public-inbox-d0079a334fe2b769e4f81d6bd3c7e4346a8fa2b2.tar.gz
The new `patchid:' search prefix allows easy searching via patch-id from a git commit.

Currently, abbreviations are not supported, and it seems
needless to support them since, AFAIK, git neither generates
nor resolves abbreviated patch-ids anywhere.
-rw-r--r-- TODO 3
-rw-r--r-- lib/PublicInbox/Search.pm 5
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 15
-rw-r--r-- t/extsearch.t 7
-rw-r--r-- t/v2mda.t 10
5 files changed, 32 insertions, 8 deletions
diff --git a/TODO b/TODO
index 5be4b5e3..43eee063 100644
--- a/TODO
+++ b/TODO
@@ -137,9 +137,6 @@ all need to be considered for everything we introduce)
 * make "git cat-file --batch" detect unlinked packfiles so we don't
   have to restart processes (very long-term)
 
-* support searching based on `git-patch-id --stable` to improve
-  bidirectional mapping of commits <=> emails
-
 * linter to check validity of config file
 
 * linter option and WWW endpoint to graph relationships and flows
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 523003b3..6f9fdde1 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # based on notmuch, but with no concept of folders, files or flags
 #
@@ -118,9 +118,10 @@ my %bool_pfx_external = (
         dfpre => 'XDFPRE',
         dfpost => 'XDFPOST',
         dfblob => 'XDFPRE XDFPOST',
+        patchid => 'XDFID',
 );
 
-my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST';
+my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID';
 my %prob_prefix = (
         # for mairix compatibility
         s => 'S',
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 50e26050..53ec23a5 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -18,6 +18,7 @@ use PublicInbox::MsgIter;
 use PublicInbox::IdxStack;
 use Carp qw(croak carp);
 use POSIX qw(strftime);
+use Fcntl qw(SEEK_SET);
 use Time::Local qw(timegm);
 use PublicInbox::OverIdx;
 use PublicInbox::Spawn qw(spawn);
@@ -349,6 +350,20 @@ sub index_xapian { # msg_iter callback
         defined $s or return;
         $_[0]->[0] = $part = undef; # free memory
 
+        if ($s =~ /^(?:diff|---|\+\+\+) /ms) {
+                open(my $fh, '+>:utf8', undef) or die "open: $!";
+                open(my $eh, '+>', undef) or die "open: $!";
+                $fh->autoflush(1);
+                print $fh $s or die "print: $!";
+                sysseek($fh, 0, SEEK_SET) or die "sysseek: $!";
+                my $id = ($self->{ibx} // $self->{eidx})->git->qx(
+                                                [qw(patch-id --stable)],
+                                                {}, { 0 => $fh, 2 => $eh });
+                $id =~ /\A([a-f0-9]{40,})/ and $doc->add_term('XDFID'.$1);
+                seek($eh, 0, SEEK_SET) or die "seek: $!";
+                while (<$eh>) { warn $_ }
+        }
+
         # split off quoted and unquoted blocks:
         my @sections = PublicInbox::MsgIter::split_quotes($s);
         undef $s; # free memory
diff --git a/t/extsearch.t b/t/extsearch.t
index 09cbdabe..2d7375d6 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -314,7 +314,12 @@ if ('reindex catches missed messages') {
         is($new->{subject}, $eml->header('Subject'), 'new message added');
 
         $es->{xdb}->reopen;
-        my $mset = $es->mset("mid:$new->{mid}");
+        # git patch-id --stable <t/data/0001.patch | awk '{print $1}'
+        my $patchid = '91ee6b761fc7f47cad9f2b09b10489f313eb5b71';
+        my $mset = $es->search->mset("patchid:$patchid");
+        is($mset->size, 1, 'patchid search works');
+
+        $mset = $es->mset("mid:$new->{mid}");
         is($mset->size, 1, 'previously unseen, now indexed in Xapian');
 
         ok($im->remove($eml), 'remove new message from v2 inbox');
diff --git a/t/v2mda.t b/t/v2mda.t
index 3dfc569e..8f2f335d 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -1,7 +1,8 @@
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.10.1;
 use strict;
-use warnings;
 use Test::More;
 use Fcntl qw(SEEK_SET);
 use Cwd;
@@ -88,6 +89,11 @@ is($eml->as_string, $mime->as_string, 'injected message');
         $pre = $ibx->search->mset_to_smsg($ibx, $pre);
         $post = $ibx->search->mset_to_smsg($ibx, $post);
         is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases');
+
+        # git patch-id --stable <t/data/0001.patch | awk '{print $1}'
+        my $patchid = '91ee6b761fc7f47cad9f2b09b10489f313eb5b71';
+        my $mset = $ibx->search->mset("patchid:$patchid");
+        is($mset->size, 1, 'patchid search works');
 }
 
 done_testing();