From d0079a334fe2b769e4f81d6bd3c7e4346a8fa2b2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 20 Jun 2022 19:27:29 +0000 Subject: search: support "patchid:" prefix (git patch-id --stable) This allows easy searching via patch-id from a git commit. Currently, abbreviations are not supported, and it seems needless to support them since AFAIK (git) doesn't generate nor resolve abbreviated patch-ids anywhere. --- TODO | 3 --- lib/PublicInbox/Search.pm | 5 +++-- lib/PublicInbox/SearchIdx.pm | 15 +++++++++++++++ t/extsearch.t | 7 ++++++- t/v2mda.t | 10 ++++++++-- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/TODO b/TODO index 5be4b5e3..43eee063 100644 --- a/TODO +++ b/TODO @@ -137,9 +137,6 @@ all need to be considered for everything we introduce) * make "git cat-file --batch" detect unlinked packfiles so we don't have to restart processes (very long-term) -* support searching based on `git-patch-id --stable` to improve - bidirectional mapping of commits <=> emails - * linter to check validity of config file * linter option and WWW endpoint to graph relationships and flows diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 523003b3..6f9fdde1 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # @@ -118,9 +118,10 @@ my %bool_pfx_external = ( dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', + patchid => 'XDFID', ); -my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; +my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID'; my %prob_prefix = ( # for mairix compatibility s => 'S', diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 50e26050..53ec23a5 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -18,6 +18,7 @@ use PublicInbox::MsgIter; use PublicInbox::IdxStack; use Carp qw(croak carp); use POSIX qw(strftime); +use Fcntl qw(SEEK_SET); use Time::Local qw(timegm); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); @@ -349,6 +350,20 @@ sub index_xapian { # msg_iter callback defined $s or return; $_[0]->[0] = $part = undef; # free memory + if ($s =~ /^(?:diff|---|\+\+\+) /ms) { + open(my $fh, '+>:utf8', undef) or die "open: $!"; + open(my $eh, '+>', undef) or die "open: $!"; + $fh->autoflush(1); + print $fh $s or die "print: $!"; + sysseek($fh, 0, SEEK_SET) or die "sysseek: $!"; + my $id = ($self->{ibx} // $self->{eidx})->git->qx( + [qw(patch-id --stable)], + {}, { 0 => $fh, 2 => $eh }); + $id =~ /\A([a-f0-9]{40,})/ and $doc->add_term('XDFID'.$1); + seek($eh, 0, SEEK_SET) or die "seek: $!"; + while (<$eh>) { warn $_ } + } + # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); undef $s; # free memory diff --git a/t/extsearch.t b/t/extsearch.t index 09cbdabe..2d7375d6 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -314,7 +314,12 @@ if ('reindex catches missed messages') { is($new->{subject}, $eml->header('Subject'), 'new message added'); $es->{xdb}->reopen; - my $mset = $es->mset("mid:$new->{mid}"); + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); + + $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 1, 'previously unseen, now indexed in Xapian'); ok($im->remove($eml), 'remove new message from v2 inbox'); diff --git a/t/v2mda.t b/t/v2mda.t index 3dfc569e..8f2f335d 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -1,7 +1,8 @@ -# Copyright (C) 2018-2021 all contributors +#!perl -w +# Copyright (C) all contributors # License: AGPL-3.0+ +use v5.10.1; use strict; -use warnings; use Test::More; use Fcntl qw(SEEK_SET); use Cwd; @@ -88,6 +89,11 @@ is($eml->as_string, $mime->as_string, 'injected message'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); + + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); } done_testing(); -- cgit v1.2.3-24-ge0c7