From 8188164496fcd36aa4fc6ac14a5e4782feace077 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 8 Dec 2023 03:54:38 +0000 Subject: cindex: switch --join to use dfpost7 by default Post-image blob OIDs are what solver already works with, and longer OIDs may not be available in historical mail archives. `patchid' turns out to be unsuitable since: 1) git's default diff algorithm has changed over time 2) users may use different diff options to improve readability Of course, we could eventually run `lei rediff' during the index phase to regenerate patchids, but that's out-of-scope for now and likely to be too expensive. --- lib/PublicInbox/CodeSearchIdx.pm | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 967933f2..5d420de2 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -34,9 +34,9 @@ # The $IBX_OFF here is ephemeral (per-join_data) and NOT related to # the `ibx_off' column of `over.sqlite3' for extindex. # @ROOT_COMMIT_OID_OFFS is space-delimited -# In both cases, $PFX is typically the value of the patchid (XDFID) but it -# can be configured to use any combination of patchid, dfpre, dfpost or -# dfblob. +# In both cases, $PFX is typically the value of the 7-(hex)char dfpost +# XDFPOST but it can be configured to use any combination of patchid, +# dfpre, dfpost or dfblob. # # WARNING: this is vulnerable to arbitrary memory usage attacks if we # attempt to index or join against malicious coderepos with @@ -1199,11 +1199,13 @@ sub init_join_prefork ($) { require PublicInbox::CidxXapHelperAux; require PublicInbox::XapClient; my @unknown; - my $pfx = $JOIN{prefixes} // 'patchid'; - for (split /\+/, $pfx) { - my $v = $PublicInbox::Search::PATCH_BOOL_COMMON{$_} // - push(@unknown, $_); - push(@JOIN_PFX, split(/ /, $v)); + my $pfx = $JOIN{prefixes} // 'dfpost7'; + for my $p (split /\+/, $pfx) { + my $n = ''; + $p =~ s/([0-9]+)\z// and $n = $1; + my $v = $PublicInbox::Search::PATCH_BOOL_COMMON{$p} // + push(@unknown, $p); + push(@JOIN_PFX, map { $_.$n } split(/ /, $v)); } @unknown and die <