From 21ab8f3cc530d9483091f32c0865ba1ce867cef8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 9 Sep 2016 00:01:25 +0000 Subject: search: fix space regressions from recent changes As of Xapian 1.0.4 (from 2007), it is possible to use Search::Xapian::QueryParser::add_prefix multiple times with the same user field name but different term prefixes. This brings my current git@vger mirror from 6.5GB to 2.1GB (both sizes are after xapian-compact). --- lib/PublicInbox/SearchIdx.pm | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) (limited to 'lib/PublicInbox/SearchIdx.pm') diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index cd27a294..ae890605 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -129,15 +129,9 @@ sub index_users ($$) { $tg->index_text($from, 1, 'A'); # A - author $tg->increase_termpos; - $tg->index_text($to, 1, 'XTO') if $to ne ''; + $tg->increase_termpos; $tg->index_text($cc, 1, 'XCC') if $cc ne ''; - my $tc = join("\t", $to, $cc); - $tg->index_text($tc, 1, 'XTC') if $tc ne ''; - my $tcf = join("\t", $tc, $from); - $tg->index_text($tcf, 1, 'XTCF') if $tcf ne ''; - - $tg->index_text($from); $tg->increase_termpos; } @@ -173,12 +167,7 @@ sub add_message { my $tg = $self->term_generator; $tg->set_document($doc); - if ($subj) { - $tg->index_text($subj, 1, 'S'); - $tg->index_text($subj, 1, 'XBS'); - } - $tg->increase_termpos; - $tg->index_text($subj) if $subj; + $tg->index_text($subj, 1, 'S') if $subj; $tg->increase_termpos; index_users($tg, $smsg); @@ -204,25 +193,19 @@ sub add_message { if (@quot) { my $s = join("\n", @quot); @quot = (); - $tg->index_text($s, 1, 'XQUOT'); - $tg->index_text($s, 0, 'XBS'); - $tg->index_text($s, 0, 'XBODY'); - $tg->index_text($s, 0); + $tg->index_text($s, 0, 'XQUOT'); $tg->increase_termpos; } if (@orig) { my $s = join("\n", @orig); @orig = (); $tg->index_text($s, 1, 'XNQ'); - $tg->index_text($s, 1, 'XBS'); - $tg->index_text($s, 1, 'XBODY'); - 
$tg->index_text($s); $tg->increase_termpos; } }); link_message($self, $smsg, $old_tid); - $tg->index_text($mid, 1); + $tg->index_text($mid, 1, 'XMID'); $doc->set_data($smsg->to_doc_data($blob)); if (my $altid = $self->{-altid}) { -- cgit v1.2.3-24-ge0c7