From e6dfbf68639876580a63ed0502174c4e7f8fa722 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 9 Feb 2017 21:11:00 +0000 Subject: repo: increase search index flush granularity We need to flush Xapian more frequently to account for gigantic commits which introduce lots of text, so do it when accounting for each line processed, and not for each commit processed. --- lib/PublicInbox/RepoGitSearchIdx.pm | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/RepoGitSearchIdx.pm b/lib/PublicInbox/RepoGitSearchIdx.pm index 333558ca..bb92b0ff 100644 --- a/lib/PublicInbox/RepoGitSearchIdx.pm +++ b/lib/PublicInbox/RepoGitSearchIdx.pm @@ -197,6 +197,11 @@ sub each_log_line ($$) { local $/ = "\n"; while (defined(my $l = <$log>)) { $batch -= bytes::length($l); + # prevent memory growth from Xapian + if ($batch <= 0) { + $db->flush; + $batch = BATCH_BYTES; + } if ($l =~ /^commit (\S+)(\s+\([^\)]+\))?/) { my ($oid, $decor) = ($1, $2); commit_doc($self, $doc_id, $doc) if $doc; @@ -204,11 +209,6 @@ sub each_log_line ($$) { $state = 0; $cc_ins = $cc_del = undef; - # prevent OOM - if ($batch <= 0) { - $db->flush; - $batch = BATCH_BYTES; - } $doc = get_doc($self, \$doc_id, 'commit', $oid); decor_update($self, $doc, $decor, $oid) if $decor; # old commit -- cgit v1.2.3-24-ge0c7