about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2017-02-12 09:04:54 +0000
committer Eric Wong <e@80x24.org> 2017-02-14 22:56:35 +0000
commit eb0267592377e116c7184d206bff48413ae202fe (patch)
tree 9b6fbe64cc5867e99ef4c056e080f4b123b26420
parent 60f38818f2dbc76e20041cf1b01e2baf3322a1f1 (diff)
download public-inbox-eb0267592377e116c7184d206bff48413ae202fe.tar.gz
Xapian memory usage is tied to the size of the indexed
text, so take the raw message size into account when
deciding when to flush Xapian data.

More importantly, we now flush Xapian before we have it
buffer beyond our maximum; and we do it unconditionally
to prevent even high priority processes from OOM-ing.
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 25
1 file changed, 17 insertions, 8 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 2548ddf2..e2a0a544 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -21,7 +21,10 @@ use Carp qw(croak);
 use POSIX qw(strftime);
 require PublicInbox::Git;
 
-use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
+use constant {
+        MAX_MID_SIZE => 244, # max term size - 1 in Xapian
+        BATCH_BYTES => 1_000_000,
+};
 
 sub new {
         my ($class, $inbox, $creat) = @_;
@@ -64,7 +67,6 @@ sub _xdb_acquire {
                 require File::Path;
                 _lock_acquire($self);
                 File::Path::mkpath($dir);
-                $self->{batch_size} = 100;
                 $flag = Search::Xapian::DB_CREATE_OR_OPEN;
         }
         $self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
@@ -406,6 +408,15 @@ sub index_sync {
         with_umask($self->{'umask'}, sub { $self->_index_sync($opts) });
 }
 
+sub batch_adjust ($$$$) {
+        my ($max, $bytes, $batch_cb, $latest) = @_;
+        $$max -= $bytes;
+        if ($$max <= 0) {
+                $$max = BATCH_BYTES;
+                $batch_cb->($latest, 1);
+        }
+}
+
 sub rlog {
         my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
         my $hex = '[a-f0-9]';
@@ -415,23 +426,21 @@ sub rlog {
         my $git = $self->{git};
         my $latest;
         my $bytes;
-        my $max = $self->{batch_size}; # may be undef
+        my $max = BATCH_BYTES;
         local $/ = "\n";
         my $line;
         while (defined($line = <$log>)) {
                 if ($line =~ /$addmsg/o) {
                         my $blob = $1;
                         my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+                        batch_adjust(\$max, $bytes, $batch_cb, $latest);
                         $add_cb->($self, $mime, $bytes, $blob);
                 } elsif ($line =~ /$delmsg/o) {
                         my $blob = $1;
-                        my $mime = do_cat_mail($git, $blob) or next;
+                        my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+                        batch_adjust(\$max, $bytes, $batch_cb, $latest);
                         $del_cb->($self, $mime);
                 } elsif ($line =~ /^commit ($h40)/o) {
-                        if (defined $max && --$max <= 0) {
-                                $max = $self->{batch_size};
-                                $batch_cb->($latest, 1);
-                        }
                         $latest = $1;
                 }
         }