From 003829cdaa583d66c5af7564296d266952e0f12f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 8 Mar 2024 21:05:03 +0000 Subject: lei: prevent empty {bytes} field in saved search Noticed while tracking down fast-import crash bug report. Link: https://public-inbox.org/meta/CAL_JsqK7P4gjLPyvzxNEcYmxT4j6Ah5f3Pz1RqDHxmysTg3aEg@mail.gmail.com/ --- lib/PublicInbox/LeiSearch.pm | 2 ++ lib/PublicInbox/LeiToMail.pm | 1 + lib/PublicInbox/OverIdx.pm | 6 +++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 29e3213f..684668c5 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -103,6 +103,8 @@ sub xoids_for { for my $o (@overs) { my ($id, $prev); while (my $cur = $o->next_by_mid($mid, \$id, \$prev)) { + # {bytes} may be '' from old bug + $cur->{bytes} = 1 if $cur->{bytes} eq ''; next if $cur->{bytes} == 0 || $xoids->{$cur->{blob}}; $git->cat_async($cur->{blob}, \&_cmp_1st, diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index a816df6c..dfae29e9 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -149,6 +149,7 @@ sub git_to_mail { # git->cat_async callback "W: $oid is $type (!= blob)"); $size or return $self->{lei}->child_error(0,"E: $oid is empty"); $smsg->{blob} eq $oid or die "BUG: expected=$smsg->{blob}"; + $smsg->{bytes} ||= $size; $self->{wcb}->($bref, $smsg); }; $self->{lei}->fail("$@ (oid=$oid)") if $@; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index c9c25828..4f8533f7 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -17,6 +17,7 @@ use PublicInbox::MID qw/id_compress mids_for_index references/; use PublicInbox::Smsg qw(subject_normalized); use Compress::Zlib qw(compress); use Carp qw(croak); +use bytes (); # length sub dbh_new { my ($self) = @_; @@ -263,7 +264,10 @@ sub ddd_for ($) { sub add_overview { my ($self, $eml, $smsg) = @_; - $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; + my $raw = $eml->body_raw; + $smsg->{lines} = $raw =~ tr!\n!\n!; + $smsg->{bytes} //= bytes::length $raw; + undef $raw; my $mids = mids_for_index($eml); my $refs = $smsg->parse_references($eml, $mids); $mids->[0] //= do { -- cgit v1.2.3-24-ge0c7