From 73214b02e581ef0a037d85a1ae37ff425654d7b3 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 25 Aug 2015 02:03:16 +0000
Subject: search: implement subject summarization

We ought to summarize subjects to avoid exploding line lengths
in the web interface.
---
Review notes (not part of the commit message): subject_summary() no
longer ends the summary at a "0" word or at leading whitespace, always
marks dropped words with " ...", and keeps the single-long-word case
within the limit.  The index lines below predate these changes.

 lib/PublicInbox/Search.pm    | 32 ++++++++++++++++++++++++++++++++
 lib/PublicInbox/SearchMsg.pm |  3 +--
 t/search.t                   | 28 ++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index bcc53125..5ef380e8 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -237,6 +237,38 @@ sub subject_normalized {
 	$subj;
 }
 
+# for doc data
+# Shorten a subject to roughly $max characters, breaking on whitespace
+# and appending " ..." whenever words had to be dropped.
+sub subject_summary {
+	my $subj = pop; # last argument, however we were called
+	my $max = 68;
+	return $subj if length($subj) <= $max;
+
+	# the ' ' pattern skips leading whitespace, so no empty first word
+	my @words = split(' ', $subj);
+	return '' unless @words;
+
+	my $summ = '';
+	# only peek at the next word: testing the value of a shifted word
+	# would stop at "0" and lose track of whether any words remain
+	while (@words) {
+		my $new = $summ . $words[0] . ' ';
+		last if length($new) >= $max;
+		$summ = $new;
+		shift @words;
+	}
+	if (length $summ) {
+		my $r = @words ? ' ...' : '';
+		$summ =~ s/ \z/$r/s;
+	} else {
+		# one unbroken word is too long on its own, cut it so the
+		# result still fits in $max with the 4-character ellipsis
+		$summ = substr($words[0], 0, $max - 4) . ' ...';
+	}
+	$summ;
+}
+
 sub enquire {
 	my ($self) = @_;
 	$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index a8f99bd5..a9f3180b 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -94,9 +94,8 @@ sub date {
 
 sub to_doc_data {
 	my ($self) = @_;
-
 	$self->mid . "\n" .
-	$self->subject . "\n" .
+	PublicInbox::Search::subject_summary($self->subject) . "\n" .
 	$self->from_name . "\n".
 	$self->date . "\n" .
 	$self->references_sorted;
diff --git a/t/search.t b/t/search.t
index 17e9eaf4..65539f14 100644
--- a/t/search.t
+++ b/t/search.t
@@ -16,6 +16,34 @@ is(0, system(qw(git init -q --bare), $git_dir), "git init (main)");
 eval { PublicInbox::Search->new($git_dir) };
 ok($@, "exception raised on non-existent DB");
 
+{
+	my $orig = "FOO " x 30;
+	my $summ = PublicInbox::Search::subject_summary($orig);
+
+	$summ = length($summ);
+	$orig = length($orig);
+	ok($summ < $orig && $summ > 0, "summary shortened ($orig => $summ)");
+
+	$orig = "FOO" x 30;
+	$summ = PublicInbox::Search::subject_summary($orig);
+
+	$summ = length($summ);
+	$orig = length($orig);
+	ok($summ < $orig && $summ > 0,
+		"summary shortened but not empty: $summ");
+
+	my $exp = ("FOO " x 15) . "FOO ...";
+	$summ = PublicInbox::Search::subject_summary("0 " . ("FOO " x 30));
+	is($summ, "0 $exp", 'summary continues past a "0" word');
+
+	$summ = PublicInbox::Search::subject_summary(" " . ("FOO " x 30));
+	is($summ, $exp, "leading whitespace is skipped");
+
+	$orig = ("FOO " x 16) . ("B" x 10);
+	$summ = PublicInbox::Search::subject_summary($orig);
+	is($summ, $exp, "ellipsis added when only the last word is dropped");
+}
+
 my $rw = PublicInbox::SearchIdx->new($git_dir, 1);
 my $ro = PublicInbox::Search->new($git_dir);
 my $rw_commit = sub {
-- 
cgit v1.2.3-24-ge0c7