From d20753c2a9d63e60b90942176684370c9dce97fb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 4 Sep 2022 04:27:49 +0000 Subject: prepare HTML rendering maintainer tests for upcoming changes There'll be a number of upcoming changes to HTML rendering of messages to hopefully reduce memory usage and improve speed by writing out to the gzip buffer earlier. Update the tests now so it'll be easier to compare before-and-after results. --- xt/cmp-msgview.t | 94 ----------------------------------------------------- xt/perf-msgview.t | 24 +++++++------- xt/perf-obfuscate.t | 26 ++++++++------- 3 files changed, 27 insertions(+), 117 deletions(-) delete mode 100644 xt/cmp-msgview.t (limited to 'xt') diff --git a/xt/cmp-msgview.t b/xt/cmp-msgview.t deleted file mode 100644 index 9b06f88d..00000000 --- a/xt/cmp-msgview.t +++ /dev/null @@ -1,94 +0,0 @@ -#!perl -w -# Copyright (C) 2020-2021 all contributors -# License: AGPL-3.0+ -use strict; -use Test::More; -use Benchmark qw(:all); -use PublicInbox::Inbox; -use PublicInbox::View; -use PublicInbox::TestCommon; -use PublicInbox::Eml; -use Digest::MD5; -require_git(2.19); -require_mods qw(Data::Dumper Email::MIME Plack::Util); -Data::Dumper->import('Dumper'); -require PublicInbox::MIME; -my ($tmpdir, $for_destroy) = tmpdir(); -my $inboxdir = $ENV{GIANT_INBOX_DIR}; -plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; -my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); -my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' }); -my $git = $ibx->git; -my $fh = $git->popen(@cat); -vec(my $vec = '', fileno($fh), 1) = 1; -select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; -my $mime_ctx = { - env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, - ibx => $ibx, - www => Plack::Util::inline_object(style => sub {''}), - obuf => \(my $mime_buf = ''), - mhref => '../', -}; -my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') }; -my $n = 
0; -my $m = 0; -my $ndiff_html = 0; -my $dig_cls = 'Digest::MD5'; -my $digest_attach = sub { # ensure ->body (not ->body_raw) matches - my ($p, $cmp_arg) = @_; - my $part = shift @$p; - my $dig = $cmp_arg->[0] //= $dig_cls->new; - $dig->add($part->body_raw); - push @$cmp_arg, join(', ', @$p); -}; - -my $git_cb = sub { - my ($bref, $oid) = @_; - local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; - ++$m; - my $mime = PublicInbox::MIME->new($$bref); - PublicInbox::View::multipart_text_as_html($mime, $mime_ctx); - my $eml = PublicInbox::Eml->new($$bref); - PublicInbox::View::multipart_text_as_html($eml, $eml_ctx); - if ($eml_buf ne $mime_buf) { - ++$ndiff_html; - open my $fh, '>', "$tmpdir/mime" or die $!; - print $fh $mime_buf or die $!; - close $fh or die $!; - open $fh, '>', "$tmpdir/eml" or die $!; - print $fh $eml_buf or die $!; - close $fh or die $!; - # using `git diff', diff(1) may not be installed - diag "$inboxdir $oid differs"; - diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]); - } - $eml_buf = $mime_buf = ''; - - # don't tolerate differences in attachment downloads - $mime = PublicInbox::MIME->new($$bref); - $mime->each_part($digest_attach, my $mime_cmp = [], 1); - $eml = PublicInbox::Eml->new($$bref); - $eml->each_part($digest_attach, my $eml_cmp = [], 1); - $mime_cmp->[0] = $mime_cmp->[0]->hexdigest; - $eml_cmp->[0] = $eml_cmp->[0]->hexdigest; - # don't have millions of "ok" lines - if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) { - diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]); - is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match"); - } -}; -my $t = timeit(1, sub { - while (<$fh>) { - my ($oid, $type) = split / /; - next if $type ne 'blob'; - ++$n; - $git->cat_async($oid, $git_cb); - } - $git->async_wait_all; -}); -is($m, $n, 'rendered all messages'); - -# we'll tolerate minor differences in HTML rendering -diag "$ndiff_html HTML differences"; - -done_testing(); diff --git a/xt/perf-msgview.t b/xt/perf-msgview.t index 
cf550c1a..7f92ce85 100644 --- a/xt/perf-msgview.t +++ b/xt/perf-msgview.t @@ -7,7 +7,7 @@ use PublicInbox::TestCommon; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::WwwStream; my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; my $blob = $ENV{TEST_BLOB}; @@ -31,26 +31,28 @@ if ($fh) { die "timed out waiting for --batch-check"; } -my $ctx = { +my $ctx = bless { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, ibx => $ibx, www => Plack::Util::inline_object(style => sub {''}), -}; -my ($mime, $res, $oid, $type); + gz => PublicInbox::GzipFilter::gzip_or_die(), +}, 'PublicInbox::WwwStream'; +my ($eml, $res, $oid, $type); my $n = 0; -my $obuf = ''; my $m = 0; +${$ctx->{obuf}} = ''; +$ctx->{mhref} = '../'; my $cb = sub { - $mime = PublicInbox::Eml->new(shift); - PublicInbox::View::multipart_text_as_html($mime, $ctx); + $eml = PublicInbox::Eml->new(shift); + $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1); + $ctx->zflush; ++$m; - $obuf = ''; + delete $ctx->{zbuf}; + ${$ctx->{obuf}} = ''; }; my $t = timeit(1, sub { - $ctx->{obuf} = \$obuf; - $ctx->{mhref} = '../'; if (defined $blob) { my $nr = $ENV{NR} // 10000; for (1..$nr) { @@ -67,6 +69,6 @@ my $t = timeit(1, sub { } $git->async_wait_all; }); -diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages"; +diag 'add_text_body took '.timestr($t)." 
for $n <=> $m messages"; is($m, $n, 'rendered all messages'); done_testing(); diff --git a/xt/perf-obfuscate.t b/xt/perf-obfuscate.t index 640309d2..4da36124 100644 --- a/xt/perf-obfuscate.t +++ b/xt/perf-obfuscate.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; @@ -7,6 +7,7 @@ use PublicInbox::TestCommon; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; +use PublicInbox::WwwStream; my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; @@ -22,7 +23,6 @@ if (require_git(2.19, 1)) { "git <2.19, cat-file lacks --unordered, locality suffers\n"; } require_mods qw(Plack::Util); -use_ok 'Plack::Util'; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' , obfuscate => $obfuscate}); my $git = $ibx->git; @@ -31,26 +31,28 @@ my $vec = ''; vec($vec, fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; -my $ctx = { +my $ctx = bless { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, ibx => $ibx, www => Plack::Util::inline_object(style => sub {''}), -}; -my ($mime, $res, $oid, $type); + gz => PublicInbox::GzipFilter::gzip_or_die(), +}, 'PublicInbox::WwwStream'; +my ($eml, $res, $oid, $type); my $n = 0; -my $obuf = ''; my $m = 0; +${$ctx->{obuf}} = ''; +$ctx->{mhref} = '../'; my $cb = sub { - $mime = PublicInbox::Eml->new(shift); - PublicInbox::View::multipart_text_as_html($mime, $ctx); + $eml = PublicInbox::Eml->new(shift); + $eml->each_part(\&PublicInbox::View::add_text_body, $ctx, 1); + $ctx->zflush; ++$m; - $obuf = ''; + delete $ctx->{zbuf}; + ${$ctx->{obuf}} = ''; }; my $t = timeit(1, sub { - $ctx->{obuf} = \$obuf; - $ctx->{mhref} = '../'; while (<$fh>) { ($oid, $type) = split / /; next if $type ne 'blob'; @@ -59,6 +61,6 @@ my $t = timeit(1, sub { } $git->async_wait_all; }); -diag 'multipart_text_as_html took 
'.timestr($t)." for $n <=> $m messages"; +diag 'add_text_body took '.timestr($t)." for $n <=> $m messages"; is($m, $n, 'rendered all messages'); done_testing(); -- cgit v1.2.3-24-ge0c7