From ecacd5d9c9604bf2fe235f2ff4a79e9668fd5010 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Jan 2019 02:42:10 +0000 Subject: t/perf-msgview: add test to check msg_html performance This will be necessary to ensure we maintain reasonable performance when we add diff-highlighting support. --- t/perf-msgview.t | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 t/perf-msgview.t (limited to 't') diff --git a/t/perf-msgview.t b/t/perf-msgview.t new file mode 100644 index 00000000..adeb7aac --- /dev/null +++ b/t/perf-msgview.t @@ -0,0 +1,50 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Benchmark qw(:all); +use PublicInbox::Inbox; +use PublicInbox::View; +require './t/common.perl'; + +my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); +if (require_git(2.19, 1)) { + push @cat, '--unordered'; +} else { + warn +"git <2.19, cat-file lacks --unordered, locality suffers\n"; +} + +my $pi_dir = $ENV{GIANT_PI_DIR}; +plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir; + +my $ibx = PublicInbox::Inbox->new({ mainrepo => $pi_dir, name => 'name' }); +my $git = $ibx->git; +my $fh = $git->popen(@cat); +my $vec = ''; +vec($vec, fileno($fh), 1) = 1; +select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; + +my $ctx = { + env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, + -inbox => $ibx, +}; +my ($str, $mime, $res, $cmt, $type); +my $n = 0; +my $t = timeit(1, sub { + while (<$fh>) { + ($cmt, $type) = split / /; + next if $type ne 'blob'; + ++$n; + $str = $git->cat_file($cmt); + $mime = PublicInbox::MIME->new($str); + $res = PublicInbox::View::msg_html($ctx, $mime); + $res = $res->[2]; + while (defined($res->getline)) {} + $res->close; + } +}); +diag 'msg_html took '.timestr($t)." 
for $n messages"; +ok 1; +done_testing(); -- cgit v1.2.3-24-ge0c7 From 53ac5b18f6f124fe33bf6736aac0b8c85a0d0d1b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 15 Jan 2019 08:22:41 +0000 Subject: solver: initial Perl implementation This will lookup git blobs from associated git source code repositories. If the blobs can't be found, an attempt to "solve" them via patch application will be performed. Eventually, this may become the basis of a type-agnostic frontend similar to "git show" --- t/solve/0001-simple-mod.patch | 20 ++++++ t/solve/0002-rename-with-modifications.patch | 37 +++++++++++ t/solver_git.t | 91 ++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) create mode 100644 t/solve/0001-simple-mod.patch create mode 100644 t/solve/0002-rename-with-modifications.patch create mode 100644 t/solver_git.t (limited to 't') diff --git a/t/solve/0001-simple-mod.patch b/t/solve/0001-simple-mod.patch new file mode 100644 index 00000000..c6bb1575 --- /dev/null +++ b/t/solve/0001-simple-mod.patch @@ -0,0 +1,20 @@ +From: WEB DESIGN EXPERT +To: meta@public-inbox.org +Subject: [PATCH] TODO: take expert web design advice +Date: Mon, 1 Apr 2019 08:15:20 +0000 +Message-Id: <20190401081523.16213-1-BOFH@YHBT.net> + +--- + TODO | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/TODO b/TODO +index 605013e..69df7d5 100644 +--- a/TODO ++++ b/TODO +@@ -109,3 +109,5 @@ all need to be considered for everything we introduce) + + * Optional history squashing to reduce commit and intermediate + tree objects ++ ++ * Make use of and tags diff --git a/t/solve/0002-rename-with-modifications.patch b/t/solve/0002-rename-with-modifications.patch new file mode 100644 index 00000000..aa415e01 --- /dev/null +++ b/t/solve/0002-rename-with-modifications.patch @@ -0,0 +1,37 @@ +From: POLITICAL CORRECTNESS EXPERT +To: meta@public-inbox.org +Subject: [PATCH] POLITICALLY CORRECT FILE NAMING +Date: Mon, 1 Apr 2019 08:15:20 +0000 +Message-Id: <20190401081523.16213-2-BOFH@YHBT.net> + 
+HACKING MIGHT GET US REPORTED TO EFF-BEE-EYE +AND USE MARKDOWN CUZ MOAR FLAVORS == BETTER +--- + HACKING => CONTRIBUTING.md | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + rename HACKING => CONTRIBUTING.md (94%) + +diff --git a/HACKING b/CONTRIBUTING.md +similarity index 94% +rename from HACKING +rename to CONTRIBUTING.md +index 3435775..0a92431 100644 +--- a/HACKING ++++ b/CONTRIBUTING.md +@@ -1,5 +1,5 @@ +-hacking public-inbox +--------------------- ++contributing to public-inbox ++---------------------------- + + Send all patches and "git request-pull"-formatted emails to our + self-hosting inbox at meta@public-inbox.org +@@ -15,7 +15,7 @@ Please consider our goals in mind: + Decentralization, Accessibility, Compatibility, Performance + + These goals apply to everyone: users viewing over the web or NNTP, +-sysadmins running public-inbox, and other hackers working public-inbox. ++sysadmins running public-inbox, and other contributors working public-inbox. + + We will reject any feature which advocates or contributes to any + particular instance of a public-inbox becoming a single point of failure. 
diff --git a/t/solver_git.t b/t/solver_git.t new file mode 100644 index 00000000..fe322eab --- /dev/null +++ b/t/solver_git.t @@ -0,0 +1,91 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use File::Temp qw(tempdir); +use Cwd qw(abs_path); +require './t/common.perl'; +require_git(2.6); + +my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test + URI::Escape Plack::Builder); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for $0" if $@; +} +chomp(my $git_dir = `git rev-parse --git-dir 2>/dev/null`); +plan skip_all => "$0 must be run from a git working tree" if $?; +$git_dir = abs_path($git_dir); + +use_ok "PublicInbox::$_" for (qw(Inbox V2Writable MIME Git SolverGit)); + +my $mainrepo = tempdir('pi-solver-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $opts = { + mainrepo => $mainrepo, + name => 'test-v2writable', + version => 2, + -primary_address => 'test@example.com', +}; +my $ibx = PublicInbox::Inbox->new($opts); +my $im = PublicInbox::V2Writable->new($ibx, 1); +$im->{parallel} = 0; + +sub deliver_patch ($) { + open my $fh, '<', $_[0] or die "open: $!"; + my $mime = PublicInbox::MIME->new(do { local $/; <$fh> }); + $im->add($mime); + $im->done; +} + +deliver_patch('t/solve/0001-simple-mod.patch'); + +my $gits = [ PublicInbox::Git->new($git_dir) ]; +my $solver = PublicInbox::SolverGit->new($gits, [ $ibx ]); +open my $log, '+>>', "$mainrepo/solve.log" or die "open: $!"; +my $res = $solver->solve($log, '69df7d5', {}); +ok($res, 'solved a blob!'); +my $wt_git = $res->[0]; +is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob'); +my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; +is($res->[1], $expect, 'resolved blob to unabbreviated identifier'); +is($res->[2], 'blob', 'type specified'); +is($res->[3], 4405, 'size returned'); + +is(ref($wt_git->cat_file($res->[1])), 'SCALAR', 'wt cat-file works'); +is_deeply([$expect, 'blob', 4405], + 
[$wt_git->check($res->[1])], 'wt check works'); + +if (0) { # TODO: check this? + seek($log, 0, 0); + my $z = do { local $/; <$log> }; + diag $z; +} + +$res = undef; +my $wt_git_dir = $wt_git->{git_dir}; +$wt_git = undef; +ok(!-d $wt_git_dir, 'no references to WT held'); + +$res = $solver->solve($log, '0'x40, {}); +is($res, undef, 'no error on z40'); + +my $git_v2_20_1_tag = '7a95a1cd084cb665c5c2586a415e42df0213af74'; +$res = $solver->solve($log, $git_v2_20_1_tag, {}); +is($res, undef, 'no error on a tag not in our repo'); + +deliver_patch('t/solve/0002-rename-with-modifications.patch'); +$res = $solver->solve($log, '0a92431', {}); +ok($res, 'resolved without hints'); + +my $hints = { + oid_a => '3435775', + path_a => 'HACKING', + path_b => 'CONTRIBUTING' +}; +my $hinted = $solver->solve($log, '0a92431', $hints); +# don't compare ::Git objects: +shift @$res; shift @$hinted; +is_deeply($res, $hinted, 'hints work (or did not hurt :P'); + +done_testing(); -- cgit v1.2.3-24-ge0c7 From c8a4111320aaed484deecbbc7d1f63f38f3dc57b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 18 Jan 2019 10:21:40 +0000 Subject: git: add git_quote It'll be helpful for displaying progress in SolverGit output. 
--- t/git.t | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 't') diff --git a/t/git.t b/t/git.t index 2d58a106..9c80fbb4 100644 --- a/t/git.t +++ b/t/git.t @@ -144,11 +144,16 @@ if ('alternates reloaded') { is($$found, $config, 'alternates reloaded'); } -use_ok 'PublicInbox::Git', qw(git_unquote); +use_ok 'PublicInbox::Git', qw(git_unquote git_quote); my $s; is("foo\nbar", git_unquote($s = '"foo\\nbar"'), 'unquoted newline'); is("Eléanor", git_unquote($s = '"El\\303\\251anor"'), 'unquoted octal'); is(git_unquote($s = '"I\"m"'), 'I"m', 'unquoted dq'); is(git_unquote($s = '"I\\m"'), 'I\\m', 'unquoted backslash'); +is(git_quote($s = "Eléanor"), '"El\\303\\251anor"', 'quoted octal'); +is(git_quote($s = "hello\"world"), '"hello\"world"', 'quoted dq'); +is(git_quote($s = "hello\\world"), '"hello\\\\world"', 'quoted backslash'); +is(git_quote($s = "hello\nworld"), '"hello\\nworld"', 'quoted LF'); + done_testing(); -- cgit v1.2.3-24-ge0c7 From f026dbdd392c9dd5fddbdad9a2240738d4956640 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Jan 2019 04:21:07 +0000 Subject: www: admin-configurable CSS via "publicinbox.css" Maybe we'll default to a dark theme to promote energy savings... 
See contrib/css/README for details --- t/view.t | 2 ++ 1 file changed, 2 insertions(+) (limited to 't') diff --git a/t/view.t b/t/view.t index b829ecf8..ef7d6958 100644 --- a/t/view.t +++ b/t/view.t @@ -6,6 +6,7 @@ use Test::More; use Email::MIME; use Plack::Util; use_ok 'PublicInbox::View'; +use_ok 'PublicInbox::Config'; # FIXME: make this test less fragile my $ctx = { @@ -18,6 +19,7 @@ my $ctx = { nntp_url => sub {[]}, max_git_part => sub { undef }, description => sub { '' }), + www => Plack::Util::inline_object(style => sub { '' }), }; $ctx->{-inbox}->{-primary_address} = 'test@example.com'; -- cgit v1.2.3-24-ge0c7 From d4adef2ef649b738e83d065170dad9d84027dd77 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Jan 2019 11:40:22 +0000 Subject: t/check-www-inbox: use xmlstarlet to validate Atom if available I almost forgot about this script; but remembering to test it against real-world data can be useful to hunt for bugs. --- t/check-www-inbox.perl | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 't') diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 08e62471..7dd1eebe 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ # Parallel WWW checker my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n"; @@ -23,6 +23,16 @@ my %opts = ( GetOptions(%opts) or die "bad command-line args\n$usage"; my $root_url = shift or die $usage; +chomp(my $xmlstarlet = `which xmlstarlet 2>/dev/null`); +my $atom_check = eval { + require IPC::Run; + my $cmd = [ qw(xmlstarlet val -e -) ]; + sub { + my ($in, $out, $err) = @_; + IPC::Run::run($cmd, $in, $out, $err); + } +} if $xmlstarlet; + my %workers; $SIG{TERM} = sub { exit 0 }; $SIG{CHLD} = sub { @@ -146,7 +156,15 @@ sub worker_loop { # make sure the HTML source doesn't screw up terminals # when people curl the source 
(not remotely an expert # on languages or encodings, here). - next if $r->header('Content-Type') !~ m!\btext/html\b!; + my $ct = $r->header('Content-Type'); + if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) { + my $raw = $r->decoded_content; + my ($out, $err) = ('', ''); + $atom_check->(\$raw, \$out, \$err) and + warn "Atom ($?) - $u - <1:$out> <2:$err>\n"; + } + + next if $ct !~ m!\btext/html\b!; my $dc = $r->decoded_content; if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) { my $o = $1; -- cgit v1.2.3-24-ge0c7 From cfa8ff7c256e20f3240aed5f98d155c019788e3b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Jan 2019 20:56:22 +0000 Subject: config: each_inbox iteration preserves config order For cross-inbox Message-ID resolution; having some sort of stable ordering makes the most sense. Relying on the order of the config file seems most natural and allows us to avoid introducing yet another configuration knob. --- t/config.t | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 't') diff --git a/t/config.t b/t/config.t index 5f0a95ba..7531fd75 100644 --- a/t/config.t +++ b/t/config.t @@ -150,4 +150,23 @@ for my $s (@valid) { ok(PublicInbox::Config::valid_inbox_name($s), "$d name accepted"); } +{ + my $f = "$tmpdir/ordered"; + open my $fh, '>', $f or die "open: $!"; + my @expect; + foreach my $i (0..3) { + push @expect, "$i"; + print $fh <<"" or die "print: $!"; +[publicinbox "$i"] + mainrepo = /path/to/$i.git + address = $i\@example.com + + } + close $fh or die "close: $!"; + my $cfg = PublicInbox::Config->new($f); + my @result; + $cfg->each_inbox(sub { push @result, $_[0]->{name} }); + is_deeply(\@result, \@expect); +} + done_testing(); -- cgit v1.2.3-24-ge0c7 From 69822fef73720cb201bf9cb7f25ed5ea2c4fa10a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Jan 2019 03:09:22 +0000 Subject: t/check-www-inbox: warn on missing Content-Type Oops, I might've left it out, somewhere. 
--- t/check-www-inbox.perl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 't') diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 7dd1eebe..6232f164 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -156,7 +156,9 @@ sub worker_loop { # make sure the HTML source doesn't screw up terminals # when people curl the source (not remotely an expert # on languages or encodings, here). - my $ct = $r->header('Content-Type'); + my $ct = $r->header('Content-Type') || ''; + warn "no Content-Type: $u\n" if $ct eq ''; + if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) { my $raw = $r->decoded_content; my ($out, $err) = ('', ''); -- cgit v1.2.3-24-ge0c7 From 0a04fa7bd38c8f491b429dc7d8578735ca7ca3f4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Jan 2019 03:19:20 +0000 Subject: highlight: initial wrapper and PSGI service I'll probably expose the PSGI service for cgit; but it could be useful to others as well. --- t/hl_mod.t | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 t/hl_mod.t (limited to 't') diff --git a/t/hl_mod.t b/t/hl_mod.t new file mode 100644 index 00000000..b8b8eb9d --- /dev/null +++ b/t/hl_mod.t @@ -0,0 +1,54 @@ +#!/usr/bin/perl -w +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +eval { require highlight } or + plan skip_all => 'failed to load highlight.pm'; +use_ok 'PublicInbox::HlMod'; +my $hls = PublicInbox::HlMod->new; +ok($hls, 'initialized OK'); +is($hls->_shebang2lang(\"#!/usr/bin/perl -w\n"), 'perl', 'perl shebang OK'); +is($hls->{-ext2lang}->{'pm'}, 'perl', '.pm suffix OK'); +is($hls->{-ext2lang}->{'pl'}, 'perl', '.pl suffix OK'); +is($hls->_path2lang('Makefile'), 'make', 'Makefile OK'); +my $str = do { local $/; open(my $fh, __FILE__); <$fh> }; +my $orig = $str; + +{ + my $ref = $hls->do_hl(\$str, 'foo.perl'); + is(ref($ref), 'SCALAR', 'got a scalar reference back'); + 
like($$ref, qr/I can see you!/, 'we can see ourselves in output'); + + use PublicInbox::Spawn qw(which); + if (eval { require IPC::Run } && which('w3m')) { + require File::Temp; + my $cmd = [ qw(w3m -T text/html -dump -config /dev/null) ]; + my ($out, $err) = ('', ''); + IPC::Run::run($cmd, $ref, \$out, \$err); + # expand tabs and normalize whitespace, + # w3m doesn't preserve tabs + $orig =~ s/\t/ /gs; + $out =~ s/\s*\z//sg; + $orig =~ s/\s*\z//sg; + is($out, $orig, 'w3m output matches'); + } +} + +my $nr = $ENV{TEST_MEMLEAK}; +if ($nr && -r "/proc/$$/status") { + my $fh; + open $fh, '<', "/proc/$$/status"; + diag "starting at memtest at ".join('', grep(/VmRSS:/, <$fh>)); + PublicInbox::HlMod->new->do_hl(\$orig) for (1..$nr); + open $fh, '<', "/proc/$$/status"; + diag "creating $nr instances: ".join('', grep(/VmRSS:/, <$fh>)); + my $hls = PublicInbox::HlMod->new; + $hls->do_hl(\$orig) for (1..$nr); + $hls = undef; + open $fh, '<', "/proc/$$/status"; + diag "reused instance $nr times: ".join('', grep(/VmRSS:/, <$fh>)); +} + +done_testing; -- cgit v1.2.3-24-ge0c7 From c440c879d38e67f62bdbb74f616dc84d20899c33 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Jan 2019 06:51:23 +0000 Subject: t/check-www-inbox: trap SIGINT for File::Temp destruction Otherwise, temporary GDBM files don't get unlinked when I SIGINT the process. 
--- t/check-www-inbox.perl | 2 ++ 1 file changed, 2 insertions(+) (limited to 't') diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 6232f164..1e88e952 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -14,6 +14,7 @@ use POSIX qw(:sys_wait_h); use Time::HiRes qw(gettimeofday tv_interval); use WWW::Mechanize; use Data::Dumper; +our $tmp_owner = $$; my $nproc = 4; my $slow = 0.5; my %opts = ( @@ -34,6 +35,7 @@ my $atom_check = eval { } if $xmlstarlet; my %workers; +$SIG{INT} = sub { exit 130 }; $SIG{TERM} = sub { exit 0 }; $SIG{CHLD} = sub { while (1) { -- cgit v1.2.3-24-ge0c7 From b8655db863d42a023a32c4604e10ee2acb619aa9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Jan 2019 07:57:06 +0000 Subject: t/qspawn.t: psgi_qx stderr test --- t/qspawn.t | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 't') diff --git a/t/qspawn.t b/t/qspawn.t index 170e4d7f..745ec4d8 100644 --- a/t/qspawn.t +++ b/t/qspawn.t @@ -1,8 +1,16 @@ -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ use Test::More; use_ok 'PublicInbox::Qspawn'; +{ + my $cmd = [qw(sh -c), 'echo >&2 err; echo out']; + my $qsp = PublicInbox::Qspawn->new($cmd, {}, { 2 => 1 }); + my $res; + $qsp->psgi_qx({}, undef, sub { $res = ${$_[0]} }); + is($res, "err\nout\n", 'captured stderr and stdout'); +} + my $limiter = PublicInbox::Qspawn::Limiter->new(1); { my $x = PublicInbox::Qspawn->new([qw(true)]); -- cgit v1.2.3-24-ge0c7 From fffbc9ec32b78731acd30539f6e3f2778d2d1fb2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Jan 2019 02:10:13 +0000 Subject: solver: rewrite to use Qspawn->psgi_qx and pi-httpd.async The psgi_qx routine in the now-abandoned "repobrowse" branch allows us to break down blob-solving at each process execution point. It reuses the Qspawn facility for git-http-backend(1), allowing us to limit parallel subprocesses independently of Perl worker count. 
This is actually 2-3% slower than a fully-synchronous execution; but it is fair to other clients as it won't monopolize the server for hundreds of milliseconds (or even seconds) at a time. --- t/solver_git.t | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 't') diff --git a/t/solver_git.t b/t/solver_git.t index fe322eab..197a003a 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -40,10 +40,12 @@ sub deliver_patch ($) { deliver_patch('t/solve/0001-simple-mod.patch'); -my $gits = [ PublicInbox::Git->new($git_dir) ]; -my $solver = PublicInbox::SolverGit->new($gits, [ $ibx ]); +$ibx->{-repo_objs} = [ PublicInbox::Git->new($git_dir) ]; +my $res; +my $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); open my $log, '+>>', "$mainrepo/solve.log" or die "open: $!"; -my $res = $solver->solve($log, '69df7d5', {}); +my $psgi_env = { 'psgi.url_scheme' => 'http', HTTP_HOST => 'example.com' }; +$solver->solve($psgi_env, $log, '69df7d5', {}); ok($res, 'solved a blob!'); my $wt_git = $res->[0]; is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob'); @@ -62,20 +64,24 @@ if (0) { # TODO: check this? 
diag $z; } +$solver = undef; $res = undef; my $wt_git_dir = $wt_git->{git_dir}; $wt_git = undef; ok(!-d $wt_git_dir, 'no references to WT held'); -$res = $solver->solve($log, '0'x40, {}); +$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); +$solver->solve($psgi_env, $log, '0'x40, {}); is($res, undef, 'no error on z40'); my $git_v2_20_1_tag = '7a95a1cd084cb665c5c2586a415e42df0213af74'; -$res = $solver->solve($log, $git_v2_20_1_tag, {}); +$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); +$solver->solve($psgi_env, $log, $git_v2_20_1_tag, {}); is($res, undef, 'no error on a tag not in our repo'); deliver_patch('t/solve/0002-rename-with-modifications.patch'); -$res = $solver->solve($log, '0a92431', {}); +$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); +$solver->solve($psgi_env, $log, '0a92431', {}); ok($res, 'resolved without hints'); my $hints = { @@ -83,7 +89,9 @@ my $hints = { path_a => 'HACKING', path_b => 'CONTRIBUTING' }; -my $hinted = $solver->solve($log, '0a92431', $hints); +$solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); +$solver->solve($psgi_env, $log, '0a92431', $hints); +my $hinted = $res; # don't compare ::Git objects: shift @$res; shift @$hinted; is_deeply($res, $hinted, 'hints work (or did not hurt :P'); -- cgit v1.2.3-24-ge0c7 From cee59522e00810f508aabce5a46e63bddcc203cc Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Jan 2019 03:30:30 +0000 Subject: qspawn: decode $? for user-friendliness The raw value of $? isn't very useful, generally. 
--- t/qspawn.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 't') diff --git a/t/qspawn.t b/t/qspawn.t index 745ec4d8..ab6e3758 100644 --- a/t/qspawn.t +++ b/t/qspawn.t @@ -31,7 +31,7 @@ my $limiter = PublicInbox::Qspawn::Limiter->new(1); my ($rpipe) = @_; is(0, sysread($rpipe, my $buf, 1), 'read zero bytes from false'); my $err = $x->finish; - is($err, 256, 'error on finish'); + ok($err, 'error on finish'); $run = 1; }); is($run, 1, 'callback ran alright'); -- cgit v1.2.3-24-ge0c7 From 3998cdd1c94661687cee413b01b95422cf02c34a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Jan 2019 11:35:29 +0000 Subject: t/hl_mod: extra check to ensure we escape HTML Otherwise, it's open season on our users :< --- t/hl_mod.t | 1 + 1 file changed, 1 insertion(+) (limited to 't') diff --git a/t/hl_mod.t b/t/hl_mod.t index b8b8eb9d..62cc6248 100644 --- a/t/hl_mod.t +++ b/t/hl_mod.t @@ -20,6 +20,7 @@ my $orig = $str; my $ref = $hls->do_hl(\$str, 'foo.perl'); is(ref($ref), 'SCALAR', 'got a scalar reference back'); like($$ref, qr/I can see you!/, 'we can see ourselves in output'); + like($$ref, qr/&&/, 'escaped'); use PublicInbox::Spawn qw(which); if (eval { require IPC::Run } && which('w3m')) { -- cgit v1.2.3-24-ge0c7 From fd8e62c1ed9b9f7eb851a263a9ada2cb8c182193 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Jan 2019 11:36:22 +0000 Subject: hlmod: disable enclosing
<pre> tag

We already have a <pre> tag in ViewVCS, and nesting <pre>
inside the pre-existing <pre> overrides the "white-space:pre"
we use to align line numbers.
---
 t/hl_mod.t | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 't')

diff --git a/t/hl_mod.t b/t/hl_mod.t
index 62cc6248..80f88907 100644
--- a/t/hl_mod.t
+++ b/t/hl_mod.t
@@ -27,7 +27,7 @@ my $orig = $str;
 		require File::Temp;
 		my $cmd = [ qw(w3m -T text/html -dump -config /dev/null) ];
 		my ($out, $err) = ('', '');
-		IPC::Run::run($cmd, $ref, \$out, \$err);
+		IPC::Run::run($cmd, \('
'.$$ref.'
'), \$out, \$err); # expand tabs and normalize whitespace, # w3m doesn't preserve tabs $orig =~ s/\t/ /gs; -- cgit v1.2.3-24-ge0c7 From 6025d9881c00a0d4b538f06ce157eed416045e10 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 27 Jan 2019 11:43:56 +0000 Subject: hval: add src_escape for highlight post-processing We need to post-process "highlight" output to ensure it doesn't contain odd bytes which cause "wide character" warnings or require odd glyphs in source form. --- t/hval.t | 3 +++ 1 file changed, 3 insertions(+) (limited to 't') diff --git a/t/hval.t b/t/hval.t index a193c296..bfc9a856 100644 --- a/t/hval.t +++ b/t/hval.t @@ -43,5 +43,8 @@ is('foo-bar', PublicInbox::Hval::to_filename("foo bar\nanother line\n"), is('foo.bar', PublicInbox::Hval::to_filename("foo....bar"), 'to_filename squeezes -'); +my $s = "\0\x07\n"; +PublicInbox::Hval::src_escape($s); +is($s, "\\0\\a\n", 'src_escape works as intended'); done_testing(); -- cgit v1.2.3-24-ge0c7 From a401ba449b6005632d817cae573939bdffc7125b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 28 Jan 2019 09:02:42 +0000 Subject: t/check-www-inbox: replace IPC::Run with PublicInbox::Spawn Because WWW::Mechanize uses truckload of memory, fork needs to prepare all that memory for CoW, which ends up bailing with ENOMEM. 
--- t/check-www-inbox.perl | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) (limited to 't') diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 1e88e952..933362a7 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -14,6 +14,12 @@ use POSIX qw(:sys_wait_h); use Time::HiRes qw(gettimeofday tv_interval); use WWW::Mechanize; use Data::Dumper; + +# we want to use vfork+exec with spawn, WWW::Mechanize can use too much +# memory and fork(2) fails +use PublicInbox::Spawn qw(spawn which); +$ENV{PERL_INLINE_DIRECTORY} or warn "PERL_INLINE_DIRECTORY unset, may OOM\n"; + our $tmp_owner = $$; my $nproc = 4; my $slow = 0.5; @@ -24,13 +30,35 @@ my %opts = ( GetOptions(%opts) or die "bad command-line args\n$usage"; my $root_url = shift or die $usage; -chomp(my $xmlstarlet = `which xmlstarlet 2>/dev/null`); +chomp(my $xmlstarlet = which('xmlstarlet')); my $atom_check = eval { - require IPC::Run; my $cmd = [ qw(xmlstarlet val -e -) ]; sub { my ($in, $out, $err) = @_; - IPC::Run::run($cmd, $in, $out, $err); + use autodie; + open my $in_fh, '+>', undef; + open my $out_fh, '+>', undef; + open my $err_fh, '+>', undef; + print $in_fh $$in; + $in_fh->flush; + sysseek($in_fh, 0, 0); + my $rdr = { + 0 => fileno($in_fh), + 1 => fileno($out_fh), + 2 => fileno($err_fh), + }; + my $pid = spawn($cmd, undef, $rdr); + defined $pid or die "spawn failure: $!"; + while (waitpid($pid, 0) != $pid) { + next if $!{EINTR}; + warn "waitpid(xmlstarlet, $pid) $!"; + return $!; + } + sysseek($out_fh, 0, 0); + sysread($out_fh, $$out, -s $out_fh); + sysseek($err_fh, 0, 0); + sysread($err_fh, $$err, -s $err_fh); + $? 
} } if $xmlstarlet; @@ -120,6 +148,7 @@ while (keys %workers) { # reacts to SIGCHLD sub worker_loop { my ($todo_rd, $done_wr) = @_; + $SIG{CHLD} = 'DEFAULT'; my $m = WWW::Mechanize->new(autocheck => 0); my $cc = LWP::ConnCache->new; $m->conn_cache($cc); @@ -164,8 +193,8 @@ sub worker_loop { if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) { my $raw = $r->decoded_content; my ($out, $err) = ('', ''); - $atom_check->(\$raw, \$out, \$err) and - warn "Atom ($?) - $u - <1:$out> <2:$err>\n"; + my $fail = $atom_check->(\$raw, \$out, \$err); + warn "Atom ($fail) - $u - <1:$out> <2:$err>\n" if $fail; } next if $ct !~ m!\btext/html\b!; -- cgit v1.2.3-24-ge0c7 From 75154e05332cae23502bb5b503fe5c797bdf6526 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Jan 2019 03:37:24 +0000 Subject: t/check-www-inbox: don't follow mboxes They can be extremely large with no limit, so can lead to OOM errors. --- t/check-www-inbox.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 't') diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 933362a7..0a6d61bb 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -175,7 +175,7 @@ sub worker_loop { my $s; # blocking foreach my $l (@links, "DONE\t$u") { - next if $l eq ''; + next if $l eq '' || $l =~ /\.mbox(?:\.gz)\z/; do { $s = $done_wr->send($l, MSG_EOR); } while (!defined $s && $!{EINTR}); -- cgit v1.2.3-24-ge0c7 From a6f2d5b61e37a49d8278b250d172b497a88c2b45 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 29 Jan 2019 03:42:48 +0000 Subject: t/check-www-inbox: disable history WWW::Mechanize keeps an infinitely large stack, which was leading to OOM errors on my system. 
--- t/check-www-inbox.perl | 1 + 1 file changed, 1 insertion(+) (limited to 't') diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 0a6d61bb..db292c50 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -151,6 +151,7 @@ sub worker_loop { $SIG{CHLD} = 'DEFAULT'; my $m = WWW::Mechanize->new(autocheck => 0); my $cc = LWP::ConnCache->new; + $m->stack_depth(0); # no history $m->conn_cache($cc); while (1) { $todo_rd->recv(my $u, 65535, 0); -- cgit v1.2.3-24-ge0c7