about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2016-01-19 01:16:19 +0000
committer: Eric Wong <e@80x24.org> 2016-04-05 18:58:27 +0000
commit: e90aa39b6447ceef78d31d4c68291585eb02244a (patch)
tree: b1307ffb83ae07a12ee90cb14228c8f00b56a088
parent: 2b79ca6a1f3f12cac616b2bce800f7f9487c8170 (diff)
download: public-inbox-e90aa39b6447ceef78d31d4c68291585eb02244a.tar.gz
It is expensive and of dubious usefulness to have tree content
auto-followed and indexed by search engines.  Reduce the
links we recommend to crawlers so they don't waste resources
of users running our code or waste the time of search engine
users finding low-value content.

We will allow indexing blobs, however, as they may contain
useful information for others to refer to.

Of course, I definitely want search engines to index commit
messages, as I consider those to be the most important data
served.
-rw-r--r-- lib/PublicInbox/RepobrowseBase.pm 14
-rw-r--r-- lib/PublicInbox/RepobrowseGitCommit.pm 26
-rw-r--r-- lib/PublicInbox/RepobrowseGitTree.pm 13
3 files changed, 35 insertions, 18 deletions
diff --git a/lib/PublicInbox/RepobrowseBase.pm b/lib/PublicInbox/RepobrowseBase.pm
index 6223ebde..14926d70 100644
--- a/lib/PublicInbox/RepobrowseBase.pm
+++ b/lib/PublicInbox/RepobrowseBase.pm
@@ -58,11 +58,21 @@ sub mime_type {
 
 # starts an HTML page for Repobrowse in a consistent way
 sub html_start {
-        my ($self, $req, $title_html) = @_;
+        my ($self, $req, $title_html, $opts) = @_;
         my $desc = $req->{repo_info}->{desc_html};
+        my $meta;
+
+        if ($opts) {
+                my @robots;
+                foreach (qw(nofollow noindex)) {
+                        push @robots, $_ if $opts->{$_};
+                }
+                $meta = qq(<meta\nname=robots\ncontent=") .
+                        join(',', @robots) . '" />';
+        }
 
         "<html><head><title>$title_html</title>" .
-                PublicInbox::Hval::STYLE .
+                PublicInbox::Hval::STYLE . $meta .
                 "</head><body><pre><b>$desc</b>";
 }
 
diff --git a/lib/PublicInbox/RepobrowseGitCommit.pm b/lib/PublicInbox/RepobrowseGitCommit.pm
index e4e0f3c6..d6843a6d 100644
--- a/lib/PublicInbox/RepobrowseGitCommit.pm
+++ b/lib/PublicInbox/RepobrowseGitCommit.pm
@@ -23,7 +23,7 @@ use constant GIT_FMT => '--pretty=format:'.join('%n',
         '%t', '%p', '%D', '%b%x00');
 
 sub git_commit_stream {
-        my ($req, $q, $H, $log, $fh) = @_;
+        my ($self, $req, $q, $H, $log, $fh) = @_;
         chomp(my $h = <$log>); # abbreviated commit
         my $l;
         chomp(my $s = utf8_html($l = <$log>)); # subject
@@ -40,10 +40,9 @@ sub git_commit_stream {
         my $rel = $req->{relcmd};
         my $qs = $q->qs(id => $h);
         chomp $H;
-        my $x = "<html><head><title>$s</title>" .
-                PublicInbox::Hval::STYLE . '</head><body><pre>' .
-                "   commit $H (<a\nhref=\"${rel}patch$qs\">patch</a>)\n" .
-                "     tree <a\nhref=\"${rel}tree?id=$h\">$t</a>";
+        my $x = $self->html_start($req, $s) . "\n" .
+                qq(   commit $H (<a\nhref="${rel}patch$qs">patch</a>)\n) .
+                qq(     tree <a\nrel=nofollow\nhref="${rel}tree?id=$h">$t</a>);
 
         # extra show path information, if any
         my $extra = $req->{extra};
@@ -57,7 +56,8 @@ sub git_commit_stream {
                         my $e = PublicInbox::Hval->utf8($_, join('/', @t));
                         $ep = $e->as_path;
                         my $eh = $e->as_html;
-                        "<a\nhref=\"${rel}tree/$ep?id=$h\">$eh</a>";
+                        $ep = "${rel}tree/$ep?id=$h";
+                        qq(<a\nrel=nofollow\nhref="$ep">$eh</a>);
                 } @$extra);
                 $path = "/$ep";
         }
@@ -158,7 +158,7 @@ sub call_git_commit {
         sub {
                 my ($res) = @_; # Plack callback
                 my $fh = $res->([200, ['Content-Type'=>'text/html']]);
-                git_commit_stream($req, $q, $H, $log, $fh);
+                git_commit_stream($self, $req, $q, $H, $log, $fh);
                 $fh->close;
         }
 }
@@ -266,7 +266,8 @@ sub git_diff_ab_hunk {
                 $rv .= $ca;
         } else {
                 my $p = $diff->{p}->[0];
-                $rv .= "<a\nhref=\"${rel}tree/$diff->{path_a}?id=$p#n$na\">";
+                $rv .= qq(<a\nrel=nofollow);
+                $rv .= qq(\nhref="${rel}tree/$diff->{path_a}?id=$p#n$na">);
                 $rv .= "$ca</a>";
         }
         $rv .= ' ';
@@ -274,7 +275,8 @@ sub git_diff_ab_hunk {
                 $rv .= $cb;
         } else {
                 my $h = $diff->{h};
-                $rv .= "<a\nhref=\"${rel}tree/$diff->{path_b}?id=$h#n$nb\">";
+                $rv .= qq(<a\nrel=nofollow);
+                $rv .= qq(\nhref="${rel}tree/$diff->{path_b}?id=$h#n$nb">);
                 $rv .= "<b>$cb</b></a>";
         }
         $rv . ' @@' . utf8_html($ctx);
@@ -340,7 +342,8 @@ sub git_diff_cc_hunk {
                 $rv .= " <b>$last</b>";
         } else {
                 my $h = $diff->{h};
-                $rv .= " <a\nhref=\"${rel}tree/$path?id=$h#n$n\">";
+                $rv .= qq( <a\nrel=nofollow);
+                $rv .= qq(\nhref="${rel}tree/$path?id=$h#n$n">);
                 $rv .= "<b>$last</b></a>";
         }
         $rv .= " $at" . utf8_html($ctx);
@@ -418,7 +421,8 @@ sub show_unchanged {
                 my $p = PublicInbox::Hval->utf8(git_unquote($fn));
                 $p = $p->as_path;
                 $fn = utf8_html($fn);
-                $s .= qq(\t<a\nid="$anchor"\nhref="${rel}tree/$p$qs">);
+                $s .= qq(\t<a\nrel=nofollow);
+                $s .= qq(\nid="$anchor"\nhref="${rel}tree/$p$qs">);
                 $s .= "$fn</a>\n";
         }
         $fh->write($s);
diff --git a/lib/PublicInbox/RepobrowseGitTree.pm b/lib/PublicInbox/RepobrowseGitTree.pm
index d499cb4b..ceb28910 100644
--- a/lib/PublicInbox/RepobrowseGitTree.pm
+++ b/lib/PublicInbox/RepobrowseGitTree.pm
@@ -36,12 +36,16 @@ sub git_tree_stream {
         }
 
         my $fh = $res->([200, ['Content-Type'=>'text/html; charset=UTF-8']]);
-        $fh->write('<html><head>'. PublicInbox::Hval::STYLE .
-                '<title></title></head><body>');
+        my $opts = { nofollow => 1 };
+        my $title = $req->{expath};
+        $title = $title eq '' ? 'tree' : utf8_html($title);
 
         if ($type eq 'tree') {
+                $opts->{noindex} = 1;
+                $fh->write($self->html_start($req, $title, $opts) . "\n");
                 git_tree_show($req, $fh, $git, $hex, $q);
         } elsif ($type eq 'blob') {
+                $fh->write($self->html_start($req, $title, $opts) . "\n");
                 git_blob_show($req, $fh, $git, $hex, $q);
         } else {
                 # TODO
@@ -87,7 +91,7 @@ sub git_blob_show {
         my $plain = join('/', "${rel}plain", @{$req->{extra}});
         $plain = PublicInbox::Hval->utf8($plain)->as_path . $q->qs;
         my $t = cur_path($req, $q);
-        my $h = qq{<pre>path: $t\n\nblob $hex};
+        my $h = qq{\npath: $t\n\nblob $hex};
         my $end = '';
 
         $git->cat_file($hex, sub {
@@ -141,11 +145,10 @@ sub git_blob_show {
 
 sub git_tree_show {
         my ($req, $fh, $git, $hex, $q) = @_;
-        $fh->write('<pre>');
         my $ls = $git->popen(qw(ls-tree -l -z), $git->abbrev, $hex);
         my $t = cur_path($req, $q);
         my $pfx;
-        $fh->write("path: $t\n\n");
+        $fh->write("\npath: $t\n\n");
         my $qs = $q->qs;
 
         if ($req->{tslash}) {