about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-06-04 10:38:20 +0000
committerEric Wong <e@80x24.org>2019-06-04 10:38:20 +0000
commite56b908c0e97c2f479b350dfb0868bc463413b89 (patch)
tree6b8a8fc59840ebc3e6bee1bbb2e3a5e568bce351
parentd5c56e2a1adde279ff86fbd0be81a6b7e5ba0896 (diff)
parent91af69a41f2963f1f952cb0932ed23cd86cd1093 (diff)
downloadpublic-inbox-e56b908c0e97c2f479b350dfb0868bc463413b89.tar.gz
* charclass: (24 commits)
  www: require ASCII word characters for CSS filenames
  www: require ASCII range for mbox downloads
  githttpbackend: require ASCII in path
  require ASCII digits for local FS items
  www: require ASCII digit for git epoch
  solver|viewdiff: restrict digit matches to ASCII
  inbox: require ASCII digits for feedmax var
  filter/rubylang: require ASCII digit for mailcount
  msgtime: require ASCII digits for parsing dates
  searchview: do not allow non-ASCII offsets and limits
  githttpbackend: require Range:, Status: to be ASCII digits
  view: require YYYYmmDD(HHMMSS) timestamps to be ASCII
  newswww: only accept ASCII digits as article numbers
  config: do not accept non-ASCII digits in cgitrc params
  www: require ASCII filenames in git blob downloads
  www: only emit ASCII chars in attachment filenames
  wwwattach: only pass the charset through if ASCII
  wwwlisting: require ASCII digit for port number
  http: require SERVER_PORT to be ASCII digit
  feed: only accept ASCII digits for ref~$N
  ...
-rw-r--r--lib/PublicInbox/Config.pm2
-rw-r--r--lib/PublicInbox/Feed.pm2
-rw-r--r--lib/PublicInbox/Filter/RubyLang.pm2
-rw-r--r--lib/PublicInbox/GitHTTPBackend.pm8
-rw-r--r--lib/PublicInbox/HTTP.pm2
-rw-r--r--lib/PublicInbox/Hval.pm3
-rw-r--r--lib/PublicInbox/Inbox.pm6
-rw-r--r--lib/PublicInbox/Linkify.pm5
-rw-r--r--lib/PublicInbox/MID.pm4
-rw-r--r--lib/PublicInbox/MsgTime.pm7
-rw-r--r--lib/PublicInbox/NNTP.pm16
-rw-r--r--lib/PublicInbox/NewsWWW.pm2
-rw-r--r--lib/PublicInbox/Search.pm2
-rw-r--r--lib/PublicInbox/SearchView.pm4
-rw-r--r--lib/PublicInbox/SolverGit.pm2
-rw-r--r--lib/PublicInbox/V2Writable.pm4
-rw-r--r--lib/PublicInbox/View.pm6
-rw-r--r--lib/PublicInbox/ViewDiff.pm4
-rw-r--r--lib/PublicInbox/WWW.pm20
-rw-r--r--lib/PublicInbox/WwwAttach.pm2
-rw-r--r--lib/PublicInbox/WwwListing.pm4
-rw-r--r--lib/PublicInbox/Xapcmd.pm6
-rwxr-xr-xscript/public-inbox-purge2
-rw-r--r--t/linkify.t12
24 files changed, 75 insertions, 52 deletions
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 09f9179b..6e85750a 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -307,7 +307,7 @@ sub parse_cgitrc {
                         }
                 } elsif (m!\Ainclude=(.+)\z!) {
                         parse_cgitrc($self, $1, $nesting + 1);
-                } elsif (m!\A(scan-hidden-path|remove-suffix)=(\d+)\z!) {
+                } elsif (m!\A(scan-hidden-path|remove-suffix)=([0-9]+)\z!) {
                         my ($k, $v) = ($1, $2);
                         $k =~ tr/-/_/;
                         $self->{"-cgit_$k"} = $v;
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index a04838a1..ae071895 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -102,7 +102,7 @@ sub recent_msgs {
         my $hex = '[a-f0-9]';
         my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!;
         my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!;
-        my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~\d+)?/;
+        my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~[0-9]+)?/;
 
         # revision ranges may be specified
         my $range = 'HEAD';
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
index a43d67a9..d40705b7 100644
--- a/lib/PublicInbox/Filter/RubyLang.pm
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -50,7 +50,7 @@ sub scrub {
                 my @v = $hdr->header_raw('X-Mail-Count');
                 my $n;
                 foreach (@v) {
-                        /\A\s*(\d+)\s*\z/ or next;
+                        /\A\s*([0-9]+)\s*\z/ or next;
                         $n = $1;
                         last;
                 }
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 09411048..a2a81f8e 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -51,8 +51,8 @@ sub serve {
 
         # Documentation/technical/http-protocol.txt in git.git
         # requires one and exactly one query parameter:
-        if ($env->{QUERY_STRING} =~ /\Aservice=git-\w+-pack\z/ ||
-                                $path =~ /\Agit-\w+-pack\z/) {
+        if ($env->{QUERY_STRING} =~ /\Aservice=git-[A-Za-z0-9_]+-pack\z/ ||
+                                $path =~ /\Agit-[A-Za-z0-9_]+-pack\z/) {
                 my $ok = serve_smart($env, $git, $path);
                 return $ok if $ok;
         }
@@ -90,7 +90,7 @@ sub static_result ($$$$) {
         my $len = $size;
         my $code = 200;
         push @$h, 'Content-Type', $type;
-        if (($env->{HTTP_RANGE} || '') =~ /\bbytes=(\d*)-(\d*)\z/) {
+        if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) {
                 ($code, $len) = prepare_range($env, $in, $h, $1, $2, $size);
                 if ($code == 416) {
                         push @$h, 'Content-Range', "bytes */$size";
@@ -260,7 +260,7 @@ sub parse_cgi_headers {
         foreach my $l (split(/\r?\n/, $h)) {
                 my ($k, $v) = split(/:\s*/, $l, 2);
                 if ($k =~ /\AStatus\z/i) {
-                        ($code) = ($v =~ /\b(\d+)\b/);
+                        ($code) = ($v =~ /\b([0-9]+)\b/);
                 } else {
                         push @h, $k, $v;
                 }
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 10e6d6a4..977614b4 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -142,7 +142,7 @@ sub app_dispatch {
         $env->{REMOTE_ADDR} = $self->{remote_addr};
         $env->{REMOTE_PORT} = $self->{remote_port};
         if (my $host = $env->{HTTP_HOST}) {
-                $host =~ s/:(\d+)\z// and $env->{SERVER_PORT} = $1;
+                $host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1;
                 $env->{SERVER_NAME} = $host;
         }
         if (defined $input) {
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 95a0f709..2b443970 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -13,6 +13,9 @@ our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
                 to_attr from_attr/;
 my $enc_ascii = find_encoding('us-ascii');
 
+# safe-ish acceptable filename pattern for portability
+our $FN = '[a-zA-Z0-9][a-zA-Z0-9_\-\.]+[a-zA-Z0-9]'; # needs \z anchor
+
 sub new {
         my ($class, $raw, $href) = @_;
 
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index b3178b98..c9330332 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -74,7 +74,7 @@ sub _set_uint ($$$) {
         my $val = $opts->{$field};
         if (defined $val) {
                 $val = $val->[-1] if ref($val) eq 'ARRAY';
-                $val = undef if $val !~ /\A\d+\z/;
+                $val = undef if $val !~ /\A[0-9]+\z/;
         }
         $opts->{$field} = $val || $default;
 }
@@ -87,7 +87,7 @@ sub _set_limiter ($$$) {
                 my $mkey = $pfx.'max';
                 my $val = $self->{$mkey} or return;
                 my $lim;
-                if ($val =~ /\A\d+\z/) {
+                if ($val =~ /\A[0-9]+\z/) {
                         require PublicInbox::Qspawn;
                         $lim = PublicInbox::Qspawn::Limiter->new($val);
                 } elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
@@ -161,7 +161,7 @@ sub max_git_part {
                 if (opendir my $dh, $gits) {
                         my $max = -1;
                         while (defined(my $git_dir = readdir($dh))) {
-                                $git_dir =~ m!\A(\d+)\.git\z! or next;
+                                $git_dir =~ m!\A([0-9]+)\.git\z! or next;
                                 $max = $1 if $1 > $max;
                         }
                         $part = $self->{-max_git_part} = $max if $max >= 0;
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d4778e7d..84960a98 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -13,6 +13,7 @@ package PublicInbox::Linkify;
 use strict;
 use warnings;
 use Digest::SHA qw/sha1_hex/;
+use PublicInbox::Hval qw(ascii_html);
 
 my $SALT = rand;
 my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
@@ -61,12 +62,12 @@ sub linkify_1 {
                         $end = ')';
                 }
 
+                $url = ascii_html($url); # for IDN
+
                 # salt this, as this could be exploited to show
                 # links in the HTML which don't show up in the raw mail.
                 my $key = sha1_hex($url . $SALT);
 
-                # only escape ampersands, others do not match LINK_RE
-                $url =~ s/&/&#38;/g;
                 $_[0]->{$key} = $url;
                 $beg . 'PI-LINK-'. $key . $end;
         ^ge;
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 7f1ab15e..6904d61a 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -26,11 +26,11 @@ sub mid_clean {
         $mid;
 }
 
-# this is idempotent
+# this is idempotent, used for HTML anchor/ids and such
 sub id_compress {
         my ($id, $force) = @_;
 
-        if ($force || $id =~ /[^\w\-]/ || length($id) > MID_MAX) {
+        if ($force || $id =~ /[^a-zA-Z0-9_\-]/ || length($id) > MID_MAX) {
                 utf8::encode($id);
                 return sha1_hex($id);
         }
diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm
index 62160233..12412825 100644
--- a/lib/PublicInbox/MsgTime.pm
+++ b/lib/PublicInbox/MsgTime.pm
@@ -44,8 +44,9 @@ sub msg_received_at ($) {
         my @recvd = $hdr->header_raw('Received');
         my ($ts);
         foreach my $r (@recvd) {
-                $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+
-                        \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next;
+                $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+
+                        [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+)
+                        \s+([\+\-][0-9]+))/sx or next;
                 $ts = eval { str2date_zone($1) } and return $ts;
                 my $mid = $hdr->header_raw('Message-ID');
                 warn "no date in $mid Received: $r\n";
@@ -59,7 +60,7 @@ sub msg_date_only ($) {
         my ($ts);
         foreach my $d (@date) {
                 # Y2K problems: 3-digit years
-                $d =~ s!([A-Za-z]{3}) (\d{3}) (\d\d:\d\d:\d\d)!
+                $d =~ s!([A-Za-z]{3}) ([0-9]{3}) ([0-9]{2}:[0-9]{2}:[0-9]{2})!
                         my $yyyy = $2 + 1900; "$1 $yyyy $3"!e;
                 $ts = eval { str2date_zone($d) } and return $ts;
                 if ($@) {
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 8cb6c56d..be80560f 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -121,7 +121,7 @@ sub args_ok ($$) {
 # returns 1 if we can continue, 0 if not due to buffered writes or disconnect
 sub process_line ($$) {
         my ($self, $l) = @_;
-        my ($req, @args) = split(/\s+/, $l);
+        my ($req, @args) = split(/[ \t]/, $l);
         return 1 unless defined($req); # skip blank line
         $req = lc($req);
         $req = eval {
@@ -437,7 +437,7 @@ sub set_nntp_headers ($$$$$) {
         # clobber some
         my $xref = xref($self, $ng, $n, $mid);
         $hdr->header_set('Xref', $xref);
-        $xref =~ s/:\d+//g;
+        $xref =~ s/:[0-9]+//g;
         $hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]);
         header_append($hdr, 'List-Post', "<mailto:$ng->{-primary_address}>");
         if (my $url = $ng->base_url) {
@@ -453,7 +453,7 @@ sub art_lookup ($$$) {
         my ($n, $mid);
         my $err;
         if (defined $art) {
-                if ($art =~ /\A\d+\z/o) {
+                if ($art =~ /\A[0-9]+\z/) {
                         $err = '423 no such article number in this group';
                         $n = int($art);
                         goto find_mid;
@@ -508,7 +508,7 @@ sub simple_body_write ($$) {
 
 sub set_art {
         my ($self, $art) = @_;
-        $self->{article} = $art if defined $art && $art =~ /\A\d+\z/;
+        $self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/;
 }
 
 sub _header ($) {
@@ -576,11 +576,11 @@ sub get_range ($$) {
         defined $range or return '420 No article(s) selected';
         my ($beg, $end);
         my ($min, $max) = $ng->mm->minmax;
-        if ($range =~ /\A(\d+)\z/) {
+        if ($range =~ /\A([0-9]+)\z/) {
                 $beg = $end = $1;
-        } elsif ($range =~ /\A(\d+)-\z/) {
+        } elsif ($range =~ /\A([0-9]+)-\z/) {
                 ($beg, $end) = ($1, $max);
-        } elsif ($range =~ /\A(\d+)-(\d+)\z/) {
+        } elsif ($range =~ /\A([0-9]+)-([0-9]+)\z/) {
                 ($beg, $end) = ($1, $2);
         } else {
                 return r501;
@@ -959,7 +959,7 @@ sub event_read {
                 $self->{rbuf} .= $$buf;
         }
         my $r = 1;
-        while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]*)\r?\n//) {
+        while ($r > 0 && $self->{rbuf} =~ s/\A[ \t\r\n]*([^\r\n]*)\r?\n//) {
                 my $line = $1;
                 return $self->close if $line =~ /[[:cntrl:]]/s;
                 my $t0 = now();
diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm
index 8626cf96..80bb4886 100644
--- a/lib/PublicInbox/NewsWWW.pm
+++ b/lib/PublicInbox/NewsWWW.pm
@@ -47,7 +47,7 @@ sub call {
         if (my $ibx = $pi_config->lookup_newsgroup($ng)) {
                 my $url = PublicInbox::Hval::prurl($env, $ibx->{url});
                 my $code = 301;
-                if (defined $article && $article =~ /\A\d+\z/) {
+                if (defined $article && $article =~ /\A[0-9]+\z/) {
                         my $mid = eval { $ibx->mm->mid_for($article) };
                         if (defined $mid) {
                                 # article IDs are not stable across clones,
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index c054a874..9903f427 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -144,7 +144,7 @@ sub _xdb ($) {
         my $qpf = \($self->{qp_flags} ||= $QP_FLAGS);
         if ($self->{version} >= 2) {
                 foreach my $part (<$dir/*>) {
-                        -d $part && $part =~ m!/\d+\z! or next;
+                        -d $part && $part =~ m!/[0-9]+\z! or next;
                         my $sub = Search::Xapian::Database->new($part);
                         if ($xdb) {
                                 $xdb->add_database($sub);
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 6592b3b2..b089de9c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -308,12 +308,12 @@ sub new {
         my ($class, $qp) = @_;
 
         my $r = $qp->{r};
-        my ($l) = (($qp->{l} || '') =~ /(\d+)/);
+        my ($l) = (($qp->{l} || '') =~ /([0-9]+)/);
         $l = $LIM if !$l || $l > $LIM;
         bless {
                 q => $qp->{'q'},
                 x => $qp->{x} || '',
-                o => (($qp->{o} || '0') =~ /(\d+)/),
+                o => (($qp->{o} || '0') =~ /([0-9]+)/),
                 l => $l,
                 r => (defined $r && $r ne '0'),
         }, $class;
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 3841c567..81f99025 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -206,7 +206,7 @@ sub find_extract_diff ($$$) {
         }
 
         my $msgs = $srch->query($q, { relevance => 1 });
-        my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: (\d+))?/;
+        my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: ([0-9]+))?/;
 
         my $di;
         foreach my $smsg (@$msgs) {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 76844cd4..a8c33ef4 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -48,7 +48,7 @@ sub count_partitions ($) {
         # due to -compact
         if (-d $xpfx) {
                 foreach my $part (<$xpfx/*>) {
-                        -d $part && $part =~ m!/\d+\z! or next;
+                        -d $part && $part =~ m!/[0-9]+\z! or next;
                         eval {
                                 Search::Xapian::Database->new($part)->close;
                                 $nparts++;
@@ -574,7 +574,7 @@ sub git_dir_latest {
         my $latest;
         opendir my $dh, $pfx or die "opendir $pfx: $!\n";
         while (defined(my $git_dir = readdir($dh))) {
-                $git_dir =~ m!\A(\d+)\.git\z! or next;
+                $git_dir =~ m!\A([0-9]+)\.git\z! or next;
                 if ($1 > $$max) {
                         $$max = $1;
                         $latest = "$pfx/$git_dir";
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 09afdaf1..1b52bf86 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -528,7 +528,7 @@ sub attach_link ($$$$;$) {
         $desc = $fn unless defined $desc;
         $desc = '' unless defined $desc;
         my $sfn;
-        if (defined $fn && $fn =~ /\A[[:alnum:]][\w\.-]+[[:alnum:]]\z/) {
+        if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) {
                 $sfn = $fn;
         } elsif ($ct eq 'text/plain') {
                 $sfn = 'a.txt';
@@ -1160,8 +1160,8 @@ sub paginate_recent ($$) {
         # Xapian uses '..' but '-' is perhaps friendier to URL linkifiers
         # if only $after exists "YYYYMMDD.." because "." could be skipped
         # if interpreted as an end-of-sentence
-        $t =~ s/\A(\d{8,14})-// and $after = str2ts($1);
-        $t =~ /\A(\d{8,14})\z/ and $before = str2ts($1);
+        $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1);
+        $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1);
 
         my $ibx = $ctx->{-inbox};
         my $msgs = $ibx->recent($opts, $after, $before);
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 411ed2bb..b7dab819 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -55,12 +55,12 @@ sub diff_hunk ($$$$) {
         (defined($spfx) && defined($oid_a) && defined($oid_b)) or
                 return "@@ $ca $cb @@";
 
-        my ($n) = ($ca =~ /^-(\d+)/);
+        my ($n) = ($ca =~ /^-([0-9]+)/);
         $n = defined($n) ? do { ++$n; "#n$n" } : '';
 
         my $rv = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
 
-        ($n) = ($cb =~ /^\+(\d+)/);
+        ($n) = ($cb =~ /^\+([0-9]+)/);
         $n = defined($n) ? do { ++$n; "#n$n" } : '';
 
         $rv .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index b6f18f8d..7ea98204 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -28,7 +28,7 @@ use PublicInbox::UserContent;
 our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
-our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i;
+our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
 our $OID_RE = qr![a-f0-9]{7,40}!;
 
 sub new {
@@ -74,7 +74,8 @@ sub call {
         my $method = $env->{REQUEST_METHOD};
 
         if ($method eq 'POST') {
-                if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) {
+                if ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)?
+                                        (git-upload-pack)\z!x) {
                         my ($part, $path) = ($2, $3);
                         return invalid_inbox($ctx, $1) ||
                                 serve_git($ctx, $part, $path);
@@ -97,11 +98,11 @@ sub call {
                 invalid_inbox($ctx, $1) || get_atom($ctx);
         } elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
                 invalid_inbox($ctx, $1) || get_new($ctx);
-        } elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?
+        } elsif ($path_info =~ m!$INBOX_RE/(?:([0-9]+)/)?
                                 ($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
                 my ($part, $path) = ($2, $3);
                 invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path);
-        } elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) {
+        } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-]+).mbox\.gz\z!o) {
                 serve_mbox_range($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
                 msg_page($ctx, $1, $2, $3);
@@ -123,11 +124,12 @@ sub call {
                 r301($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
                 get_text($ctx, $1, $2);
-        } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) {
+        } elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) {
                 get_css($ctx, $1, $2);
         } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
                 get_vcs_object($ctx, $1, $2);
-        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
+        } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/
+                                ($PublicInbox::Hval::FN)\z!ox) {
                 get_vcs_object($ctx, $1, $2, $3);
         } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
                 r301($ctx, $1, $2, 's/');
@@ -534,11 +536,15 @@ sub stylesheets_prepare ($$) {
                         $inline_ok = 0;
                 } else {
                         my $fn = $_;
+                        my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
+                        if ($key !~ /\A[a-zA-Z0-9_\-\.]+\z/) {
+                                warn "ignoring $fn, non-ASCII word character\n";
+                                next;
+                        }
                         open(my $fh, '<', $fn) or do {
                                 warn "failed to open $fn: $!\n";
                                 next;
                         };
-                        my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
                         my $ctime = 0;
                         my $local = do { local $/; <$fh> };
                         if ($local =~ /\S/) {
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index d690ce41..96103cb0 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -27,7 +27,7 @@ sub get_attach ($$$) {
                 if ($ct && (($ct->{discrete} || '') eq 'text')) {
                         # display all text as text/plain:
                         my $cset = $ct->{attributes}->{charset};
-                        if ($cset && ($cset =~ /\A[\w-]+\z/)) {
+                        if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) {
                                 $res->[1]->[1] .= qq(; charset=$cset);
                         }
                 } else { # TODO: allow user to configure safe types
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index e8dad4b8..e1473b3d 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -24,8 +24,8 @@ sub list_match_domain ($$) {
         my ($self, $env) = @_;
         my @list;
         my $host = $env->{HTTP_HOST} // $env->{SERVER_NAME};
-        $host =~ s/:\d+\z//;
-        my $re = qr!\A(?:https?:)?//\Q$host\E(?::\d+)?/!i;
+        $host =~ s/:[0-9]+\z//;
+        my $re = qr!\A(?:https?:)?//\Q$host\E(?::[0-9]+)?/!i;
         $self->{pi_config}->each_inbox(sub {
                 my ($ibx) = @_;
                 push @list, $ibx if !$ibx->{-hide}->{www} && $ibx->{url} =~ $re;
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 90672310..dad080c8 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -150,7 +150,7 @@ sub run {
         } else {
                 opendir my $dh, $old or die "Failed to opendir $old: $!\n";
                 while (defined(my $dn = readdir($dh))) {
-                        if ($dn =~ /\A\d+\z/) {
+                        if ($dn =~ /\A[0-9]+\z/) {
                                 my $tmpl = "$dn-XXXXXXXX";
                                 my $dst = tempdir($tmpl, DIR => $old);
                                 same_fs_or_die($old, $dst);
@@ -200,7 +200,7 @@ sub progress_pfx ($) {
         my @p = split('/', $_[0]);
 
         # return "xap15/0" for v2, or "xapian15" for v1:
-        ($p[-1] =~ /\A\d+\z/) ? "$p[-2]/$p[-1]" : $p[-1];
+        ($p[-1] =~ /\A[0-9]+\z/) ? "$p[-2]/$p[-1]" : $p[-1];
 }
 
 # xapian-compact wrapper
@@ -276,7 +276,7 @@ sub cpdb ($$) {
                         $dst->set_metadata('last_commit', $lc) if $lc;
 
                         # only the first xapian partition (0) gets 'indexlevel'
-                        if ($old =~ m!(?:xapian\d+|xap\d+/0)\z!) {
+                        if ($old =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\z!) {
                                 my $l = $src->get_metadata('indexlevel');
                                 if ($l eq 'medium') {
                                         $dst->set_metadata('indexlevel', $l);
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index 381826dc..25e6cc9b 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -91,7 +91,7 @@ foreach my $ibx (@inboxes) {
         my $xdir_ro = $ibx->{search}->xdir(1);
         my $npart = 0;
         foreach my $part (<$xdir_ro/*>) {
-                if (-d $part && $part =~ m!/\d+\z!) {
+                if (-d $part && $part =~ m!/[0-9]+\z!) {
                         my $bytes = 0;
                         $bytes += -s $_ foreach glob("$part/*");
                         $npart++ if $bytes;
diff --git a/t/linkify.t b/t/linkify.t
index fe218b91..c4923582 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -132,4 +132,16 @@ use PublicInbox::Linkify;
                 'punctuation with unpaired ) OK')
 }
 
+if ('IDN example: <ACDB98F4-178C-43C3-99C4-A1D03DD6A8F5@sb.org>') {
+        my $hc = '&#26376;';
+        my $u = "http://www.\x{6708}.example.com/";
+        my $s = $u;
+        my $l = PublicInbox::Linkify->new;
+        $s = $l->linkify_1($s);
+        $s = $l->linkify_2($s);
+        my $expect = qq{<a
+href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>};
+        is($s, $expect, 'IDN message escaped properly');
+}
+
 done_testing();