about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-02-10 07:07:44 +0000
committerEric Wong <e@80x24.org>2021-02-10 19:21:32 +0000
commitdd80cd59cf0c90d61257fbfe8176e3312edbb841 (patch)
tree0eca4983372b1c80377663179157b993bb2535e9
parent04b73104416e4734b3a01e47525119cac867065a (diff)
downloadpublic-inbox-dd80cd59cf0c90d61257fbfe8176e3312edbb841.tar.gz
The "ls-external" now accepts the same glob patterns used by
with lei q --{include,only,exclude}.  If no glob is detected, it
will be treated as a literal substring match (like "grep -F").

Inverting matches is also supported ("grep -v").
-rw-r--r--lib/PublicInbox/LEI.pm4
-rw-r--r--lib/PublicInbox/LeiExternal.pm74
-rw-r--r--t/lei_external.t20
3 files changed, 71 insertions, 27 deletions
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index dd831c54..eb5a646e 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -124,8 +124,8 @@ our %CMD = ( # sorted in order of importance/use:
         qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i),
         qw(quiet|q verbose|v+),
         index_opt(), PublicInbox::LeiQuery::curl_opt() ],
-'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations',
-        qw(format|f=s z|0 local remote quiet|q) ],
+'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations',
+        qw(format|f=s z|0 globoff|g invert-match|v local remote) ],
 'forget-external' => [ 'LOCATION...|--prune',
         'exclude further results from a publicinbox|extindex',
         qw(prune quiet|q) ],
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index b65dc87c..bac15226 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -22,22 +22,16 @@ sub externals_each {
         # highest boost first, but stable for alphabetic tie break
         use sort 'stable';
         my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
-        return @order if !$cb;
-        for my $loc (@order) {
-                $cb->(@arg, $loc, $boost{$loc});
+        if (ref($cb) eq 'CODE') {
+                for my $loc (@order) {
+                        $cb->(@arg, $loc, $boost{$loc});
+                }
+        } elsif (ref($cb) eq 'HASH') {
+                %$cb = %boost;
         }
         @order; # scalar or array
 }
 
-sub lei_ls_external {
-        my ($self, @argv) = @_;
-        my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
-        externals_each($self, sub {
-                my ($loc, $boost_val) = @_;
-                $self->out($loc, $OFS, 'boost=', $boost_val, $ORS);
-        });
-}
-
 sub ext_canonicalize {
         my ($location) = @_;
         if ($location !~ m!\Ahttps?://!) {
@@ -52,28 +46,47 @@ sub ext_canonicalize {
         }
 }
 
-my %patmap = ('*' => '[^/]*?', '?' => '[^/]', '[' => '[', ']' => ']');
-sub glob2pat {
-        my ($glob) = @_;
-        $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
-        $glob;
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+                '[' => '[', ']' => ']', ',' => ',' );
+
+sub glob2re {
+        my ($re) = @_;
+        my $p = '';
+        my $in_bracket = 0;
+        my $qm = 0;
+        my $changes = ($re =~ s!(.)!
+                $re_map{$p eq '\\' ? '' : do {
+                        if ($1 eq '[') { ++$in_bracket }
+                        elsif ($1 eq ']') { --$in_bracket }
+                        $p = $1;
+                }} // do {
+                        $p = $1;
+                        ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+                }!sge);
+        # bashism (also supported by curl): {a,b,c} => (a|b|c)
+        $re =~ s/([^\\]*)\\\{([^,]*?,[^\\]*?)\\\}/
+                (my $in_braces = $2) =~ tr!,!|!;
+                $1."($in_braces)";
+                /sge;
+        ($changes - $qm) ? $re : undef;
 }
 
+# get canonicalized externals list matching $loc
+# $is_exclude denotes it's for --exclude
+# otherwise it's for --only/--include is assumed
 sub get_externals {
-        my ($self, $loc, $exclude) = @_;
+        my ($self, $loc, $is_exclude) = @_;
         return (ext_canonicalize($loc)) if -e $loc;
-
         my @m;
         my @cur = externals_each($self);
         my $do_glob = !$self->{opt}->{globoff}; # glob by default
-        if ($do_glob && ($loc =~ /[\*\?]/s || $loc =~ /\[.*\]/s)) {
-                my $re = glob2pat($loc);
+        if ($do_glob && (my $re = glob2re($loc))) {
                 @m = grep(m!$re!, @cur);
                 return @m if scalar(@m);
         } elsif (index($loc, '/') < 0) { # exact basename match:
                 @m = grep(m!/\Q$loc\E/?\z!, @cur);
                 return @m if scalar(@m) == 1;
-        } elsif ($exclude) { # URL, maybe:
+        } elsif ($is_exclude) { # URL, maybe:
                 my $canon = ext_canonicalize($loc);
                 @m = grep(m!\A\Q$canon\E\z!, @cur);
                 return @m if scalar(@m) == 1;
@@ -88,6 +101,23 @@ sub get_externals {
         ();
 }
 
+sub lei_ls_external {
+        my ($self, $filter) = @_;
+        my $do_glob = !$self->{opt}->{globoff}; # glob by default
+        my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
+        $filter //= '*';
+        my $re = $do_glob ? glob2re($filter) : undef;
+        $re //= index($filter, '/') < 0 ?
+                        qr!/\Q$filter\E/?\z! : # exact basename match
+                        qr/\Q$filter\E/; # grep -F semantics
+        my @ext = externals_each($self, my $boost = {});
+        @ext = $self->{opt}->{'invert-match'} ? grep(!/$re/, @ext)
+                                        : grep(/$re/, @ext);
+        for my $loc (@ext) {
+                $self->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS);
+        }
+}
+
 sub add_external_finish {
         my ($self, $location) = @_;
         my $cfg = $self->_lei_cfg(1);
diff --git a/t/lei_external.t b/t/lei_external.t
index 587990db..0ef6633d 100644
--- a/t/lei_external.t
+++ b/t/lei_external.t
@@ -1,7 +1,8 @@
 #!perl -w
-use strict;
-use v5.10.1;
-use Test::More;
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# internal unit test, see t/lei-externals.t for functional tests
+use strict; use v5.10.1; use Test::More;
 my $cls = 'PublicInbox::LeiExternal';
 require_ok $cls;
 my $canon = $cls->can('ext_canonicalize');
@@ -15,4 +16,17 @@ is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent',
 is($canon->('/this//path/'), '/this/path', 'extra slashes gone');
 is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved');
 
+my $glob2re = $cls->can('glob2re');
+is($glob2re->('foo'), undef, 'plain string unchanged');
+is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted');
+is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted');
+is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces');
+is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start');
+is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end');
+is_deeply($glob2re->('{a}'), undef, 'ungrouped brace');
+is_deeply($glob2re->('{a'), undef, 'open left brace');
+is_deeply($glob2re->('a}'), undef, 'open right brace');
+is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
+is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
+
 done_testing;