diff options
author | Eric Wong <e@80x24.org> | 2023-12-08 03:54:37 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2023-12-09 02:03:51 +0000 |
commit | 2ebbc92688ec061cefd3ec374f2e4c2937e330f9 (patch) | |
tree | 511d54fb76dc5a264f0e682cb039c3c214ae1c3f /lib/PublicInbox/XapHelper.pm | |
parent | 352c78a9a1ec3bfc6743b3217aa5964aa09a3829 (diff) | |
download | public-inbox-2ebbc92688ec061cefd3ec374f2e4c2937e330f9.tar.gz |
This will allow us to use p2q-compatible specifications such as "dfpost7" to capture only blob OIDs which are exactly 7 characters in length (the indexer will always index down to 7 characters).
Diffstat (limited to 'lib/PublicInbox/XapHelper.pm')
-rw-r--r-- | lib/PublicInbox/XapHelper.pm | 24 |
1 files changed, 20 insertions, 4 deletions
```diff
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index b21e70a2..ed11a2f8 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -39,13 +39,24 @@ sub iter_retry_check ($) {
 	}
 }
 
+sub term_length_extract ($) {
+	my ($req) = @_;
+	@{$req->{A_len}} = map {
+		my $len = s/([0-9]+)\z// ? ($1 + 0) : undef;
+		[ $_, $len ];
+	} @{$req->{A}};
+}
+
 sub dump_ibx_iter ($$$) {
 	my ($req, $ibx_id, $it) = @_;
 	my $out = $req->{0};
 	eval {
 		my $doc = $it->get_document;
-		for my $p (@{$req->{A}}) {
-			for (xap_terms($p, $doc)) {
+		for my $pair (@{$req->{A_len}}) {
+			my ($pfx, $len) = @$pair;
+			my @t = xap_terms($pfx, $doc);
+			@t = grep { length == $len } @t if defined($len);
+			for (@t) {
 				print $out "$_ $ibx_id\n" or die "print: $!";
 				++$req->{nr_out};
 			}
@@ -64,6 +75,7 @@ sub cmd_dump_ibx {
 	my ($req, $ibx_id, $qry_str) = @_;
 	$qry_str // die 'usage: dump_ibx [OPTIONS] IBX_ID QRY_STR';
 	$req->{A} or die 'dump_ibx requires -A PREFIX';
+	term_length_extract $req;
 	my $max = $req->{'m'} // $req->{srch}->{xdb}->get_doccount;
 	my $opt = { relevance => -1, limit => $max, offset => $req->{o} // 0 };
 	$opt->{eidx_key} = $req->{O} if defined $req->{O};
@@ -82,8 +94,11 @@ sub dump_roots_iter ($$$) {
 	eval {
 		my $doc = $it->get_document;
 		my $G = join(' ', map { $root2off->{$_} } xap_terms('G', $doc));
-		for my $p (@{$req->{A}}) {
-			for (xap_terms($p, $doc)) {
+		for my $pair (@{$req->{A_len}}) {
+			my ($pfx, $len) = @$pair;
+			my @t = xap_terms($pfx, $doc);
+			@t = grep { length == $len } @t if defined($len);
+			for (@t) {
 				$req->{wbuf} .= "$_ $G\n";
 				++$req->{nr_out};
 			}
@@ -106,6 +121,7 @@ sub cmd_dump_roots {
 	my ($req, $root2off_file, $qry_str) = @_;
 	$qry_str // die 'usage: dump_roots [OPTIONS] ROOT2ID_FILE QRY_STR';
 	$req->{A} or die 'dump_roots requires -A PREFIX';
+	term_length_extract $req;
 	open my $fh, '<', $root2off_file;
 	my $root2off; # record format: $OIDHEX "\0" uint32_t
 	my @x = split(/\0/, read_all $fh);
```