From 811b8d3cbaa790f59b7b107140b86248da16499b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 27 Nov 2020 09:52:54 +0000 Subject: nntp: xref: use ->ALL extindex if available Getting Xref for cross-posted messages is an O(n) operation where `n' is the number of newsgroups on the server. This works acceptably when there are dozens of groups, but would be unnacceptable when there's tens of thousands of newsgroups. With ~140 newsgroups, a lore.kernel.org mirror already handles "XHDR Xref $MESSAGE_ID" requests around 30% faster after creating the xref3.idx_nntp index. The SQL additions to ExtSearch.pm may be a bit strange and seem more appropriate for Over.pm; however it currently makes sense to me since those bits of over.sqlite3 access are exclusive to ExtSearch and can't be used by traditional v1/v2 inboxes... --- lib/PublicInbox/ExtSearch.pm | 56 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'lib/PublicInbox/ExtSearch.pm') diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index dd93cd32..20ec3224 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -11,6 +11,7 @@ use PublicInbox::Over; use PublicInbox::Inbox; use File::Spec (); use PublicInbox::MiscSearch; +use DBI qw(:sql_types); # SQL_BLOB # for ->reopen, ->mset, ->mset_to_artnums use parent qw(PublicInbox::Search); @@ -49,6 +50,61 @@ sub git { $self->{git} //= PublicInbox::Git->new("$self->{topdir}/ALL.git"); } +# returns an arrayref of [ $NEWSGROUP_NAME:$ART_NO ] using +# the `xref3' table +sub nntp_xref_for { # NNTP only + my ($self, $xibx, $xsmsg) = @_; + my $dbh = over($self)->dbh; + + my $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1 + + $sth->execute($xibx->{newsgroup}); + my $xibx_id = $sth->fetchrow_array // do { + warn "W: `$xibx->{newsgroup}' not found in $self->{topdir}\n"; + return; + }; + + $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1 + + $sth->bind_param(1, pack('H*', $xsmsg->{blob}), SQL_BLOB); + $sth->bind_param(2, $xsmsg->{num}); + $sth->bind_param(3, $xibx_id); + $sth->execute; + my $docid = $sth->fetchrow_array // do { + warn <{newsgroup}:$xsmsg->{num}' not found in $self->{topdir}" +EOF + return; + }; + + # LIMIT is number of newsgroups on server: + $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT ibx_id,xnum FROM xref3 WHERE docid = ? + + $sth->execute($docid); + my $rows = $sth->fetchall_arrayref; + + my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1 + + my %xref = map { + my ($ibx_id, $xnum) = @$_; + if ($ibx_id == $xibx_id) { + (); + } else { + $eidx_key_sth->execute($ibx_id); + my $eidx_key = $eidx_key_sth->fetchrow_array; + + # only include if there's a newsgroup name + $eidx_key && index($eidx_key, '/') >= 0 ? + () : ($eidx_key => $xnum) + } + } @$rows; + [ map { "$_:$xref{$_}" } sort keys %xref ]; # match NNTP LIST order +} + sub mm { undef } sub altid_map { {} } -- cgit v1.2.3-24-ge0c7