From 166532d5a7fb7409db8e7877ca961afb60ad28e5 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Mon, 11 Mar 2024 19:40:10 +0000
Subject: codesearch: deduplicate {ibx_score} name pairs

With my current mirror of lore + gko, this saves over 300K
allocations and brings the allocation count in this area down
to under 5K. The reduction in AV refs saves around 45MB RAM
according to measurements done live via Devel::Mwrap.
---
 lib/PublicInbox/CodeSearch.pm | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
index 1f95a726..48033bb5 100644
--- a/lib/PublicInbox/CodeSearch.pm
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -292,6 +292,7 @@ W: cindex.$name.topdir=$self->{topdir} has no usable join data for $cfg_f
 EOM
 	my ($ekeys, $roots, $ibx2root) = @$jd{qw(ekeys roots ibx2root)};
 	my $roots2paths = roots2paths($self);
+	my %dedupe; # 50x alloc reduction w/ lore + gko mirror (Mar 2024)
 	for my $root_offs (@$ibx2root) {
 		my $ekey = shift(@$ekeys) // die 'BUG: {ekeys} empty';
 		scalar(@$root_offs) or next;
@@ -320,9 +321,15 @@ EOM
 				if (my $git = $dir2cr{$_}) {
 					$ibx_p2g{$_} = $git;
 					$ibx2self = 1;
-					$ibx->{-hide_www} or
-						push @{$git->{ibx_score}},
+					if (!$ibx->{-hide_www}) {
+						# don't stringify $nr directly
+						# to avoid long-lived PV
+						my $k = ($nr + 0)."\0".
+							($ibx + 0);
+						my $s = $dedupe{$k} //=
 							[ $nr, $ibx->{name} ];
+						push @{$git->{ibx_score}}, $s;
+					}
 					push @$gits, $git;
 				} else {
 					warn <