about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-06-27 17:31:30 +0000
committer Eric Wong <e@80x24.org> 2019-06-27 22:36:24 +0000
commit ea71a5606c633f82975e8208a6c552053f7f5af8 (patch)
tree 4f6fc0bb3417d6b97c948469e1e3ab6599ceb3ac
parent 71ea9961786fa14ea0a67200847bac5a76abf751 (diff)
download public-inbox-ea71a5606c633f82975e8208a6c552053f7f5af8.tar.gz
Chances are we already have extra buffer space following the
expensive LF => CRLF conversion, so we can safely append an
extra CRLF in those places without incurring a copy of the
full string buffer.

While we're at it, document where our pain points are in terms
of memory usage, since tracking/controlling memory use isn't
exactly obvious in high-level languages.

Perhaps we should start storing messages in git as CRLF...
-rw-r--r-- lib/PublicInbox/NNTP.pm 22
1 files changed, 12 insertions, 10 deletions
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 30d3dab6..5a886a3c 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -521,10 +521,12 @@ found:
 
 sub msg_body_write ($$) {
         my ($self, $msg) = @_;
+
+        # these can momentarily double the memory consumption :<
         $$msg =~ s/^\./../smg;
-        $$msg =~ s/(?<!\r)\n/\r\n/sg;
+        $$msg =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
+        $$msg .= "\r\n" unless $$msg =~ /\r\n\z/s;
         msg_more($self, $$msg);
-        msg_more($self, "\r\n") unless $$msg =~ /\r\n\z/s;
         '.'
 }
 
@@ -533,18 +535,19 @@ sub set_art {
         $self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/;
 }
 
-sub _header ($) {
-        my $hdr = $_[0]->as_string;
+sub msg_hdr_write ($$$) {
+        my ($self, $hdr, $body_follows) = @_;
+        $hdr = $hdr->as_string;
         utf8::encode($hdr);
-        $hdr =~ s/(?<!\r)\n/\r\n/sg;
+        $hdr =~ s/(?<!\r)\n/\r\n/sg; # Alpine barfs without this
 
         # for leafnode compatibility, we need to ensure Message-ID headers
         # are only a single line.  We can't subclass Email::Simple::Header
         # and override _default_fold_at in here, either; since that won't
         # affect messages already in the archive.
         $hdr =~ s/^(Message-ID:)[ \t]*\r\n[ \t]+([^\r]+)\r\n/$1 $2\r\n/igsm;
-
-        $hdr
+        $hdr .= "\r\n" if $body_follows;
+        msg_more($self, $hdr);
 }
 
 sub cmd_article ($;$) {
@@ -554,8 +557,7 @@ sub cmd_article ($;$) {
         my ($n, $mid, $msg, $hdr) = @$r;
         set_art($self, $art);
         more($self, "220 $n <$mid> article retrieved - head and body follow");
-        msg_more($self, _header($hdr));
-        msg_more($self, "\r\n");
+        msg_hdr_write($self, $hdr, 1);
         msg_body_write($self, $msg);
 }
 
@@ -566,7 +568,7 @@ sub cmd_head ($;$) {
         my ($n, $mid, undef, $hdr) = @$r;
         set_art($self, $art);
         more($self, "221 $n <$mid> article retrieved - head follows");
-        msg_more($self, _header($hdr));
+        msg_hdr_write($self, $hdr, 0);
         '.'
 }