about summary refs log tree commit
diff options
context:
space:
mode:
-rw-r--r--Changes30
-rw-r--r--Makefile.PL3
-rw-r--r--lib/Net/Cmd.pm36
-rw-r--r--lib/Net/NNTP.pm14
-rw-r--r--lib/Net/SMTP.pm11
-rw-r--r--t/datasend.t9
-rw-r--r--t/pod_coverage.t4
7 files changed, 74 insertions, 33 deletions
diff --git a/Changes b/Changes
index bf24244..4c45615 100644
--- a/Changes
+++ b/Changes
@@ -2,7 +2,35 @@ Revision history for Perl distribution libnet
 
 3.07 Development
 
-    - TODO
+    - Fixed a bug in Net::Cmd::datasend() which caused octets in [\x80-\xFF]
+      stored in a "binary string" to be replaced with their UTF-8 encodings if
+      the string happened to be stored internally in an "upgraded" state (i.e.
+      with the UTF-8 flag on). (As noted below, strings passed to datasend()
+      should always be encoded first, and therefore not stored in such a state
+      anyway, but it is all too easy for perl to change this internal state
+      unless the encoding is done at the very last minute before calling
+      datasend(), so it helps if datasend() plays more nicely in this case. In
+      particular, it was wrong of datasend() to treat upgraded and downgraded
+      strings differently when their contents were identical at the Perl level.)
+
+      This bugfix results in a breaking change to the case of a "text string"
+      with characters in U+0080..U+00FF stored internally in an upgraded state
+      since those characters are likewise no longer encoded to UTF-8 by
+      datasend(), but callers of datasend() should not have been relying on this
+      behaviour anyway: In general, datasend() has no idea what encoding is
+      required for output so callers should always encode the data to be output
+      to whatever encoding is required first. This has now been clarified in the
+      documentation.
+
+      Finally, a text string with characters >= U+0100 will now cause a "Wide
+      character in print" warning from datasend() since such characters cannot
+      be output as bytes and datasend() no longer encodes to UTF-8. In this
+      case, UTF-8 bytes will still be output as before since that happens to be
+      the internal representation of such characters, but the warning is new.
+      Callers should heed this warning and encode such strings to whatever
+      encoding is required before calling datasend(), as noted above.
+
+      [Ricardo Signes, CPAN RT#104433]
 
 3.06 2015-04-01
 
diff --git a/Makefile.PL b/Makefile.PL
index 7db02c5..872eac8 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -7,7 +7,7 @@
 #   Makefile creation script.
 #
 # COPYRIGHT
-#   Copyright (C) 2014 Steve Hay.  All rights reserved.
+#   Copyright (C) 2014, 2015 Steve Hay.  All rights reserved.
 #
 # LICENCE
 #   This script is free software; you can redistribute it and/or modify it under
@@ -206,6 +206,7 @@ MAIN: {
             'Time::Local'    => '0',
             'constant'       => '0',
             'strict'         => '0',
+            'utf8'           => '0',
             'vars'           => '0'
         },
 
diff --git a/lib/Net/Cmd.pm b/lib/Net/Cmd.pm
index cec44bf..3bf5ec6 100644
--- a/lib/Net/Cmd.pm
+++ b/lib/Net/Cmd.pm
@@ -2,7 +2,7 @@
 #
 # Versions up to 2.29_1 Copyright (c) 1995-2006 Graham Barr <gbarr@pobox.com>.
 # All rights reserved.
-# Changes in Version 2.29_2 onwards Copyright (C) 2013-2014 Steve Hay.  All
+# Changes in Version 2.29_2 onwards Copyright (C) 2013-2015 Steve Hay.  All
 # rights reserved.
 # This module is free software; you can redistribute it and/or modify it under
 # the same terms as Perl itself, i.e. under the terms of either the GNU General
@@ -27,21 +27,6 @@ BEGIN {
   }
 }
 
-BEGIN {
-  if (!eval { require utf8 }) {
-    *is_utf8 = sub { 0 };
-  }
-  elsif (eval { utf8::is_utf8(undef); 1 }) {
-    *is_utf8 = \&utf8::is_utf8;
-  }
-  elsif (eval { require Encode; Encode::is_utf8(undef); 1 }) {
-    *is_utf8 = \&Encode::is_utf8;
-  }
-  else {
-    *is_utf8 = sub { $_[0] =~ /[^\x00-\xff]/ };
-  }
-}
-
 our $VERSION = "3.07";
 our @ISA     = qw(Exporter);
 our @EXPORT  = qw(CMD_INFO CMD_OK CMD_MORE CMD_REJECT CMD_ERROR CMD_PENDING);
@@ -429,9 +414,17 @@ sub datasend {
   my $arr  = @_ == 1 && ref($_[0]) ? $_[0] : \@_;
   my $line = join("", @$arr);
 
-  # encode to individual utf8 bytes if
-  # $line is a string (in internal UTF-8)
-  utf8::encode($line) if is_utf8($line);
+  # Perls < 5.10.1 (with the exception of 5.8.9) have a performance problem with
+  # the substitutions below when dealing with strings stored internally in
+  # UTF-8, so downgrade them (if possible).
+  # Data passed to datasend() should be encoded to octets upstream already so
+  # shouldn't even have the UTF-8 flag on to start with, but if it so happens
+  # that the octets are stored in an upgraded string (as can sometimes occur)
+  # then they would still downgrade without fail anyway.
+  # Only Unicode codepoints > 0xFF stored in an upgraded string will fail to
+  # downgrade. We fail silently in that case, and a "Wide character in print"
+  # warning will be emitted later by syswrite().
+  utf8::downgrade($line, 1) if $] < 5.010001 && $] != 5.008009;
 
   return 0
     if $cmd->_is_closed;
@@ -722,6 +715,8 @@ is pending then C<CMD_PENDING> is returned.
 Send data to the remote server, converting LF to CRLF. Any line starting
 with a '.' will be prefixed with another '.'.
 C<DATA> may be an array or a reference to an array.
+The C<DATA> passed in must be encoded by the caller to octets of whatever
+encoding is required, e.g. by using the Encode module's C<encode()> function.
 
 =item dataend ()
 
@@ -794,6 +789,9 @@ Unget a line of text from the server.
 
 Send data to the remote server without performing any conversions. C<DATA>
 is a scalar.
+As with C<datasend()>, the C<DATA> passed in must be encoded by the caller
+to octets of whatever encoding is required, e.g. by using the Encode module's
+C<encode()> function.
 
 =item read_until_dot ()
 
diff --git a/lib/Net/NNTP.pm b/lib/Net/NNTP.pm
index 120292c..b9c5d6f 100644
--- a/lib/Net/NNTP.pm
+++ b/lib/Net/NNTP.pm
@@ -2,7 +2,7 @@
 #
 # Versions up to 2.24_1 Copyright (c) 1995-1997 Graham Barr <gbarr@pobox.com>.
 # All rights reserved.
-# Changes in Version 2.25 onwards Copyright (C) 2013-2014 Steve Hay.  All rights
+# Changes in Version 2.25 onwards Copyright (C) 2013-2015 Steve Hay.  All rights
 # reserved.
 # This module is free software; you can redistribute it and/or modify it under
 # the same terms as Perl itself, i.e. under the terms of either the GNU General
@@ -947,15 +947,17 @@ implementation) from the server. Returns the text or undef upon failure.
 
 The C<ihave> command informs the server that the client has an article
 whose id is C<MSGID>.  If the server desires a copy of that
-article, and C<MESSAGE> has been given the it will be sent.
+article and C<MESSAGE> has been given then it will be sent.
 
 Returns I<true> if the server desires the article and C<MESSAGE> was
-successfully sent,if specified.
+successfully sent, if specified.
 
 If C<MESSAGE> is not specified then the message must be sent using the
 C<datasend> and C<dataend> methods from L<Net::Cmd>
 
-C<MESSAGE> can be either an array of lines or a reference to an array.
+C<MESSAGE> can be either an array of lines or a reference to an array
+and must be encoded by the caller to octets of whatever encoding is required,
+e.g. by using the Encode module's C<encode()> function.
 
 =item last ()
 
@@ -1028,7 +1030,9 @@ is allowed then the message will be sent.
 If C<MESSAGE> is not specified then the message must be sent using the
 C<datasend> and C<dataend> methods from L<Net::Cmd>
 
-C<MESSAGE> can be either an array of lines or a reference to an array.
+C<MESSAGE> can be either an array of lines or a reference to an array
+and must be encoded by the caller to octets of whatever encoding is required,
+e.g. by using the Encode module's C<encode()> function.
 
 The message, either sent via C<datasend> or as the C<MESSAGE>
 parameter, must be in the format as described by RFC822 and must
diff --git a/lib/Net/SMTP.pm b/lib/Net/SMTP.pm
index e200e37..573d8ea 100644
--- a/lib/Net/SMTP.pm
+++ b/lib/Net/SMTP.pm
@@ -2,7 +2,7 @@
 #
 # Versions up to 2.31_1 Copyright (c) 1995-2004 Graham Barr <gbarr@pobox.com>.
 # All rights reserved.
-# Changes in Version 2.31_2 onwards Copyright (C) 2013-2014 Steve Hay.  All
+# Changes in Version 2.31_2 onwards Copyright (C) 2013-2015 Steve Hay.  All
 # rights reserved.
 # This module is free software; you can redistribute it and/or modify it under
 # the same terms as Perl itself, i.e. under the terms of either the GNU General
@@ -942,9 +942,12 @@ Synonyms for C<recipient>.
 
 Initiate the sending of the data from the current message.
 
-C<DATA> may be a reference to a list or a list. If specified the contents
-of C<DATA> and a termination string C<".\r\n"> is sent to the server. And the
-result will be true if the data was accepted.
+C<DATA> may be a reference to a list or a list and must be encoded by the
+caller to octets of whatever encoding is required, e.g. by using the Encode
+module's C<encode()> function.
+
+If specified, the contents of C<DATA> and a termination string C<".\r\n">
+are sent to the server. The result will be true if the data was accepted.
 
 If C<DATA> is not specified then the result will indicate that the server
 wishes the data to be sent. The data must then be sent using the C<datasend>
diff --git a/t/datasend.t b/t/datasend.t
index 3a97c4b..0aea9d4 100644
--- a/t/datasend.t
+++ b/t/datasend.t
@@ -44,7 +44,7 @@ BEGIN {
 (my $libnet_t = __FILE__) =~ s/datasend.t/libnet_t.pl/;
 require $libnet_t or die;
 
-print "1..51\n";
+print "1..54\n";
 
 sub check {
   my $expect = pop;
@@ -158,3 +158,10 @@ check(
   "a\015\012..\015\012.\015\012",
 );
 
+# Test that datasend() plays nicely with bytes in an upgraded string,
+# even though the input should really be encode()d already.
+check(
+  substr("\x{100}", 0, 0) . "\x{e9}",
+
+  "\x{e9}\015\012.\015\012"
+);
diff --git a/t/pod_coverage.t b/t/pod_coverage.t
index 9cb64c2..3d674d4 100644
--- a/t/pod_coverage.t
+++ b/t/pod_coverage.t
@@ -7,7 +7,7 @@
 #   Test script to check POD coverage.
 #
 # COPYRIGHT
-#   Copyright (C) 2014 Steve Hay.  All rights reserved.
+#   Copyright (C) 2014, 2015 Steve Hay.  All rights reserved.
 #
 # LICENCE
 #   This script is free software; you can redistribute it and/or modify it under
@@ -48,7 +48,7 @@ MAIN: {
         my $params = { coverage_class => qw(Pod::Coverage::CountParents) };
         pod_coverage_ok('Net::Cmd', {
             %$params,
-            also_private => [qw(is_utf8 toascii toebcdic set_status)]
+            also_private => [qw(toascii toebcdic set_status)]
         });
         pod_coverage_ok('Net::Config', {
             %$params,