diff options
-rw-r--r-- | Changes | 30 | ||||
-rw-r--r-- | Makefile.PL | 3 | ||||
-rw-r--r-- | lib/Net/Cmd.pm | 36 | ||||
-rw-r--r-- | lib/Net/NNTP.pm | 14 | ||||
-rw-r--r-- | lib/Net/SMTP.pm | 11 | ||||
-rw-r--r-- | t/datasend.t | 9 | ||||
-rw-r--r-- | t/pod_coverage.t | 4 |
7 files changed, 74 insertions, 33 deletions
@@ -2,7 +2,35 @@ Revision history for Perl distribution libnet 3.07 Development - - TODO + - Fixed a bug in Net::Cmd::datasend() which caused octets in [\x80-\xFF] + stored in a "binary string" to be replaced with their UTF-8 encodings if + the string happened to be stored internally in an "upgraded" state (i.e. + with the UTF-8 flag on). (As noted below, strings passed to datasend() + should always be encoded first, and therefore not stored in such a state + anyway, but it is all too easy for perl to change this internal state + unless the encoding is done at the very last minute before calling + datasend(), so it helps if datasend() plays more nicely in this case. In + particular, it was wrong of datasend() to treat upgraded and downgraded + strings differently when their contents were identical at the Perl level.) + + This bugfix results in a breaking change to the case of a "text string" + with characters in U+0080..U+00FF stored internally in an upgraded state + since those characters are likewise no longer encoded to UTF-8 by + datasend(), but callers of datasend() should not have been relying on this + behaviour anyway: In general, datasend() has no idea what encoding is + required for output so callers should always encode the data to be output + to whatever encoding is required first. This has now been clarified in the + documentation. + + Finally, a text string with characters >= U+0100 will now cause a "Wide + character in print" warning from datasend() since such characters cannot + be output as bytes and datasend() no longer encodes to UTF-8. In this + case, UTF-8 bytes will still be output as before since that happens to be + the internal representation of such characters, but the warning is new. + Callers should heed this warning and encode such strings to whatever + encoding is required before calling datasend(), as noted above. 
+ + [Ricardo Signes, CPAN RT#104433] 3.06 2015-04-01 diff --git a/Makefile.PL b/Makefile.PL index 7db02c5..872eac8 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -7,7 +7,7 @@ # Makefile creation script. # # COPYRIGHT -# Copyright (C) 2014 Steve Hay. All rights reserved. +# Copyright (C) 2014, 2015 Steve Hay. All rights reserved. # # LICENCE # This script is free software; you can redistribute it and/or modify it under @@ -206,6 +206,7 @@ MAIN: { 'Time::Local' => '0', 'constant' => '0', 'strict' => '0', + 'utf8' => '0', 'vars' => '0' }, diff --git a/lib/Net/Cmd.pm b/lib/Net/Cmd.pm index cec44bf..3bf5ec6 100644 --- a/lib/Net/Cmd.pm +++ b/lib/Net/Cmd.pm @@ -2,7 +2,7 @@ # # Versions up to 2.29_1 Copyright (c) 1995-2006 Graham Barr <gbarr@pobox.com>. # All rights reserved. -# Changes in Version 2.29_2 onwards Copyright (C) 2013-2014 Steve Hay. All +# Changes in Version 2.29_2 onwards Copyright (C) 2013-2015 Steve Hay. All # rights reserved. # This module is free software; you can redistribute it and/or modify it under # the same terms as Perl itself, i.e. under the terms of either the GNU General @@ -27,21 +27,6 @@ BEGIN { } } -BEGIN { - if (!eval { require utf8 }) { - *is_utf8 = sub { 0 }; - } - elsif (eval { utf8::is_utf8(undef); 1 }) { - *is_utf8 = \&utf8::is_utf8; - } - elsif (eval { require Encode; Encode::is_utf8(undef); 1 }) { - *is_utf8 = \&Encode::is_utf8; - } - else { - *is_utf8 = sub { $_[0] =~ /[^\x00-\xff]/ }; - } -} - our $VERSION = "3.07"; our @ISA = qw(Exporter); our @EXPORT = qw(CMD_INFO CMD_OK CMD_MORE CMD_REJECT CMD_ERROR CMD_PENDING); @@ -429,9 +414,17 @@ sub datasend { my $arr = @_ == 1 && ref($_[0]) ? 
$_[0] : \@_; my $line = join("", @$arr); - # encode to individual utf8 bytes if - # $line is a string (in internal UTF-8) - utf8::encode($line) if is_utf8($line); + # Perls < 5.10.1 (with the exception of 5.8.9) have a performance problem with + # the substitutions below when dealing with strings stored internally in + # UTF-8, so downgrade them (if possible). + # Data passed to datasend() should be encoded to octets upstream already so + # shouldn't even have the UTF-8 flag on to start with, but if it so happens + # that the octets are stored in an upgraded string (as can sometimes occur) + # then they would still downgrade without fail anyway. + # Only Unicode codepoints > 0xFF stored in an upgraded string will fail to + # downgrade. We fail silently in that case, and a "Wide character in print" + # warning will be emitted later by syswrite(). + utf8::downgrade($line, 1) if $] < 5.010001 && $] != 5.008009; return 0 if $cmd->_is_closed; @@ -722,6 +715,8 @@ is pending then C<CMD_PENDING> is returned. Send data to the remote server, converting LF to CRLF. Any line starting with a '.' will be prefixed with another '.'. C<DATA> may be an array or a reference to an array. +The C<DATA> passed in must be encoded by the caller to octets of whatever +encoding is required, e.g. by using the Encode module's C<encode()> function. =item dataend () @@ -794,6 +789,9 @@ Unget a line of text from the server. Send data to the remote server without performing any conversions. C<DATA> is a scalar. +As with C<datasend()>, the C<DATA> passed in must be encoded by the caller +to octets of whatever encoding is required, e.g. by using the Encode module's +C<encode()> function. =item read_until_dot () diff --git a/lib/Net/NNTP.pm b/lib/Net/NNTP.pm index 120292c..b9c5d6f 100644 --- a/lib/Net/NNTP.pm +++ b/lib/Net/NNTP.pm @@ -2,7 +2,7 @@ # # Versions up to 2.24_1 Copyright (c) 1995-1997 Graham Barr <gbarr@pobox.com>. # All rights reserved. 
-# Changes in Version 2.25 onwards Copyright (C) 2013-2014 Steve Hay. All rights +# Changes in Version 2.25 onwards Copyright (C) 2013-2015 Steve Hay. All rights # reserved. # This module is free software; you can redistribute it and/or modify it under # the same terms as Perl itself, i.e. under the terms of either the GNU General @@ -947,15 +947,17 @@ implementation) from the server. Returns the text or undef upon failure. The C<ihave> command informs the server that the client has an article whose id is C<MSGID>. If the server desires a copy of that -article, and C<MESSAGE> has been given the it will be sent. +article and C<MESSAGE> has been given then it will be sent. Returns I<true> if the server desires the article and C<MESSAGE> was -successfully sent,if specified. +successfully sent, if specified. If C<MESSAGE> is not specified then the message must be sent using the C<datasend> and C<dataend> methods from L<Net::Cmd> -C<MESSAGE> can be either an array of lines or a reference to an array. +C<MESSAGE> can be either an array of lines or a reference to an array +and must be encoded by the caller to octets of whatever encoding is required, +e.g. by using the Encode module's C<encode()> function. =item last () @@ -1028,7 +1030,9 @@ is allowed then the message will be sent. If C<MESSAGE> is not specified then the message must be sent using the C<datasend> and C<dataend> methods from L<Net::Cmd> -C<MESSAGE> can be either an array of lines or a reference to an array. +C<MESSAGE> can be either an array of lines or a reference to an array +and must be encoded by the caller to octets of whatever encoding is required, +e.g. by using the Encode module's C<encode()> function. 
The message, either sent via C<datasend> or as the C<MESSAGE> parameter, must be in the format as described by RFC822 and must diff --git a/lib/Net/SMTP.pm b/lib/Net/SMTP.pm index e200e37..573d8ea 100644 --- a/lib/Net/SMTP.pm +++ b/lib/Net/SMTP.pm @@ -2,7 +2,7 @@ # # Versions up to 2.31_1 Copyright (c) 1995-2004 Graham Barr <gbarr@pobox.com>. # All rights reserved. -# Changes in Version 2.31_2 onwards Copyright (C) 2013-2014 Steve Hay. All +# Changes in Version 2.31_2 onwards Copyright (C) 2013-2015 Steve Hay. All # rights reserved. # This module is free software; you can redistribute it and/or modify it under # the same terms as Perl itself, i.e. under the terms of either the GNU General @@ -942,9 +942,12 @@ Synonyms for C<recipient>. Initiate the sending of the data from the current message. -C<DATA> may be a reference to a list or a list. If specified the contents -of C<DATA> and a termination string C<".\r\n"> is sent to the server. And the -result will be true if the data was accepted. +C<DATA> may be a reference to a list or a list and must be encoded by the +caller to octets of whatever encoding is required, e.g. by using the Encode +module's C<encode()> function. + +If specified the contents of C<DATA> and a termination string C<".\r\n"> is +sent to the server. The result will be true if the data was accepted. If C<DATA> is not specified then the result will indicate that the server wishes the data to be sent. The data must then be sent using the C<datasend> diff --git a/t/datasend.t b/t/datasend.t index 3a97c4b..0aea9d4 100644 --- a/t/datasend.t +++ b/t/datasend.t @@ -44,7 +44,7 @@ BEGIN { (my $libnet_t = __FILE__) =~ s/datasend.t/libnet_t.pl/; require $libnet_t or die; -print "1..51\n"; +print "1..54\n"; sub check { my $expect = pop; @@ -158,3 +158,10 @@ check( "a\015\012..\015\012.\015\012", ); +# Test that datasend() plays nicely with bytes in an upgraded string, +# even though the input should really be encode()d already. 
+check( + substr("\x{100}", 0, 0) . "\x{e9}", + + "\x{e9}\015\012.\015\012" +); diff --git a/t/pod_coverage.t b/t/pod_coverage.t index 9cb64c2..3d674d4 100644 --- a/t/pod_coverage.t +++ b/t/pod_coverage.t @@ -7,7 +7,7 @@ # Test script to check POD coverage.
#
# COPYRIGHT
-# Copyright (C) 2014 Steve Hay. All rights reserved.
+# Copyright (C) 2014, 2015 Steve Hay. All rights reserved.
#
# LICENCE
# This script is free software; you can redistribute it and/or modify it under
@@ -48,7 +48,7 @@ MAIN: { my $params = { coverage_class => qw(Pod::Coverage::CountParents) };
pod_coverage_ok('Net::Cmd', {
%$params,
- also_private => [qw(is_utf8 toascii toebcdic set_status)]
+ also_private => [qw(toascii toebcdic set_status)]
});
pod_coverage_ok('Net::Config', {
%$params,
|