develooper Front page | perl.perl5.porters | Postings from December 2004

[PATCH] :encoding(utf8) broken in perl-5.8.6

Thread Previous | Thread Next
From:
Gisle Aas
Date:
December 3, 2004 06:09
Subject:
[PATCH] :encoding(utf8) broken in perl-5.8.6
Message ID:
lrllcfeank.fsf_-_@caliper.activestate.com
Gisle Aas <gisle@ActiveState.com> writes:

> When using ':encoding(utf8)' all data after a bad byte is simply lost.
> This seems like a serious perl-5.8.6 regression to me.

This is a fix:

Index: perl/ext/Encode/Encode.pm
--- perl/ext/Encode/Encode.pm.~1~	Fri Dec  3 15:04:36 2004
+++ perl/ext/Encode/Encode.pm	Fri Dec  3 15:04:36 2004
@@ -3,7 +3,7 @@
 #
 package Encode;
 use strict;
-our $VERSION = do { my @r = (q$Revision: 2.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.8.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 sub DEBUG () { 0 }
 use XSLoader ();
 XSLoader::load(__PACKAGE__, $VERSION);
Index: perl/ext/Encode/Encode.xs
--- perl/ext/Encode/Encode.xs.~1~	Fri Dec  3 15:04:36 2004
+++ perl/ext/Encode/Encode.xs	Fri Dec  3 15:04:36 2004
@@ -279,7 +279,6 @@
 #if 0
 	fprintf(stderr, "renewed == %d\n", renewed);
 #endif
-	if (renewed){ check |= ENCODE_RETURN_ON_ERR; }
     }
     FREETMPS; LEAVE;
     /* end PerlIO check */
@@ -302,6 +301,8 @@
 	    U8 skip = UTF8SKIP(s);
 	    if ((s + skip) > e) {
 	    	/* Partial character - done */
+	        if (renewed)
+		    break;
 	    	goto decode_utf8_fallback;
 	    }
 	    else if (is_utf8_char(s)) {
Index: perl/ext/PerlIO/t/encoding.t
--- perl/ext/PerlIO/t/encoding.t.~1~	Fri Dec  3 15:04:36 2004
+++ perl/ext/PerlIO/t/encoding.t	Fri Dec  3 15:04:36 2004
@@ -16,7 +16,7 @@
     }
 }
 
-print "1..14\n";
+print "1..15\n";
 
 my $grk = "grk$$";
 my $utf = "utf$$";
@@ -150,6 +150,18 @@
 print "not " unless ($dstr eq $str);
 print "ok 14\n";
 
+# Try decoding some bad stuff
+open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
+print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
+close(F);
+
+open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
+$dstr = join(":", <F>);
+close(F);
+print "not " unless $dstr eq "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n";
+print "ok 15\n";
+
+
 END {
     1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
 }
End of Patch.


Thread Previous | Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About