develooper Front page | perl.perl5.porters | Postings from March 2003

Re: [perl #21395] rcatline doesn't grok utf8

Thread Previous | Thread Next
From:
Enache Adrian
Date:
March 1, 2003 19:41
Subject:
Re: [perl #21395] rcatline doesn't grok utf8
Message ID:
20030302034354.GA4905@ratsnest.hole
On Sat, Mar 01, 2003 at 11:21:34AM +0900, Inaba Hiroto wrote:
> I think also scalar-utf8/file-binary case should be considered.
> The recently added macro `sv_catpvn_utf8_upgrade' may be useful.

The following patch implements the kludge described in my previous
message.
I tried to keep the most common cases ( first append==0 ~ 90% - only
the 'if(append) ..' complication there -, then bytes/bytes, utf8/utf8)
straightforward.

Better tests will be probably needed.

Regards
Adi

--------------------------------------------------------------------
--- /arc/perl-current/sv.c	2003-02-26 04:50:55.000000000 +0200
+++ sv.c	2003-03-02 05:28:32.000000000 +0200
@@ -6247,7 +6247,27 @@ Perl_sv_gets(pTHX_ register SV *sv, regi
     (void)SvUPGRADE(sv, SVt_PV);
 
     SvSCREAM_off(sv);
-    SvPOK_only(sv);    /* Validate pointer */
+
+    if (append) {
+	if (PerlIO_isutf8(fp)) {
+	    if (!SvUTF8(sv)) {
+		sv_utf8_upgrade_nomg(sv);
+		sv_pos_u2b(sv,&append,0);
+	    }
+	} else if (SvUTF8(sv)) {
+	    SV *tsv = NEWSV(0,0);
+	    sv_gets(tsv, fp, 0);
+	    sv_utf8_upgrade_nomg(tsv);
+	    SvCUR_set(sv,append);
+	    sv_catsv(sv,tsv);
+	    sv_free(tsv);
+	    goto return_string_or_null;
+	}
+    }
+
+    SvPOK_only(sv);
+    if (PerlIO_isutf8(fp))
+	SvUTF8_on(sv);
 
     if (PL_curcop == &PL_compiling) {
 	/* we always read code in line mode */
@@ -6290,7 +6310,7 @@ Perl_sv_gets(pTHX_ register SV *sv, regi
 #endif
       SvCUR_set(sv, bytesread += append);
       buffer[bytesread] = '\0';
-      goto check_utf8_and_return;
+      goto return_string_or_null;
     }
     else if (RsPARA(PL_rs)) {
 	rsptr = "\n\n";
@@ -6543,12 +6563,7 @@ screamer2:
 	}
     }
 
-check_utf8_and_return:
-    if (PerlIO_isutf8(fp))
-	SvUTF8_on(sv);
-    else
-	SvUTF8_off(sv);
-
+return_string_or_null:
     return (SvCUR(sv) - append) ? SvPVX(sv) : Nullch;
 }
 
--------------------------------------------------------------------
--- /arc/perl-current/t/io/utf8.t	2002-06-15 22:10:03.000000000 +0300
+++ ./t/io/utf8.t	2003-03-02 04:26:14.000000000 +0200
@@ -12,7 +12,7 @@ BEGIN {
 no utf8; # needed for use utf8 not griping about the raw octets
 
 $| = 1;
-print "1..31\n";
+print "1..33\n";
 
 open(F,"+>:utf8",'a');
 print F chr(0x100).'�';
@@ -273,6 +273,28 @@ print "ok 26\n";
     print $@ =~ /Wide character in print/ ? "ok 31\n" : "not ok 31\n";
 }
 
+{
+    open F, ">:bytes","a"; print F "\xde"; close F;
+
+    open F, "<:bytes", "a";
+    my $b = chr 0x100;
+    $b .= <F>;
+    print $b eq chr(0x100).chr(0xde) ? "ok 32" : "not ok 32";
+    print " \#21395 '.= <>' utf8 vs. bytes\n";
+    close F;
+}
+
+{
+    open F, ">:utf8","a"; print F chr 0x100; close F;
+
+    open F, "<:utf8", "a";
+    my $b = "\xde";
+    $b .= <F>;
+    print $b eq chr(0xde).chr(0x100) ? "ok 33" : "not ok 33";
+    print " \#21395 '.= <>' bytes vs. utf8\n";
+    close F;
+}
+
 # sysread() and syswrite() tested in lib/open.t since Fnctl is used
 
 END {
--------------------------------------------------------------------


Thread Previous | Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About