develooper Front page | perl.perl5.porters | Postings from January 2012

Re: [perl #108386] perlbug AutoReply: pp_ctl.c:PP(pp_formline) bugswith wide chars in caret fields

From:
Antons Suspans
Date:
January 20, 2012 11:58
Subject:
Re: [perl #108386] perlbug AutoReply: pp_ctl.c:PP(pp_formline) bugswith wide chars in caret fields
Message ID:
20120120094333.GA13408@ant
I have discovered another bug in "Perl_pp_formline", "FF_MORE" branch: ellipsis takes 3 bytes which is wrong when there are wide characters. I include the combined patch (for "FF_CHECHCKOP" and "FF_MORE") and 2 related tests for "t/op/write.t". I am not sure about quality of my code but I hope it is of some help.

The HMB tests produce different results, depending on prior plain "formline" calls (with unpatched "pp_ctl.c" too). I have not figured out what is the reason, and just put a blank "write" after the added tests.


--- old/perl-5.14.2/pp_ctl.c
+++ new/perl-5.14.2/pp_ctl.c
@@ -705,10 +705,11 @@
 		    if (itemsize != (I32)len) {
 			I32 itembytes;
 			if (itemsize <= fieldsize) {
-			    const char *send = chophere = s + itemsize;
+			    const char *send = chophere = s + len;
 			    while (s < send) {
 				if (*s == '\r') {
 				    itemsize = s - item;
+				    sv_pos_b2u(sv, &itemsize);
 				    chophere = s;
 				    break;
 				}
@@ -1044,8 +1045,17 @@
 			while (arg-- > 0)
 			    *t++ = ' ';
 		    }
-		    s1 = t - 3;
-		    if (strnEQ(s1,"   ",3)) {
+		    if (item_is_utf8) {
+			for (s1=t, arg=3; arg;) {
+			    s1--;
+			    if (! UTF8_IS_CONTINUATION(*s1))
+				arg--;
+			    *s1 = ' ';
+			}
+		    }
+		    else
+			s1 = t - 3;
+		    if (s1 == t - 3 && strnEQ(s1,"   ",3)) {
 			while (s1 > SvPVX_const(PL_formtarget) && isSPACE(s1[-1]))
 			    s1--;
 		    }


--- old/perl-5.14.2/t/op/write.t
+++ new/perl-5.14.2/t/op/write.t
@@ -61,7 +61,7 @@ my $bas_tests = 20;
 
 # number of tests in section 3
-my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 2 + 1;
+my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 2 + 1 + 2;
 
 # number of tests in section 4
 my $hmb_tests = 35;
@@ -639,6 +639,64 @@
 write;
 EOP
 
+# formline Unicode tests (#108386):
+# - chopping in caret-fields;
+# - putting '...' in at-fields.
+{
+my ($format, $output, $expected);
+
+# sample data
+my @input = split "\n", <<EOD;
+Tokyo - \x{6771}\x{4eac}
+Yokohama - \x{6a2a}\x{6d5c}\x{5e02}
+Osaka - \x{5927}\x{962a}
+Nagoya - \x{540d}\x{53e4}\x{5c4b}
+Sapporo - \x{672d}\x{5e4c}\x{5e02}
+K\x{14d}be - \x{795e}\x{6238}\x{5e02}
+Kyoto - \x{4eac}\x{90fd}\x{5e02}
+EOD
+
+# caret-field test
+$format = <<EOD;
+^<<<<<<<<< ~~
+EOD
+my $input = join "\r", @input;
+$output = swrite($format, $input);
+$expected = <<EOD;
+Tokyo - \x{6771}\x{4eac}
+Yokohama -
+\x{6a2a}\x{6d5c}\x{5e02}
+Osaka - \x{5927}\x{962a}
+Nagoya -
+\x{540d}\x{53e4}\x{5c4b}
+Sapporo -
+\x{672d}\x{5e4c}\x{5e02}
+K\x{14d}be - \x{795e}\x{6238}\x{5e02}
+Kyoto -
+\x{4eac}\x{90fd}\x{5e02}
+EOD
+is $output, $expected;
+
+# at-field test
+$format = <<EOD;
+@<<<<<<<<...
+EOD
+$output = join '', map swrite($format, $_), @input;
+$expected = <<EOD;
+Tokyo - \x{6771}\x{4eac}
+Yokohama ...
+Osaka - \x{5927}\x{962a}
+Nagoya - \x{540d}\x{53e4}\x{5c4b}
+Sapporo -...
+K\x{14d}be - \x{795e}\x{6238}\x{5e02}
+Kyoto - \x{4eac}\x{90fd}\x{5e02}
+EOD
+is $output, $expected;
+}
+
+# XXX: HMB will fail without some write
+$~ = 'EMPTY'; write;
+
 #############################
 ## Section 4
 ## Add new tests *above* here


P. S. I am sorry about misspelling Nicholas' name in previous message.




nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About