develooper Front page | perl.perl5.porters | Postings from January 2001

Re: new feature: s?printf parameter reordering

Thread Previous | Thread Next
From:
Hugo
Date:
January 11, 2001 08:06
Subject:
Re: new feature: s?printf parameter reordering
Message ID:
200101111709.RAA23756@crypt.compulink.co.uk
A long time ago, in <200010291445.OAA00353@crypt.compulink.co.uk>, I wrote:
:=head2 SYNOPSIS
:
:So the changes I propose are:
:- support reordering for all parameters: %, *v, *, .*
:- lay down that the reordering specification must immediately
:  follow that parameter: %3$, *v3$, *3$, .*3$
:[
:  or optionally, that it must immediately precede the '*' or format
:  letter: 3$*v, 3$*, .3$*, 3$<format letter>
:  I think I prefer this since it also mirrors the order that the
:  arguments would otherwise be picked up, but we then also need to
:  specify whether the latter should appear before or after a size letter.
:]
:- document (including in perldelta) the order that unordered arguments
:  are picked up if not reordered: *v, *, .*, % (and make it so if it
:  isn't already)
:- fix vectorisation of a zero-length string
:- complain if the user tries to do something we don't support:
:  - complain if reordering is used with 'args'
:  - complain if vectorisation is specified for unsupported format type
:- factor out the code choosing the argument to format
:- don't turn on UTF8 more often than needed

The attached patch implements some of these:
- support reordering for all parameters: %, *v, *, .*
- lay down that the reordering specification must immediately
  follow that parameter: %3$, *v3$, *3$, .*3$
- fix vectorisation of a zero-length string
- factor out the code choosing the argument to format

Possibly unwanted side-effects:
- the special format specifiers ' +-0' must now precede any vectorisation
  specifier. Tests in op/sprintf and op/ver have been changed to reflect this.
- in many cases, the next parameter has already been consumed when an
  invalid type letter is detected. Test #214 has been left failing to
  reflect this.

Probably wanted side-effects:
- attempts to format a non-existent parameter will warn as if C<undef>
- attempt to write to non-existent parameter with '%n' will complain
  of "attempt to modify read-only value" instead of being silent

If the side-effects are acceptable, I'll continue with docs and more tests,
and perhaps try to address the remaining issues (complain for more of the
invalid combinations; don't turn on UTF8 more often than needed).

Patch is to perl@8389. Object file without debugging is about 300 bytes
smaller here; speed should be comparable, with minor gains or losses in
some cases, but I have not tested this. I think the code is also slightly
more legible, but that's a judgement call.

Enjoy,

Hugo
--- sv.c.old	Wed Jan 10 06:53:22 2001
+++ sv.c	Thu Jan 11 16:39:58 2001
@@ -6704,6 +6704,21 @@
     sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, maybe_tainted);
 }
 
+I32
+S_expect_number(char** pattern)
+{
+    I32 var = 0;
+    switch (**pattern) {
+    case '1': case '2': case '3':
+    case '4': case '5': case '6':
+    case '7': case '8': case '9':
+	while (isDIGIT(**pattern))
+	    var = var * 10 + (*(*pattern)++ - '0');
+    }
+    return var;
+}
+#define EXPECT(pattern, var) (var = S_expect_number(&pattern))
+
 /*
 =for apidoc sv_vcatpvfn
 
@@ -6764,6 +6779,7 @@
 	bool alt = FALSE;
 	bool left = FALSE;
 	bool vectorize = FALSE;
+	bool vectorarg = FALSE;
 	bool utf = FALSE;
 	char fill = ' ';
 	char plus = 0;
@@ -6801,10 +6817,13 @@
 	STRLEN gap;
 	char *dotstr = ".";
 	STRLEN dotstrlen = 1;
-	I32 epix = 0; /* explicit parameter index */
+	I32 efix = 0; /* explicit format parameter index */
 	I32 ewix = 0; /* explicit width index */
+	I32 epix = 0; /* explicit precision index */
+	I32 evix = 0; /* explicit vector index */
 	bool asterisk = FALSE;
 
+	/* echo everything up to the next format specification */
 	for (q = p; q < patend && *q != '%'; ++q) ;
 	if (q > p) {
 	    sv_catpvn(sv, p, q - p);
@@ -6813,6 +6832,25 @@
 	if (q++ >= patend)
 	    break;
 
+/*
+    We allow format specification elements in this order:
+	\d+\$              explicit format parameter index
+	[-+ 0#]+           flags
+	\*?(\d+\$)?v       vector with optional (optionally specified) arg
+	\d+|\*(\d+\$)?     width using optional (optionally specified) arg
+	\.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg
+	[hlqLV]            size
+    [%bcdefginopsux_DFOUX] format (mandatory)
+*/
+	if (EXPECT(q, width)) {
+	    if (*q == '$') {
+		++q;
+		efix = width;
+	    } else {
+		goto gotwidth;
+	    }
+	}
+
 	/* FLAGS */
 
 	while (*q) {
@@ -6836,64 +6874,63 @@
 		q++;
 		continue;
 
-	    case '*':			/* printf("%*vX",":",$ipv6addr) */
-		if (q[1] != 'v')
-		    break;
-		q++;
-		if (args)
-		    vecsv = va_arg(*args, SV*);
-		else if (svix < svmax)
-		    vecsv = svargs[svix++];
-		else
-		    continue;
-		dotstr = SvPVx(vecsv,dotstrlen);
-		if (DO_UTF8(vecsv))
-		    is_utf = TRUE;
-		/* FALL THROUGH */
-
-	    case 'v':
-		vectorize = TRUE;
-		q++;
-		continue;
-
 	    default:
 		break;
 	    }
 	    break;
 	}
 
-	/* WIDTH */
-
-    scanwidth:
-
+      tryasterisk:
 	if (*q == '*') {
-	    if (asterisk)
-		goto unknown;
+	    q++;
+	    if (EXPECT(q, ewix))
+		if (*q++ != '$')
+		    goto unknown;
 	    asterisk = TRUE;
+	}
+	if (*q == 'v') {
 	    q++;
+	    if (vectorize)
+		goto unknown;
+	    if (vectorarg = asterisk) {
+		evix = ewix;
+		ewix = 0;
+		asterisk = FALSE;
+	    }
+	    vectorize = TRUE;
+	    goto tryasterisk;
 	}
 
-	switch (*q) {
-	case '1': case '2': case '3':
-	case '4': case '5': case '6':
-	case '7': case '8': case '9':
-	    width = 0;
-	    while (isDIGIT(*q))
-		width = width * 10 + (*q++ - '0');
-	    if (*q == '$') {
-		if (asterisk && ewix == 0) {
-		    ewix  = width;
-		    width = 0;
-		    q++;
-		    goto scanwidth;
-		} else if (epix == 0) {
-		    epix  = width;
-		    width = 0;
-		    q++;
-		    goto scanwidth;
-		} else
-		    goto unknown;
+	if (!asterisk)
+	    EXPECT(q, width);
+
+	if (vectorize) {
+	    if (vectorarg) {
+		if (args)
+		    vecsv = va_arg(*args, SV*);
+		else
+		    vecsv = (evix ? evix <= svmax : svix < svmax) ?
+			svargs[ewix ? ewix-1 : svix++] : &PL_sv_undef;
+		dotstr = (U8*)SvPVx(vecsv, dotstrlen);
+		if (DO_UTF8(vecsv))
+		    is_utf = TRUE;
+	    }
+	    if (args) {
+		vecsv = va_arg(*args, SV*);
+		vecstr = (U8*)SvPVx(vecsv,veclen);
+		utf = DO_UTF8(vecsv);
+	    }
+	    else if (efix ? efix <= svmax : svix < svmax) {
+		vecsv = svargs[efix ? efix-1 : svix++];
+		vecstr = (U8*)SvPVx(vecsv,veclen);
+		utf = DO_UTF8(vecsv);
+	    }
+	    else {
+		vecstr = (U8*)"";
+		veclen = 0;
 	    }
+	    if (DO_UTF8(vecsv))
+		is_utf = TRUE;
 	}
 
 	if (asterisk) {
@@ -6905,19 +6942,22 @@
 	    left |= (i < 0);
 	    width = (i < 0) ? -i : i;
 	}
+      gotwidth:
 
 	/* PRECISION */
 
 	if (*q == '.') {
 	    q++;
 	    if (*q == '*') {
+		q++;
+		if (EXPECT(q, epix) && *q++ != '$')
+		    goto unknown;
 		if (args)
 		    i = va_arg(*args, int);
 		else
 		    i = (ewix ? ewix <= svmax : svix < svmax)
 			? SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0;
 		precis = (i < 0) ? 0 : i;
-		q++;
 	    }
 	    else {
 		precis = 0;
@@ -6927,23 +6967,6 @@
 	    has_precis = TRUE;
 	}
 
-	if (vectorize) {
-	    if (args) {
-		vecsv = va_arg(*args, SV*);
-		vecstr = (U8*)SvPVx(vecsv,veclen);
-		utf = DO_UTF8(vecsv);
-	    }
-	    else if (epix ? epix <= svmax : svix < svmax) {
-		vecsv = svargs[epix ? epix-1 : svix++];
-		vecstr = (U8*)SvPVx(vecsv,veclen);
-		utf = DO_UTF8(vecsv);
-	    }
-	    else {
-		vecstr = (U8*)"";
-		veclen = 0;
-	    }
-	}
-
 	/* SIZE */
 
 	switch (*q) {
@@ -6975,21 +6998,22 @@
 
 	/* CONVERSION */
 
+	if (*q == '%') {
+	    eptr = q++;
+	    elen = 1;
+	    goto string;
+	}
+
+	if (!args)
+	    argsv = (efix ? efix <= svmax : svix < svmax) ?
+		    svargs[efix ? efix-1 : svix++] : &PL_sv_undef;
+
 	switch (c = *q++) {
 
 	    /* STRINGS */
 
-	case '%':
-	    eptr = q - 1;
-	    elen = 1;
-	    goto string;
-
 	case 'c':
-	    if (args)
-		uv = va_arg(*args, int);
-	    else
-		uv = (epix ? epix <= svmax : svix < svmax) ?
-		    SvIVx(svargs[epix ? epix-1 : svix++]) : 0;
+	    uv = args ? va_arg(*args, int) : SvIVx(argsv);
 	    if ((uv > 255 || (uv > 127 && SvUTF8(sv))) && !IN_BYTE) {
 		eptr = (char*)utf8buf;
 		elen = uv_to_utf8((U8*)eptr, uv) - utf8buf;
@@ -7018,8 +7042,7 @@
 		    elen = sizeof nullstr - 1;
 		}
 	    }
-	    else if (epix ? epix <= svmax : svix < svmax) {
-		argsv = svargs[epix ? epix-1 : svix++];
+	    else {
 		eptr = SvPVx(argsv, elen);
 		if (DO_UTF8(argsv)) {
 		    if (has_precis && precis < elen) {
@@ -7043,7 +7066,7 @@
 	     */
 	    if (!args)
 		goto unknown;
-	    argsv = va_arg(*args,SV*);
+	    argsv = va_arg(*args, SV*);
 	    eptr = SvPVx(argsv, elen);
 	    if (DO_UTF8(argsv))
 		is_utf = TRUE;
@@ -7059,11 +7082,7 @@
 	case 'p':
 	    if (alt)
 		goto unknown;
-	    if (args)
-		uv = PTR2UV(va_arg(*args, void*));
-	    else
-		uv = (epix ? epix <= svmax : svix < svmax) ?
-		    PTR2UV(svargs[epix ? epix-1 : svix++]) : 0;
+	    uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
 	    base = 16;
 	    goto integer;
 
@@ -7078,10 +7097,8 @@
 	case 'i':
 	    if (vectorize) {
 		STRLEN ulen;
-		if (!veclen) {
-		    vectorize = FALSE;
-		    break;
-		}
+		if (!veclen)
+		    continue;
 		if (utf)
 		    iv = (IV)utf8_to_uv(vecstr, veclen, &ulen, 0);
 		else {
@@ -7103,8 +7120,7 @@
 		}
 	    }
 	    else {
-		iv = (epix ? epix <= svmax : svix < svmax) ?
-		    SvIVx(svargs[epix ? epix-1 : svix++]) : 0;
+		iv = SvIVx(argsv);
 		switch (intsize) {
 		case 'h':	iv = (short)iv; break;
 		default:	break;
@@ -7161,10 +7177,8 @@
 	    if (vectorize) {
 		STRLEN ulen;
 	vector:
-		if (!veclen) {
-		    vectorize = FALSE;
-		    break;
-		}
+		if (!veclen)
+		    continue;
 		if (utf)
 		    uv = utf8_to_uv(vecstr, veclen, &ulen, 0);
 		else {
@@ -7186,8 +7200,7 @@
 		}
 	    }
 	    else {
-		uv = (epix ? epix <= svmax : svix < svmax) ?
-		    SvUVx(svargs[epix ? epix-1 : svix++]) : 0;
+		uv = SvUVx(argsv);
 		switch (intsize) {
 		case 'h':	uv = (unsigned short)uv; break;
 		default:	break;
@@ -7276,11 +7289,7 @@
 	    /* This is evil, but floating point is even more evil */
 
 	    vectorize = FALSE;
-	    if (args)
-		nv = va_arg(*args, NV);
-	    else
-		nv = (epix ? epix <= svmax : svix < svmax) ?
-		    SvNVx(svargs[epix ? epix-1 : svix++]) : 0.0;
+	    nv = args ? va_arg(*args, NV) : SvNVx(argsv);
 
 	    need = 0;
 	    if (c != 'e' && c != 'E') {
@@ -7360,8 +7369,8 @@
 #endif
 		}
 	    }
-	    else if (epix ? epix <= svmax : svix < svmax)
-		sv_setuv_mg(svargs[epix ? epix-1 : svix++], (UV)i);
+	    else 
+		sv_setuv_mg(argsv, (UV)i);
 	    continue;	/* not "break" */
 
 	    /* UNKNOWN */
--- t/op/sprintf.t.old	Wed Nov 29 01:49:18 2000
+++ t/op/sprintf.t	Thu Jan 11 16:43:27 2001
@@ -21,7 +21,9 @@
 
 $SIG{__WARN__} = sub {
     if ($_[0] =~ /^Invalid conversion/) {
-	$w = ' INVALID'
+	$w = ' INVALID';
+    } elsif ($_[0] =~ /^Use of uninitialized value/) {
+	$w = ' UNINIT';
     } else {
 	warn @_;
     }
@@ -175,19 +177,19 @@
 >%#vd<      >chr(1)<      >1<
 >%vd<       >"\01\02\03"< >1.2.3<
 >%v.3d<     >"\01\02\03"< >001.002.003<
->%v03d<     >"\01\02\03"< >001.002.003<
->%v-3d<     >"\01\02\03"< >1  .2  .3  <
->%v+-3d<    >"\01\02\03"< >+1 .2  .3  <
+>%0v3d<     >"\01\02\03"< >001.002.003<
+>%-v3d<     >"\01\02\03"< >1  .2  .3  <
+>%+-v3d<    >"\01\02\03"< >+1 .2  .3  <
 >%v4.3d<    >"\01\02\03"< > 001. 002. 003<
->%v04.3d<   >"\01\02\03"< >0001.0002.0003<
->%*v02d<    >['-', "\0\7\14"]< >00-07-12<
->%v.*d<     >[3, "\01\02\03"]< >001.002.003<
->%v0*d<     >[3, "\01\02\03"]< >001.002.003<
->%v-*d<     >[3, "\01\02\03"]< >1  .2  .3  <
->%v+-*d<    >[3, "\01\02\03"]< >+1 .2  .3  <
->%v*.*d<    >[4, 3, "\01\02\03"]< > 001. 002. 003<
->%v0*.*d<   >[4, 3, "\01\02\03"]< >0001.0002.0003<
->%*v0*d<    >['-', 2, "\0\7\13"]< >00-07-11<
+>%0v4.3d<   >"\01\02\03"< >0001.0002.0003<
+>%0*v2d<    >['-', "\0\7\14"]< >00-07-12<
+>%v.*d<     >["\01\02\03", 3]< >001.002.003<
+>%0v*d<     >["\01\02\03", 3]< >001.002.003<
+>%-v*d<     >["\01\02\03", 3]< >1  .2  .3  <
+>%+-v*d<    >["\01\02\03", 3]< >+1 .2  .3  <
+>%v*.*d<    >["\01\02\03", 4, 3]< > 001. 002. 003<
+>%0v*.*d<   >["\01\02\03", 4, 3]< >0001.0002.0003<
+>%0*v*d<    >['-', "\0\7\13", 2]< >00-07-11<
 >%e<        >1234.875<    >1.234875e+03<
 >%e<        >0.000012345< >1.234500e-05<
 >%e<        >1234567E96<  >1.234567e+102<
@@ -314,10 +316,11 @@
 >%2$d %d %d<	>[12, 34]<	>34 12 34<
 >%3$d %d %d<	>[12, 34, 56]<	>56 12 34<
 >%2$*3$d %d<	>[12, 34, 3]<	> 34 12<
->%*3$2$d %d<	>[12, 34, 3]<	> 34 12<
->%2$d<		>12<	>0<
+>%*3$2$d %d<	>[12, 34, 3]<	>%*3$2$d 12 INVALID<
+>%2$d<		>12<	>0 UNINIT<
 >%0$d<		>12<	>%0$d INVALID<
 >%1$$d<		>12<	>%1$$d INVALID<
 >%1$1$d<	>12<	>%1$1$d INVALID<
 >%*2$*2$d<	>[12, 3]<	>%*2$*2$d INVALID<
 >%*2*2$d<	>[12, 3]<	>%*2*2$d INVALID<
+>%0v2.2d<	>''<	><
--- t/op/ver.t.old	Tue Dec  5 22:14:59 2000
+++ t/op/ver.t	Thu Jan 11 15:47:17 2001
@@ -102,10 +102,10 @@
 print "ok $test\n";  ++$test;
 
 if (ord("\t") == 9) { # ASCII
-    print "not " unless sprintf("%*v#o", ":", "Perl") eq '0120:0145:0162:0154';
+    print "not " unless sprintf("%#*vo", ":", "Perl") eq '0120:0145:0162:0154';
 }
 else {
-    print "not " unless sprintf("%*v#o", ":", "Perl") eq '0327:0205:0231:0223';
+    print "not " unless sprintf("%#*vo", ":", "Perl") eq '0327:0205:0231:0223';
 }
 print "ok $test\n";  ++$test;
 
@@ -144,10 +144,10 @@
     print "ok $test\n";  ++$test;
 
     if (ord("\t") == 9) { # ASCII
-        print "not " unless sprintf("%*v#o", ":", "Perl") eq '0120:0145:0162:0154';
+        print "not " unless sprintf("%#*vo", ":", "Perl") eq '0120:0145:0162:0154';
     }
     else {
-        print "not " unless sprintf("%*v#o", ":", "Perl") eq '0327:0205:0231:0223';
+        print "not " unless sprintf("%#*vo", ":", "Perl") eq '0327:0205:0231:0223';
     }
     print "ok $test\n";  ++$test;
 

Thread Previous | Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About