develooper Front page | perl.perl5.porters | Postings from December 2009

[PATCH] New force_package_version uses scan_version

Thread Next
From:
John Peacock
Date:
December 26, 2009 13:52
Subject:
[PATCH] New force_package_version uses scan_version
Message ID:
4B36858A.2060805@havurah-software.org
Instead of using a custom tokenizer to create an old-style dualvar
version, use the version object code's scan_version, and produce
a version object directly.  This is currently only called by

   package name version;

and required that scan_version be made more strict about legal
version formats.

A function, prescan_version(), was added to facilitate this.  It can
be called in two different modes:

   isVERSION(s, TRUE) - returns a true value iff the next token in s
                        passes the strict (TRUE) or non-strict (FALSE)
		       version parsing rules.  Throws warnings with
		       appropriate context.
   prescan_version()  - called by scan_version and updates passed in
                        values, so that scan_version can then proceed
		       to actually build the version object

Currently, the warnings thrown by prescan_version() are mandatory,
i.e. they are displayed even if warnings are not enabled.
---
  embed.fnc        |    3 +
  embed.h          |    4 +
  global.sym       |    1 +
  handy.h          |    3 +
  proto.h          |   10 ++
  t/comp/package.t |    4 +-
  t/porting/diag.t |    6 ++
  toke.c           |   50 ++++++++++-
  util.c           |  258 
++++++++++++++++++++++++++++++++++++++++++-----------
  9 files changed, 282 insertions(+), 57 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 17089ff..52a46b5 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -745,6 +745,8 @@ Apa	|PERL_SI*|new_stackinfo|I32 stitems|I32 cxitems
  Ap	|char*	|scan_vstring	|NN const char *s|NN const char *const e \
  				|NN SV *sv
  Apd	|const char*	|scan_version	|NN const char *s|NN SV *rv|bool qv
+Apd	|const char*	|prescan_version	|NN const char *s\
+	|bool strict|bool *sqv|int *ssaw_period|int *swidth|bool *salpha
  Apd	|SV*	|new_version	|NN SV *ver
  Apd	|SV*	|upg_version	|NN SV *ver|bool qv
  Apd	|bool	|vverify	|NN SV *vs
@@ -1804,6 +1806,7 @@ sRn	|PTR_TBL_ENT_t *|ptr_table_find|NN PTR_TBL_t 
*const tbl|NULLOK const void *c
  s	|void	|check_uni
  s	|void	|force_next	|I32 type
  s	|char*	|force_version	|NN char *s|int guessing
+s	|char*	|force_package_version	|NN char *s
  s	|char*	|force_word	|NN char *start|int token|int check_keyword \
  				|int allow_pack|int allow_tick
  s	|SV*	|tokeq		|NN SV *sv
diff --git a/embed.h b/embed.h
index c949c5c..d6e8a83 100644
--- a/embed.h
+++ b/embed.h
@@ -636,6 +636,7 @@
  #define new_stackinfo		Perl_new_stackinfo
  #define scan_vstring		Perl_scan_vstring
  #define scan_version		Perl_scan_version
+#define prescan_version		Perl_prescan_version
  #define new_version		Perl_new_version
  #define upg_version		Perl_upg_version
  #define vverify			Perl_vverify
@@ -1591,6 +1592,7 @@
  #define check_uni		S_check_uni
  #define force_next		S_force_next
  #define force_version		S_force_version
+#define force_package_version	S_force_package_version
  #define force_word		S_force_word
  #define tokeq			S_tokeq
  #define readpipe_override	S_readpipe_override
@@ -3036,6 +3038,7 @@
  #define new_stackinfo(a,b)	Perl_new_stackinfo(aTHX_ a,b)
  #define scan_vstring(a,b,c)	Perl_scan_vstring(aTHX_ a,b,c)
  #define scan_version(a,b,c)	Perl_scan_version(aTHX_ a,b,c)
+#define prescan_version(a,b,c,d,e,f)	Perl_prescan_version(aTHX_ 
a,b,c,d,e,f)
  #define new_version(a)		Perl_new_version(aTHX_ a)
  #define upg_version(a,b)	Perl_upg_version(aTHX_ a,b)
  #define vverify(a)		Perl_vverify(aTHX_ a)
@@ -4000,6 +4003,7 @@
  #define check_uni()		S_check_uni(aTHX)
  #define force_next(a)		S_force_next(aTHX_ a)
  #define force_version(a,b)	S_force_version(aTHX_ a,b)
+#define force_package_version(a)	S_force_package_version(aTHX_ a)
  #define force_word(a,b,c,d,e)	S_force_word(aTHX_ a,b,c,d,e)
  #define tokeq(a)		S_tokeq(aTHX_ a)
  #define readpipe_override()	S_readpipe_override(aTHX)
diff --git a/global.sym b/global.sym
index ae6a48f..f0361df 100644
--- a/global.sym
+++ b/global.sym
@@ -376,6 +376,7 @@ Perl_newWHILEOP
  Perl_new_stackinfo
  Perl_scan_vstring
  Perl_scan_version
+Perl_prescan_version
  Perl_new_version
  Perl_upg_version
  Perl_vverify
diff --git a/handy.h b/handy.h
index 848cc0e..04cf7a9 100644
--- a/handy.h
+++ b/handy.h
@@ -656,6 +656,9 @@ US-ASCII (Basic Latin) range are viewed as not 
having any case.
  typedef U32 line_t;
  #define NOLINE ((line_t) 4294967295UL)
  +/* Helpful alias for version prescan */
+#define isVERSION(a,b) \
+	(a != Perl_prescan_version(aTHX_ a, b, NULL, NULL, NULL, NULL))
   /*
  =head1 Memory Management
diff --git a/proto.h b/proto.h
index 02fdd2d..fe06dde 100644
--- a/proto.h
+++ b/proto.h
@@ -2383,6 +2383,11 @@ PERL_CALLCONV const char*	Perl_scan_version(pTHX_ 
const char *s, SV *rv, bool qv
  #define PERL_ARGS_ASSERT_SCAN_VERSION	\
  	assert(s); assert(rv)
  +PERL_CALLCONV const char*	Perl_prescan_version(pTHX_ const char *s, 
bool strict, bool *sqv, int *ssaw_period, int *swidth, bool *salpha)
+			__attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_PRESCAN_VERSION	\
+	assert(s)
+
  PERL_CALLCONV SV*	Perl_new_version(pTHX_ SV *ver)
  			__attribute__nonnull__(pTHX_1);
  #define PERL_ARGS_ASSERT_NEW_VERSION	\
@@ -5802,6 +5807,11 @@ STATIC char*	S_force_version(pTHX_ char *s, int 
guessing)
  #define PERL_ARGS_ASSERT_FORCE_VERSION	\
  	assert(s)
  +STATIC char*	S_force_package_version(pTHX_ char *s)
+			__attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_FORCE_PACKAGE_VERSION	\
+	assert(s)
+
  STATIC char*	S_force_word(pTHX_ char *start, int token, int 
check_keyword, int allow_pack, int allow_tick)
  			__attribute__nonnull__(pTHX_1);
  #define PERL_ARGS_ASSERT_FORCE_WORD	\
diff --git a/t/comp/package.t b/t/comp/package.t
index 85fd1a5..e2f6d60 100644
--- a/t/comp/package.t
+++ b/t/comp/package.t
@@ -76,9 +76,9 @@ print eval '__PACKAGE__' eq 'bug32562' ? "ok 14\n" : 
"not ok 14\n";
   my @variations = (
    '1.00',
-  '1.00_01',
+  '1.00001',
    'v1.2.3',
-  'v1.2_3',
+  'v1.2.3.4',
  );
   my $test_count = 15;
diff --git a/t/porting/diag.t b/t/porting/diag.t
index 0241a12..cc41eb0 100644
--- a/t/porting/diag.t
+++ b/t/porting/diag.t
@@ -272,6 +272,12 @@ Invalid type '%c' in pack
  Invalid type '%c' in %s
  Invalid type '%c' in unpack
  Invalid type ',' in %s
+Invalid strict version format (0 before decimal required)
+Invalid strict version format (no leading zeros)
+Invalid strict version format (no underscores)
+Invalid strict version format (v1.2.3 required)
+Invalid strict version format (version required)
+Invalid strict version format (1.[0-9] required)
  Invalid version format (alpha without decimal)
  Invalid version format (misplaced _ in number)
  Invalid version object
diff --git a/toke.c b/toke.c
index cec8ac2..08be7cc 100644
--- a/toke.c
+++ b/toke.c
@@ -2134,6 +2134,54 @@ S_force_version(pTHX_ char *s, int guessing)
  }
   /*
+ * S_force_package_version
+ * Forces the next token to be a version number.
+ * If the next token appears to be an invalid version number, (e.g. "v2b"),
+ * and if "guessing" is TRUE, then no new token is created (and the caller
+ * must use an alternative parsing method).
+ */
+
+STATIC char *
+S_force_package_version(pTHX_ char *s)
+{
+    dVAR;
+    OP *version = NULL;
+#ifdef PERL_MAD
+    I32 startoff = s - SvPVX(PL_linestr);
+#endif
+
+    PERL_ARGS_ASSERT_FORCE_PACKAGE_VERSION;
+
+    while (isSPACE(*s)) /* leading whitespace */
+	s++;
+
+    if (isVERSION(s,TRUE)) {
+	SV *ver = newSV(0);
+	s = (char *)scan_version(s, ver, 0);
+	version = newSVOP(OP_CONST, 0, ver);
+    }
+    else if (*s != ';' && (s = SKIPSPACE1(s), (*s != ';' ))) {
+	PL_bufptr = s;
+	yyerror("syntax error"); /* version required */
+	return s;
+    }
+
+#ifdef PERL_MAD
+    if (PL_madskills && !version) {
+	sv_free(PL_nextwhite);	/* let next token collect whitespace */
+	PL_nextwhite = 0;
+	s = SvPVX(PL_linestr) + startoff;
+    }
+#endif
+    /* NOTE: The parser sees the package name and the VERSION swapped */
+    start_force(PL_curforce);
+    NEXTVAL_NEXTTOKE.opval = version;
+    force_next(WORD);
+
+    return s;
+}
+
+/*
   * S_tokeq
   * Tokenize a quoted string passed in as an SV.  It finds the next
   * chunk, up to end of string or a backslash.  It may make a new
@@ -6965,7 +7013,7 @@ Perl_yylex(pTHX)
   	case KEY_package:
  	    s = force_word(s,WORD,FALSE,TRUE,FALSE);
-	    s = force_version(s, FALSE);
+	    s = force_package_version(s);
  	    OPERATOR(PACKAGE);
   	case KEY_pipe:
diff --git a/util.c b/util.c
index 70f5a26..ea74e28 100644
--- a/util.c
+++ b/util.c
@@ -4181,6 +4181,188 @@ Perl_getcwd_sv(pTHX_ register SV *sv)
  }
   #define VERSION_MAX 0x7FFFFFFF
+
+const char *
+Perl_prescan_version(pTHX_ const char *s, bool strict,
+		     bool *sqv, int *ssaw_decimal, int *swidth, bool *salpha) {
+    bool qv = (sqv ? *sqv : FALSE);
+    int width = 3;
+    int saw_decimal = 0;
+    bool alpha = FALSE;
+    const char *d = s;
+
+    PERL_ARGS_ASSERT_PRESCAN_VERSION;
+
+    if (qv && isDIGIT(*d))
+	goto dotted_decimal_version;
+
+    if (*d == 'v' && isDIGIT(d[1]) ) /* explicit v-string */
+    {
+	qv = TRUE;
+	d++;
+
+dotted_decimal_version:
+	if (strict && d[0] == '0' && ! d[1] == '.')
+	{
+	    /* no leading zeros allowed */
+	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+		    "Invalid strict version format (no leading zeros)");
+	    return s;
+	}
+
+	while (isDIGIT(*d)) 	/* integer part */
+	    d++;
+
+	if (*d == '.')
+	{
+	    saw_decimal++;
+	    d++; 		/* decimal point */
+	}
+	else
+	{
+	    if (strict) {
+		/* require v1.2.3 */
+		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+			"Invalid strict version format (v1.2.3 required)");
+		return s;
+	    }
+	    else {
+		goto version_prescan_success;
+	    }
+	}
+
+	{
+	    int i = 0;
+	    int j = 0;
+	    while (isDIGIT(*d)) {	/* just keep reading */
+		i++;
+		while (isDIGIT(*d)) {
+		    d++; j++;
+		    /* maximum 3 digits between decimal */
+		    if (strict && j == 3)
+			break;
+		}
+		if (*d == '_') {
+		    if (strict) {
+			Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+				"Invalid strict version format (no underscores)");
+			return s;
+		    }
+		    if ( alpha )
+			Perl_croak(aTHX_ "Invalid version format (multiple underscores)");
+		    d++;
+		    alpha = TRUE;
+		}
+		else if (*d == '.') {
+		    if (alpha)
+			Perl_croak(aTHX_ "Invalid version format (underscores before decimal)");
+		    saw_decimal++;
+		    d++;
+		}
+		else if (!isDIGIT(*d)) {
+		    break;
+		}
+		j = 0;
+	    }
+	
+	    if (strict && i < 2) {
+		/* requires v1.2.3 */
+		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+			"Invalid strict version format (v1.2.3 required)");
+		return s;
+	    }
+	}
+    } 					/* end if dotted-decimal */
+    else
+    {					/* decimal versions */
+
+	if (d[0] == '.' && isDIGIT(d[1])) {
+	    if (strict) {
+		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+			"Invalid strict version format (0 before decimal required)");
+		return s;
+	    }
+	    goto version_saw_decimal;
+	}
+
+	if (!isDIGIT(*d) && d[0] != ';')
+	{
+	    /* version required */
+	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+		    "Invalid strict version format (version required)");
+	    return s;
+	}
+
+	if (d[0] == '0' && ! d[1] == '.')
+	{
+	    /* no leading zeros allowed */
+	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+		    "Invalid strict version format (no leading zeros)");
+	    return s;
+	}
+
+	while (isDIGIT(*d)) 	/* integer part */
+	    d++;
+
+	if (d[0] == '_' && isDIGIT(d[1])) {
+	    Perl_croak(aTHX_ "Invalid version format (alpha without decimal)");
+	}
+
+version_saw_decimal:
+	if (*d == '.')
+	{
+	    saw_decimal++;
+	    d++; 		/* decimal point */
+	}
+	if (strict && !isDIGIT(*d) && d != s ) {
+	    /* requires 1.[0-9] */
+	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+		    "Invalid strict version format (1.[0-9] required)");
+	    return s;
+	}
+
+	while (isDIGIT(*d)) {
+	    d++;
+	    if (*d == '.' && isDIGIT(d[-1])) {
+		if (alpha)
+		    Perl_croak(aTHX_ "Invalid version format (underscores before 
decimal)");
+		if (strict) {
+		    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+			    "Invalid strict version format (v1.2.3 required)");
+		    return s;
+		}
+		d = (char *)s; 		/* start all over again */
+		qv = TRUE;
+		goto dotted_decimal_version;
+	    }
+	    if (*d == '_') {
+		if (strict) {
+		    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+			    "Invalid strict version format (no underscores)");
+		    return s;
+		}
+		if ( alpha )
+		    Perl_croak(aTHX_ "Invalid version format (multiple underscores)");
+		d++;
+		alpha = TRUE;
+	    }
+	}
+    }
+
+version_prescan_success:
+    if ( alpha && saw_decimal && width == 0 )
+	Perl_croak(aTHX_ "Invalid version format (misplaced _ in number)");
+    if (sqv)
+	*sqv = qv;
+    if (swidth)
+	*swidth = width;
+    if (ssaw_decimal)
+	*ssaw_decimal = saw_decimal;
+    if (salpha)
+	*salpha = alpha;
+    return d;
+}
+
  /*
  =for apidoc scan_version
  @@ -4209,9 +4391,9 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, 
bool qv)
      const char *start;
      const char *pos;
      const char *last;
-    int saw_period = 0;
-    int alpha = 0;
+    int saw_decimal = 0;
      int width = 3;
+    bool alpha = FALSE;
      bool vinf = FALSE;
      AV * const av = newAV();
      SV * const hv = newSVrv(rv, "version"); /* create an SV and 
upgrade the RV */
@@ -4220,54 +4402,17 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, 
bool qv)
       (void)sv_upgrade(hv, SVt_PVHV); /* needs to be an HV type */
  +#ifndef NODEFAULT_SHAREKEYS
+    HvSHAREKEYS_on(hv);         /* key-sharing on by default */
+#endif
+
      while (isSPACE(*s)) /* leading whitespace is OK */
  	s++;
  -    start = last = s;
-
-    if (*s == 'v') {
-	s++;  /* get past 'v' */
-	qv = 1; /* force quoted version processing */
-    }
-
-    pos = s;
-
-    /* pre-scan the input string to check for decimals/underbars */
-    while ( *pos == '.' || *pos == '_' || *pos == ',' || isDIGIT(*pos) )
-    {
-	if ( *pos == '.' )
-	{
-	    if ( alpha )
-		Perl_croak(aTHX_ "Invalid version format (underscores before decimal)");
-	    saw_period++ ;
-	    last = pos;
-	}
-	else if ( *pos == '_' )
-	{
-	    if ( alpha )
-		Perl_croak(aTHX_ "Invalid version format (multiple underscores)");
-	    alpha = 1;
-	    width = pos - last - 1; /* natural width of sub-version */
-	}
-	else if ( *pos == ',' && isDIGIT(pos[1]) )
-	{
-	    saw_period++ ;
-	    last = pos;
-	}
-
-	pos++;
-    }
-
-    if ( alpha && !saw_period )
-	Perl_croak(aTHX_ "Invalid version format (alpha without decimal)");
-
-    if ( alpha && saw_period && width == 0 )
-	Perl_croak(aTHX_ "Invalid version format (misplaced _ in number)");
-
-    if ( saw_period > 1 )
-	qv = 1; /* force quoted version processing */
-
-    last = pos;
+    last = prescan_version(s, FALSE, &qv, &saw_decimal, &width, &alpha);
+    start = s;
+    if (*s == 'v')
+	s++;
      pos = s;
       if ( qv )
@@ -4294,7 +4439,7 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, 
bool qv)
  		 * point of a version originally created with a bare
  		 * floating point number, i.e. not quoted in any way
  		 */
- 		if ( !qv && s > start && saw_period == 1 ) {
+ 		if ( !qv && s > start && saw_decimal == 1 ) {
  		    mult *= 100;
   		    while ( s < end ) {
  			orev = rev;
@@ -4384,7 +4529,7 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, 
bool qv)
      }
      else if ( s > start ) {
  	SV * orig = newSVpvn(start,s-start);
-	if ( qv && saw_period == 1 && *start != 'v' ) {
+	if ( qv && saw_decimal == 1 && *start != 'v' ) {
  	    /* need to insert a v to be consistent */
  	    sv_insert(orig, 0, 0, "v", 1);
  	}
@@ -4433,6 +4578,9 @@ Perl_new_version(pTHX_ SV *ver)
  	/* This will get reblessed later if a derived class*/
  	SV * const hv = newSVrv(rv, "version");  	(void)sv_upgrade(hv, 
SVt_PVHV); /* needs to be an HV type */
+#ifndef NODEFAULT_SHAREKEYS
+	HvSHAREKEYS_on(hv);         /* key-sharing on by default */
+#endif
   	if ( SvROK(ver) )
  	    ver = SvRV(ver);
@@ -4530,7 +4678,7 @@ Perl_upg_version(pTHX_ SV *ver, bool qv)
  #ifdef SvVOK
      else if ( (mg = SvVSTRING_mg(ver)) ) { /* already a v-string */
  	version = savepvn( (const char*)mg->mg_ptr,mg->mg_len );
-	qv = 1;
+	qv = TRUE;
      }
  #endif
      else /* must be a string or something like a string */
@@ -4540,12 +4688,14 @@ Perl_upg_version(pTHX_ SV *ver, bool qv)
  #ifndef SvVOK
  #  if PERL_VERSION > 5
  	/* This will only be executed for 5.6.0 - 5.8.0 inclusive */
-	if ( len == 3 && !instr(version,".") && !instr(version,"_") ) {
+	if ( len >= 3 && !instr(version,".") && !instr(version,"_")
+	    && !(*version == 'u' && strEQ(version, "undef"))
+	    && (*version < '0' || *version > '9') ) {
  	    /* may be a v-string */
  	    SV * const nsv = sv_newmortal();
  	    const char *nver;
  	    const char *pos;
-	    int saw_period = 0;
+	    int saw_decimal = 0;
  	    sv_setpvf(nsv,"v%vd",ver);
  	    pos = nver = savepv(SvPV_nolen(nsv));
  @@ -4553,12 +4703,12 @@ Perl_upg_version(pTHX_ SV *ver, bool qv)
  	    pos++; /* skip the leading 'v' */
  	    while ( *pos == '.' || isDIGIT(*pos) ) {
  		if ( *pos == '.' )
-		    saw_period++ ;
+		    saw_decimal++ ;
  		pos++;
  	    }
   	    /* is definitely a v-string */
-	    if ( saw_period == 2 ) {	
+	    if ( saw_decimal >= 2 ) {	
  		Safefree(version);
  		version = nver;
  	    }

Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About