develooper Front page | perl.perl5.porters | Postings from November 1999

[PATCH 5.005_62] Fixes to hopscotch

From:
Ilya Zakharevich
Date:
November 23, 1999 19:55
Subject:
[PATCH 5.005_62] Fixes to hopscotch
Message ID:
199911240355.WAA23020@monk.mps.ohio-state.edu
This patch corrects bugs in the hopscotch logic which I discovered during
implementing first-char cognizance optimization.

Enjoy,
Ilya

--- ./regexec.c-pre	Sun Nov 14 15:59:52 1999
+++ ./regexec.c	Tue Nov 23 21:31:22 1999
@@ -653,6 +653,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog,
 	);
       success_at_start:
 	if (!(prog->reganch & ROPT_NAUGHTY)	/* XXXX If strpos moved? */
+	    && prog->check_substr	/* Could be deleted already */
 	    && --BmUSEFUL(prog->check_substr) < 0
 	    && prog->check_substr == prog->float_substr) { /* boo */
 	    /* If flags & SOMETHING - do not do it many times on the same match */
@@ -680,9 +681,12 @@ Perl_re_intuit_start(pTHX_ regexp *prog,
 	   regstclass does not come from lookahead...  */
 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
+	int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
+		    ? STR_LEN(prog->regstclass)
+		    : 1);
 	char *endpos = (prog->anchored_substr || ml_anch)
-		? s + (prog->minlen? 1 : 0)
-		: (prog->float_substr ? check_at - start_shift + 1
+		? s + (prog->minlen? cl_l : 0)
+		: (prog->float_substr ? check_at - start_shift + cl_l
 				      : strend) ;
 	char *startpos = sv ? strend - SvCUR(sv) : s;
 
@@ -697,30 +701,43 @@ Perl_re_intuit_start(pTHX_ regexp *prog,
 				"Could not match STCLASS...\n") );
 		goto fail;
 	    }
+	    DEBUG_r( PerlIO_printf(Perl_debug_log,
+				   "This position contradicts STCLASS...\n") );
 	    /* Contradict one of substrings */
 	    if (prog->anchored_substr) {
-		DEBUG_r( PerlIO_printf(Perl_debug_log,
-				"This position contradicts STCLASS...\n") );
 		if (prog->anchored_substr == check) {
 		    DEBUG_r( what = "anchored" );
 		  hop_and_restart:
 		    PL_regeol = strend;	/* Used in HOP() */
 		    s = HOPc(t, 1);
+		    if (s + start_shift + end_shift > strend) {
+			/* XXXX Should be taken into account earlier? */
+			DEBUG_r( PerlIO_printf(Perl_debug_log,
+					       "Could not match STCLASS...\n") );
+			goto fail;
+		    }
 		    DEBUG_r( PerlIO_printf(Perl_debug_log,
-				"trying %s substr starting at offset %ld...\n",
+				"Trying %s substr starting at offset %ld...\n",
 				 what, (long)(s + start_shift - i_strpos)) );
 		    goto restart;
 		}
-		/* Have both, check is floating */
+		/* Have both, check_string is floating */
 		if (t + start_shift >= check_at) /* Contradicts floating=check */
 		    goto retry_floating_check;
 		/* Recheck anchored substring, but not floating... */
 		s = check_at; 
 		DEBUG_r( PerlIO_printf(Perl_debug_log,
-			  "trying anchored substr starting at offset %ld...\n",
+			  "Trying anchored substr starting at offset %ld...\n",
 			  (long)(other_last - i_strpos)) );
 		goto do_other_anchored;
 	    }
+	    if (!prog->float_substr) {	/* Could have been deleted */
+		if (ml_anch) {
+		    s = t = t + 1;
+		    goto try_at_offset;
+		}
+		goto fail;
+	    }
 	    /* Check is floating subtring. */
 	  retry_floating_check:
 	    t = check_at - start_shift;
@@ -740,7 +757,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog,
     return s;
 
   fail_finish:				/* Substring not found */
-    BmUSEFUL(prog->check_substr) += 5;	/* hooray */
+    if (prog->check_substr)		/* could be removed already */
+	BmUSEFUL(prog->check_substr) += 5; /* hooray */
   fail:
     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 			  PL_colors[4],PL_colors[5]));
@@ -807,9 +825,9 @@ find_byclass(regexp * prog, regnode *c, 
 	    if (c1 == c2) {
 		while (s <= e) {
 		    if ( *s == c1
-			 && (ln == 1 || (OP(c) == EXACTF
-					 ? ibcmp(s, m, ln)
-					 : ibcmp_locale(s, m, ln)))
+			 && (ln == 1 || !(OP(c) == EXACTF
+					  ? ibcmp(s, m, ln)
+					  : ibcmp_locale(s, m, ln)))
 			 && (norun || regtry(prog, s)) )
 			goto got_it;
 		    s++;
@@ -817,9 +835,9 @@ find_byclass(regexp * prog, regnode *c, 
 	    } else {
 		while (s <= e) {
 		    if ( (*s == c1 || *s == c2)
-			 && (ln == 1 || (OP(c) == EXACTF
-					 ? ibcmp(s, m, ln)
-					 : ibcmp_locale(s, m, ln)))
+			 && (ln == 1 || !(OP(c) == EXACTF
+					  ? ibcmp(s, m, ln)
+					  : ibcmp_locale(s, m, ln)))
 			 && (norun || regtry(prog, s)) )
 			goto got_it;
 		    s++;
@@ -1494,7 +1512,8 @@ Perl_regexec_flags(pTHX_ register regexp
 	goto phooey;
     }
     else if (c = prog->regstclass) {
-	if (minlen)		/* don't bother with what can't match */
+	if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
+	    /* don't bother with what can't match */
 	    strend = HOPc(strend, -(minlen - 1));
   	if (find_byclass(prog, c, s, strend, startpos, 0))
 	    goto got_it;
--- ./t/op/re_tests-pre	Mon Nov  8 21:04:36 1999
+++ ./t/op/re_tests	Tue Nov 23 21:13:56 1999
@@ -744,3 +744,6 @@ tt+$	xxxtt	y	-	-
 \GX.*X	aaaXbX	n	-	-
 (\d+\.\d+)	3.1415926	y	$1	3.1415926
 (\ba.{0,10}br)	have a web browser	y	$1	a web br
+'\.c(pp|xx|c)?$'i	Changes	n	-	-
+'\.c(pp|xx|c)?$'i	IO.c	y	-	-
+'(\.c(pp|xx|c)?$)'i	IO.c	y	$1	.c



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About