develooper Front page | perl.perl5.porters | Postings from October 1999

[PATCH 5.005_62] Missing REx engine patch

Thread Next
From:
Ilya Zakharevich
Date:
October 24, 1999 20:47
Subject:
[PATCH 5.005_62] Missing REx engine patch
Message ID:
199910250347.XAA16094@monk.mps.ohio-state.edu
The following patch should have come at the time of 5.003_05 (or
somesuch) when Chip converted //i to not casefold the target before
the match.  This omission lead to people rewriting /foo-bar/i as
/[fF][oO][oO]-[bB][aA][rR]/ to get an acceptable speed.

This patch would not make the first one as quick as the second, but it
would make /foobar/i as quick as /[fF][oO][oO][bB][aA][rR]/ (note the
absence of "-"): this patch implements only the first-node
optimization, and with /[fF][oO][oO]-[bB][aA][rR]/ the additional
const-substring-"-"-optimization will be also triggered.

Tested with TEST and UTEST.

Enjoy,
Ilya

--- ./regcomp.c~	Fri Oct 15 02:07:44 1999
+++ ./regcomp.c	Sun Oct 24 22:45:34 1999
@@ -944,7 +944,12 @@ Perl_pregcomp(pTHX_ char *exp, char *xen
 
 	/* Starting-point info. */
       again:
-	if (OP(first) == EXACT);	/* Empty, get anchored substr later. */
+	if (PL_regkind[(U8)OP(first) == EXACT]) {
+	    if (OP(first) == EXACT);	/* Empty, get anchored substr later. */
+	    else if ((OP(first) == EXACTF || OP(first) == EXACTFL)
+		     && !UTF)
+		r->regstclass = first;
+	}
 	else if (strchr((char*)PL_simple+4,OP(first)))
 	    r->regstclass = first;
 	else if (PL_regkind[(U8)OP(first)] == BOUND ||
--- ./regexec.c~	Sun Oct 10 14:25:52 1999
+++ ./regexec.c	Sun Oct 24 22:50:14 1999
@@ -917,6 +917,11 @@ Perl_regexec_flags(pTHX_ register regexp
     else if (c = prog->regstclass) {
 	I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
 	char *cc;
+	char *m;
+	int ln;
+	int c1;
+	int c2;
+	char *e;
 
 	if (minlen)
 	    dontbother = minlen - 1;
@@ -952,6 +957,43 @@ Perl_regexec_flags(pTHX_ register regexp
 		s++;
 	    }
 	    break;
+	case EXACTF:
+	    m = STRING(c);
+	    ln = STR_LEN(c);
+	    c1 = *m;
+	    c2 = PL_fold[c1];
+	    goto do_exactf;
+	case EXACTFL:
+	    m = STRING(c);
+	    ln = STR_LEN(c);
+	    c1 = *m;
+	    c2 = PL_fold_locale[c1];
+	  do_exactf:
+	    e = strend - ln;
+
+	    /* Here it is NOT UTF!  */
+	    if (c1 == c2) {
+		while (s <= e) {
+		    if ( *s == c1
+			 && (ln == 1 || (OP(c) == EXACTF
+					 ? ibcmp(s, m, ln)
+					 : ibcmp_locale(s, m, ln)))
+			 && regtry(prog, s) )
+			goto got_it;
+		    s++;
+		}
+	    } else {
+		while (s <= e) {
+		    if ( (*s == c1 || *s == c2)
+			 && (ln == 1 || (OP(c) == EXACTF
+					 ? ibcmp(s, m, ln)
+					 : ibcmp_locale(s, m, ln)))
+			 && regtry(prog, s) )
+			goto got_it;
+		    s++;
+		}
+	    }
+	    break;
 	case BOUNDL:
 	    PL_reg_flags |= RF_tainted;
 	    /* FALL THROUGH */
@@ -1362,6 +1404,9 @@ Perl_regexec_flags(pTHX_ register regexp
 		    tmp = 1;
 		s += UTF8SKIP(s);
 	    }
+	    break;
+	default:
+	    croak("panic: unknown regstclass %d", (int)OP(c));
 	    break;
 	}
     }

Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About