Front page | perl.perl5.porters |
Postings from April 2001
Re: [PATCH bleadperl] [ID 20010426.002] Word boundry regex [...]
Thread Previous
|
Thread Next
From:
Hugo
Date:
April 29, 2001 10:07
Subject:
Re: [PATCH bleadperl] [ID 20010426.002] Word boundry regex [...]
Message ID:
200104291609.RAA17790@crypt.compulink.co.uk
In <20010428164714.B22905@math.ohio-state.edu>, Ilya Zakharevich writes:
:On Sat, Apr 28, 2001 at 08:41:13PM +0100, Hugo wrote:
:> :The only suggestion I have is that there might have been a case when
:> :this chunk is entered *before* PL_regprev is set. Can this happen?
:>
:> Not that I can see: regexec_flags() sets it whenever startpos == strbeg,
:> before any calls to find_byclass either indirectly (through intuit_start)
:> or directly.
:
:This means that I'm right: intuit_start() is usually called *before*
:regexec_flags() is entered.
However, it is called either at the start of the string or after a
successful call to regexec_flags(), after either of which PL_regprev
should be correctly set. The only exception is when we get a successful
match without calling regexec_flags(), in pp_match (look for 'goto yup'),
so I guess we additionally need to set PL_regprev in this case.
:> Is it possible that lookbehind could get back to strbeg when PL_regprev
:> is not set, or not set correctly?
:
:I would hope that lookbehind resets PL_regprev.
Not that I can see.
It seems really difficult to get PL_regprev correct in all cases. Below is
a patch to remove PL_regprev entirely, which doesn't fail any of the
existing tests and which I believe should offer a performance improvement.
I assume there may be problems not currently tested, particularly in the
area of in-place substitutions - can you suggest test cases that fail?
The remaining mention of PL_regprev is in Porting/findvars - I'm not sure
what that is used for, so I have left it alone.
Hugo
--- thrdvar.h.old Tue Jan 30 18:14:40 2001
+++ thrdvar.h Sun Apr 29 16:18:07 2001
@@ -183,7 +183,6 @@
PERLVAR(Tregendp, I32 *) /* Ditto for endp. */
PERLVAR(Treglastparen, U32 *) /* Similarly for lastparen. */
PERLVAR(Tregtill, char *) /* How far we are required to go. */
-PERLVAR(Tregprev, char) /* char before regbol, \n if none */
PERLVAR(Treg_start_tmp, char **) /* from regexec.c */
PERLVAR(Treg_start_tmpl,U32) /* from regexec.c */
PERLVAR(Tregdata, struct reg_data *)
--- sv.c.old Mon Apr 23 15:43:21 2001
+++ sv.c Sun Apr 29 16:16:16 2001
@@ -9295,7 +9295,6 @@
PL_regendp = (I32*)NULL;
PL_reglastparen = (U32*)NULL;
PL_regtill = Nullch;
- PL_regprev = '\n';
PL_reg_start_tmp = (char**)NULL;
PL_reg_start_tmpl = 0;
PL_regdata = (struct reg_data*)NULL;
--- regcomp.c.old Thu Apr 26 15:21:19 2001
+++ regcomp.c Sun Apr 29 16:15:33 2001
@@ -4683,7 +4683,6 @@
SAVEVPTR(PL_regendp); /* Ditto for endp. */
SAVEVPTR(PL_reglastparen); /* Similarly for lastparen. */
SAVEPPTR(PL_regtill); /* How far we are required to go. */
- SAVEI8(PL_regprev); /* char before regbol, \n if none */
SAVEGENERICPV(PL_reg_start_tmp); /* from regexec.c */
PL_reg_start_tmp = 0;
SAVEI32(PL_reg_start_tmpl); /* from regexec.c */
--- regexec.c.old Wed Apr 11 14:14:26 2001
+++ regexec.c Sun Apr 29 17:00:30 2001
@@ -946,7 +946,7 @@
/* FALL THROUGH */
case BOUND:
if (do_utf8) {
- if (s == startpos)
+ if (s == PL_bostr)
tmp = '\n';
else {
U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
@@ -969,7 +969,7 @@
}
}
else {
- tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
+ tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
if (tmp ==
@@ -989,7 +989,7 @@
/* FALL THROUGH */
case NBOUND:
if (do_utf8) {
- if (s == startpos)
+ if (s == PL_bostr)
tmp = '\n';
else {
U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
@@ -1010,7 +1010,7 @@
}
}
else {
- tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
+ tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == NBOUND ?
isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
@@ -1429,19 +1429,6 @@
if (strend - startpos < minlen) goto phooey;
}
- if (startpos == strbeg) /* is ^ valid at stringarg? */
- PL_regprev = '\n';
- else {
- if (prog->reganch & ROPT_UTF8 && do_utf8) {
- U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
- PL_regprev = utf8n_to_uvchr(s, (U8*)stringarg - s, NULL, 0);
- }
- else
- PL_regprev = (U32)stringarg[-1];
- if (!PL_multiline && PL_regprev == '\n')
- PL_regprev = '\0'; /* force ^ to NOT match */
- }
-
/* Check validity of program. */
if (UCHARAT(prog->program) != REG_MAGIC) {
Perl_croak(aTHX_ "corrupted regexp program");
@@ -2044,19 +2031,16 @@
switch (OP(scan)) {
case BOL:
- if (locinput == PL_bostr
- ? PL_regprev == '\n'
- : (PL_multiline &&
- (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
+ if (locinput == PL_bostr || (PL_multiline &&
+ (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
{
/* regtill = regbol; */
break;
}
sayNO;
case MBOL:
- if (locinput == PL_bostr
- ? PL_regprev == '\n'
- : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
+ if (locinput == PL_bostr ||
+ ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
{
break;
}
@@ -2259,8 +2243,8 @@
case NBOUND:
/* was last char in word? */
if (do_utf8) {
- if (locinput == PL_regbol)
- ln = PL_regprev;
+ if (locinput == PL_bostr)
+ ln = '\n';
else {
U8 *r = reghop((U8*)locinput, -1);
@@ -2277,8 +2261,8 @@
}
}
else {
- ln = (locinput != PL_regbol) ?
- UCHARAT(locinput - 1) : PL_regprev;
+ ln = (locinput != PL_bostr) ?
+ UCHARAT(locinput - 1) : '\n';
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM(ln);
n = isALNUM(nextchr);
--- t/op/re_tests.old Thu Mar 22 06:06:54 2001
+++ t/op/re_tests Sun Apr 29 16:58:51 2001
@@ -787,3 +787,4 @@
(a)?(a)+ a y $1:$2 :a -
(ab)?(ab)+ ab y $1:$2 :ab -
(abc)?(abc)+ abc y $1:$2 :abc -
+'b\s^'m a\nb\n n - -
--- t/op/subst.t.old Tue Aug 29 13:54:13 2000
+++ t/op/subst.t Sat Apr 28 13:57:22 2001
@@ -6,7 +6,7 @@
require Config; import Config;
}
-print "1..84\n";
+print "1..85\n";
$x = 'foo';
$_ = "x";
@@ -378,4 +378,8 @@
$_ = "C:/";
s/^([a-z]:)/\u$1/ and print "not ";
print "ok 84\n";
+
+$_ = "Charles Bronson";
+s/\B\w//g;
+print $_ eq "C B" ? "ok 85\n" : "not ok 85\n# \$_ eq '$_'\n";
Thread Previous
|
Thread Next