develooper Front page | perl.perl5.changes | Postings from September 2019

[perl.git] branch smoke-me/khw-lexact updated.v5.31.3-188-geb8f0ab482

From:
Karl Williamson
Date:
September 15, 2019 23:25
Subject:
[perl.git] branch smoke-me/khw-lexact updated.v5.31.3-188-geb8f0ab482
Message ID:
E1i9dtO-0000Su-3f@git.dc.perl.space
In perl.git, the branch smoke-me/khw-lexact has been updated:

<https://perl5.git.perl.org/perl.git/commitdiff/eb8f0ab482110da549e2c58a1c05a6ce1c0b6246?hp=8224bce37e0350a2b529bcff668f074a310b9971>

- Log -----------------------------------------------------------------
commit eb8f0ab482110da549e2c58a1c05a6ce1c0b6246
Author: Karl Williamson <khw@cpan.org>
Date:   Sun Sep 15 17:25:08 2019 -0600

    f

-----------------------------------------------------------------------

Summary of changes:
 regcomp.h |  8 ++++----
 regexec.c | 64 ++++++++++++++++++++++++++++++++++++++++++---------------------
 2 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/regcomp.h b/regcomp.h
index ed16f2491d..0b6844ad11 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -331,12 +331,12 @@ struct regnode_ssc {
 #define FLAGS(p)	((p)->flags)	/* Caution: Doesn't apply to all      \
 					   regnode types.  For some, it's the \
 					   character set of the regnode */
-#define	STR_LENs(p)	(((struct regnode_string *)p)->str_len)
-#define	STRINGs(p)	(((struct regnode_string *)p)->string)
+#define	STR_LENs(p)	(__ASSERT_(OP(p) != LEXACT) ((struct regnode_string *)p)->str_len)
+#define	STRINGs(p)	(__ASSERT_(OP(p) != LEXACT) ((struct regnode_string *)p)->string)
 #define	OPERANDs(p)	STRINGs(p)
 
-#define	STR_LENl(p)	((U16)((((struct regnode_string *)p)->str_len) | ((((struct regnode_string *)p)->string[0]) << 8)))
-#define	STRINGl(p)	((((struct regnode_string *)p)->string) + 1)
+#define	STR_LENl(p)	(__ASSERT_(OP(p) == LEXACT) (U16)((((struct regnode_string *)p)->str_len) | ((((struct regnode_string *)p)->string[0]) << 8)))
+#define	STRINGl(p)	(__ASSERT_(OP(p) == LEXACT) (((struct regnode_string *)p)->string) + 1)
 #define	OPERANDl(p)	STRINGl(p)
 
 #define	OPERAND(p)	((OP(p) == LEXACT) ? OPERANDl(p) : OPERANDs(p))
diff --git a/regexec.c b/regexec.c
index 62e473e6a9..973b9e93a1 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2298,8 +2298,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          * first character.  c2 is its fold.  This logic will not work for
          * Unicode semantics and the german sharp ss, which hence should
          * not be compiled into a node that gets here. */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);	/* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);	/* length to match in octets/bytes */
 
         /* We know that we have to match at least 'ln' bytes (which is the
          * same as characters, since not utf8).  If we have to match 3
@@ -2374,8 +2374,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         /* If one of the operands is in utf8, we can't use the simpler folding
          * above, due to the fact that many different characters can have the
          * same fold, or portion of a fold, or different- length fold */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);	/* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);	/* length to match in octets/bytes */
         pat_end = pat_string + ln;
         lnc = is_utf8_pat       /* length to match in characters */
                 ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
@@ -4263,7 +4263,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
         }
     }
     else { /* an EXACTFish node */
-        U8 *pat_end = pat + STR_LEN(text_node);
+        U8 *pat_end = pat + STR_LENs(text_node);
 
         /* An EXACTFL node has at least some characters unfolded, because what
          * they match is not known until now.  So, now is the time to fold
@@ -6275,6 +6275,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         }
 #undef  ST
 
+	case LEXACT:
+        {
+	    char *s;
+
+	    s = STRINGl(scan);
+	    ln = STR_LENl(scan);
+            goto join_short_long_exact;
+
 	case EXACTL:             /*  /abc/l       */
             _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
 
@@ -6293,13 +6301,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 sayNO;
             }
             /* FALLTHROUGH */
+
 	case EXACT:             /*  /abc/        */
-	case LEXACT:
-        {
-	    char *s;
           do_exact:
-	    s = STRING(scan);
-	    ln = STR_LEN(scan);
+	    s = STRINGs(scan);
+	    ln = STR_LENs(scan);
+
+          join_short_long_exact:
 	    if (utf8_target != is_utf8_pat) {
 		/* The target and the pattern have differing utf8ness. */
 		char *l = locinput;
@@ -6451,8 +6459,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    fold_utf8_flags = 0;
 
 	  do_exactf:
-	    s = STRING(scan);
-	    ln = STR_LEN(scan);
+	    s = STRINGs(scan);
+	    ln = STR_LENs(scan);
 
 	    if (   utf8_target
                 || is_utf8_pat
@@ -9366,6 +9374,16 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 	else
 	    scan = this_eol;
 	break;
+
+    case LEXACT:
+      {
+        U8 * string;
+        Size_t str_len;
+
+	string = (U8 *) STRINGl(p);
+        str_len = STR_LENl(p);
+        goto join_short_long_exact;
+
     case EXACTL:
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
@@ -9378,12 +9396,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             break;
         }
         /* FALLTHROUGH */
-    case LEXACT:
     case EXACT:
       do_exact:
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+	string = (U8 *) STRINGs(p);
+        str_len = STR_LENs(p);
+
+      join_short_long_exact:
+        assert(str_len == reginfo->is_utf8_pat ? UTF8SKIP(string) : 1);
 
-	c = (U8)*STRING(p);
+	c = *string;
 
         /* Can use a simple find if the pattern char to match on is invariant
          * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
@@ -9405,8 +9426,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  * string EQ */
                 while (hardcount < max
                        && scan < this_eol
-                       && (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
-                       && memEQ(scan, STRING(p), scan_char_len))
+                       && (scan_char_len = UTF8SKIP(scan)) <= str_len
+                       && memEQ(scan, string, scan_char_len))
                 {
                     scan += scan_char_len;
                     hardcount++;
@@ -9416,7 +9437,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 
                 /* Target isn't utf8; convert the character in the UTF-8
                  * pattern to non-UTF8, and do a simple find */
-                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
+                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(string + 1));
                 scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
             } /* else pattern char is above Latin1, can't possibly match the
                  non-UTF-8 target */
@@ -9440,6 +9461,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 	    }
 	}
 	break;
+      }
 
     case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
         assert(! reginfo->is_utf8_pat);
@@ -9490,7 +9512,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         int c1, c2;
         U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
 
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LENs(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1);
 
         if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
                                         reginfo))
@@ -9498,10 +9520,10 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             if (c1 == CHRTEST_VOID) {
                 /* Use full Unicode fold matching */
                 char *tmpeol = loceol;
-                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
+                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1;
                 while (hardcount < max
                         && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
-                                             STRING(p), NULL, pat_len,
+                                             STRINGs(p), NULL, pat_len,
                                              reginfo->is_utf8_pat, utf8_flags))
                 {
                     scan = tmpeol;

-- 
Perl5 Master Repository



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About