develooper Front page | perl.perl5.changes | Postings from March 2019

[perl.git] branch blead updated. v5.29.8-104-ge081490516

From:
Karl Williamson
Date:
March 14, 2019 01:28
Subject:
[perl.git] branch blead updated. v5.29.8-104-ge081490516
Message ID:
E1h4FA2-0002M3-PD@git.dc.perl.space
In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/e0814905161f8420d3be47dd3883b1f3c9590a82?hp=99956112fb8debe619f1aa0f9a8c2a2887b1bd7a>

- Log -----------------------------------------------------------------
commit e0814905161f8420d3be47dd3883b1f3c9590a82
Author: Karl Williamson <khw@cpan.org>
Date:   Wed Mar 13 19:11:46 2019 -0600

    PATCH: [perl #133871] heap-buffer-overflow in S_reginsert
    
    The regex compiler was written assuming it knew how many parentheses
    pairs there were at code generation time.  When I converted to a single
    pass in 7c932d07cab18751bfc7515b4320436273a459e2, most things were
    straight forward to not have to know this number, but there were a few
    where it was non-trivial (for me anyway) to figure out how to handle.
    So I punted on these and do a second pass when these are encountered.
    There are few of them and are less commonly used, namely (?R), (?|...)
    and forward references to groups (which most commonly will end up being
    a syntax error anyway).
    
    The fix in this commit is to avoid doing some parentheses relocations
    when a regnode is inserted when it is known that the parentheses counts
    are unreliable (during initial parsing of one of these tricky
    constructs).  The code in the ticket is using a branch reset '(?|...)'.
    A second pass will get done, and the insert properly handled then, after
    the counts are reliable.

commit b76e1c044f939bf713dab1a40f484ed2b22561a7
Author: Karl Williamson <khw@cpan.org>
Date:   Wed Mar 13 18:41:45 2019 -0600

    regcomp.c: Create macro, add comments

commit 1c1a700b2cac24972380a42f01321ad242e0c70c
Author: Karl Williamson <khw@cpan.org>
Date:   Wed Mar 13 18:45:34 2019 -0600

    regcomp.c: Rmv unused variable

-----------------------------------------------------------------------

Summary of changes:
 regcomp.c  | 42 +++++++++++++++++++++++++++---------------
 t/re/pat.t |  6 +++++-
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index d7c3d46fc8..51679e9063 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -353,7 +353,7 @@ struct RExC_state_t {
             if (DEPENDS_SEMANTICS) {                                        \
                 set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                 RExC_uni_semantics = 1;                                     \
-                if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) {     \
+                if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) {           \
                     /* No need to restart the parse if we haven't seen      \
                      * anything that differs between /u and /d, and no need \
                      * to restart immediately if we're going to reparse     \
@@ -368,7 +368,7 @@ struct RExC_state_t {
 #define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
     STMT_START {                                                            \
                 RExC_use_BRANCHJ = 1;                                       \
-                if (LIKELY(RExC_total_parens >= 0)) {                       \
+                if (LIKELY(! IN_PARENS_PASS)) {                             \
                     /* No need to restart the parse immediately if we're    \
                      * going to reparse anyway to count parens */           \
                     *flagp |= RESTART_PARSE;                                \
@@ -376,10 +376,19 @@ struct RExC_state_t {
                 }                                                           \
     } STMT_END
 
+/* Until we have completed the parse, we leave RExC_total_parens at 0 or
+ * less.  After that, it must always be positive, because the whole re is
+ * considered to be surrounded by virtual parens.  Setting it to negative
+ * indicates there is some construct that needs to know the actual number of
+ * parens to be properly handled.  And that means an extra pass will be
+ * required after we've counted them all */
+#define ALL_PARENS_COUNTED (RExC_total_parens > 0)
 #define REQUIRE_PARENS_PASS                                                 \
-    STMT_START {                                                            \
-                    if (RExC_total_parens == 0) RExC_total_parens = -1;     \
+    STMT_START {  /* No-op if have completed a pass */                      \
+                    if (! ALL_PARENS_COUNTED) RExC_total_parens = -1;       \
     } STMT_END
+#define IN_PARENS_PASS (RExC_total_parens < 0)
+
 
 /* This is used to return failure (zero) early from the calling function if
  * various flags in 'flags' are set.  Two flags always cause a return:
@@ -7667,9 +7676,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* Do the parse */
     if (reg(pRExC_state, 0, &flags, 1)) {
 
-        /* Success!, But if RExC_total_parens < 0, we need to redo the parse
-         * knowing how many parens there actually are */
-        if (RExC_total_parens < 0) {
+        /* Success!, But we may need to redo the parse knowing how many parens
+         * there actually are */
+        if (IN_PARENS_PASS) {
             flags |= RESTART_PARSE;
         }
 
@@ -7711,7 +7720,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
         }
 
-        if (RExC_total_parens > 0) {
+        if (ALL_PARENS_COUNTED) {
             /* Make enough room for all the known parens, and zero it */
             Renew(RExC_open_parens, RExC_total_parens, regnode_offset);
             Zero(RExC_open_parens, RExC_total_parens, regnode_offset);
@@ -8809,7 +8818,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
             /* It might be a forward reference; we can't fail until we
                 * know, by completing the parse to get all the groups, and
                 * then reparsing */
-            if (RExC_total_parens > 0)  {
+            if (ALL_PARENS_COUNTED)  {
                 vFAIL("Reference to nonexistent named group");
             }
             else {
@@ -11595,7 +11604,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         /* It might be a forward reference; we can't fail until
                          * we know, by completing the parse to get all the
                          * groups, and then reparsing */
-                        if (RExC_total_parens > 0)  {
+                        if (ALL_PARENS_COUNTED)  {
                             RExC_parse++;
                             vFAIL("Reference to nonexistent group");
                         }
@@ -11621,7 +11630,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     /* It might be a forward reference; we can't fail until we
                      * know, by completing the parse to get all the groups, and
                      * then reparsing */
-                    if (RExC_total_parens > 0)  {
+                    if (ALL_PARENS_COUNTED)  {
                         if (num >= RExC_total_parens) {
                             RExC_parse++;
                             vFAIL("Reference to nonexistent group");
@@ -11952,7 +11961,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 	  capturing_parens:
 	    parno = RExC_npar;
 	    RExC_npar++;
-            if (RExC_total_parens <= 0) {
+            if (! ALL_PARENS_COUNTED) {
                 /* If we are in our first pass through (and maybe only pass),
                  * we  need to allocate memory for the capturing parentheses
                  * data structures.  Since we start at npar=1, when it reaches
@@ -12666,7 +12675,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     SV * substitute_parse = NULL;
     char *orig_end;
     char *save_start;
-    bool save_strict;
     I32 flags;
 
     GET_RE_DEBUG_FLAGS_DECL;
@@ -13732,7 +13740,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     /* It might be a forward reference; we can't fail until we
                      * know, by completing the parse to get all the groups, and
                      * then reparsing */
-                    if (RExC_total_parens > 0)  {
+                    if (ALL_PARENS_COUNTED)  {
                         if (num >= RExC_total_parens)  {
                             vFAIL("Reference to nonexistent group");
                         }
@@ -19542,7 +19550,11 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
     src = REGNODE_p(RExC_emit);
     RExC_emit += size;
     dst = REGNODE_p(RExC_emit);
-    if (RExC_open_parens) {
+
+    /* If we are in a "count the parentheses" pass, the numbers are unreliable,
+     * and [perl #133871] shows this can lead to problems, so skip this
+     * realignment of parens until a later pass when they are reliable */
+    if (! IN_PARENS_PASS && RExC_open_parens) {
         int paren;
         /*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/
         /* remember that RExC_npar is rex->nparens + 1,
diff --git a/t/re/pat.t b/t/re/pat.t
index e97031f2d9..f1be50ae1b 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -24,7 +24,7 @@ BEGIN {
 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
 skip_all_without_unicode_tables();
 
-plan tests => 854;  # Update this when adding/deleting tests.
+plan tests => 855;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -2002,6 +2002,10 @@ while( "\N{U+100}bc" =~ /(..?)(?{$^N})/g ) {
 }
 CODE
     }
+    {   # [perl #133871], ASAN/valgrind out-of-bounds access
+;
+        fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]");
+    }
 
 } # End of sub run_tests
 

-- 
Perl5 Master Repository



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About