develooper Front page | perl.perl5.changes | Postings from March 2019

[perl.git] branch blead updated. v5.29.8-110-gf5f909b1e1

From:
Karl Williamson
Date:
March 14, 2019 23:12
Subject:
[perl.git] branch blead updated. v5.29.8-110-gf5f909b1e1
Message ID:
E1h4ZWZ-0002XN-9X@git.dc.perl.space
In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/f5f909b1e1e4c878b51e4545f6c25837f4981cec?hp=912b808cb4fcd596e07f77898c626f5567fbe994>

- Log -----------------------------------------------------------------
commit f5f909b1e1e4c878b51e4545f6c25837f4981cec
Author: Karl Williamson <khw@cpan.org>
Date:   Thu Mar 14 17:06:16 2019 -0600

    regcomp.c: Add assertions
    
    These make sure we don't exceed the 32 bits available for long jumps.

commit bf848a12528ab1e63a2f20da532eda498adbdca6
Author: Karl Williamson <khw@cpan.org>
Date:   Thu Mar 14 16:46:50 2019 -0600

    Add more checking for regnode offset overflowing
    
    This is part of the ongoing failures in [perl #133921].
    
    The bottom line cause is that there are generally 16 bits available for
    the address of the next regnode.  On very large patterns, this may not
    be enough.  When that happens, a long jump is used instead.
    
    What previous commits have done is to insert tests in a loop to detect
    that overflow isn't going to occur.  But it turns out that there are
    other places where such overflow could occur.  The real solution should
    be to detect overflow in the base level routine that would otherwise get
    things wrong.  This entails making that routine be able to return
    failure.  It turns out that another function is used under DEBUGGING, so
    that one must be changed as well.  And the calls where it is possible
    for this to overflow are changed to look for failure return and proceed
    appropriately, which is to set a flag that we need to use long jumps,
    and restart the parse.

-----------------------------------------------------------------------

Summary of changes:
 embed.fnc  |  4 ++--
 proto.h    |  4 ++--
 regcomp.c  | 69 ++++++++++++++++++++++++++++++++++++++++++++------------------
 t/re/pat.t | 49 +++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 101 insertions(+), 25 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 517bcb577c..c1236e15ba 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2424,7 +2424,7 @@ Es	|void	|reginsert	|NN RExC_state_t *pRExC_state \
 				|const U8 op				    \
 				|const regnode_offset operand		    \
 				|const U32 depth
-Es	|void	|regtail	|NN RExC_state_t * pRExC_state		    \
+Es	|bool	|regtail	|NN RExC_state_t * pRExC_state		    \
 				|NN const regnode_offset p		    \
 				|NN const regnode_offset val		    \
 				|const U32 depth
@@ -2556,7 +2556,7 @@ Es	|void	|dump_trie_interim_list|NN const struct _reg_trie_data *trie\
 Es	|void	|dump_trie_interim_table|NN const struct _reg_trie_data *trie\
 				|NULLOK HV* widecharmap|NN AV *revcharmap\
 				|U32 next_alloc|U32 depth
-Es	|U8	|regtail_study	|NN RExC_state_t *pRExC_state \
+Es	|bool	|regtail_study	|NN RExC_state_t *pRExC_state \
 				|NN regnode_offset p|NN const regnode_offset val|U32 depth
 #  endif
 #endif
diff --git a/proto.h b/proto.h
index 936d344c32..49fcb89b7d 100644
--- a/proto.h
+++ b/proto.h
@@ -4434,7 +4434,7 @@ PERL_CALLCONV int	Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...);
 	assert(fmt)
 STATIC void	S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
 STATIC void	S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
-STATIC U8	S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
+STATIC bool	S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
 #define PERL_ARGS_ASSERT_REGTAIL_STUDY	\
 	assert(pRExC_state); assert(p); assert(val)
 #  endif
@@ -5569,7 +5569,7 @@ STATIC regnode_offset	S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 o
 STATIC regnode_offset	S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
 #define PERL_ARGS_ASSERT_REGPIECE	\
 	assert(pRExC_state); assert(flagp)
-STATIC void	S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
+STATIC bool	S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
 #define PERL_ARGS_ASSERT_REGTAIL	\
 	assert(pRExC_state); assert(p); assert(val)
 STATIC void	S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
diff --git a/regcomp.c b/regcomp.c
index 15d843e7c2..5f2c8633f1 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -364,7 +364,6 @@ struct RExC_state_t {
             }                                                               \
     } STMT_END
 
-#define BRANCH_MAX_OFFSET   U16_MAX
 #define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
     STMT_START {                                                            \
                 RExC_use_BRANCHJ = 1;                                       \
@@ -12070,7 +12069,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             RETURN_FAIL_ON_RESTART(flags, flagp);
             FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
         }
-        REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
+        if (!  REGTAIL(pRExC_state, lastbr, br)) {  /* BRANCH -> BRANCH. */
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
 	lastbr = br;
 	*flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
     }
@@ -12141,7 +12142,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                           (IV)(ender - lastbr)
             );
         );
-        REGTAIL(pRExC_state, lastbr, ender);
+        if (! REGTAIL(pRExC_state, lastbr, ender)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
 
 	if (have_branch) {
             char is_nothing= 1;
@@ -12152,9 +12155,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 	    for (br = REGNODE_p(ret); br; br = regnext(br)) {
 		const U8 op = PL_regkind[OP(br)];
 		if (op == BRANCH) {
-                    REGTAIL_STUDY(pRExC_state,
-                                  REGNODE_OFFSET(NEXTOPER(br)),
-                                  ender);
+                    if (! REGTAIL_STUDY(pRExC_state,
+                                        REGNODE_OFFSET(NEXTOPER(br)),
+                                        ender))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                     if ( OP(NEXTOPER(br)) != NOTHING
                          || regnext(NEXTOPER(br)) != REGNODE_p(ender))
                         is_nothing= 0;
@@ -12221,7 +12227,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
 	    Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
 	    FLAGS(REGNODE_p(ret)) = flag;
-            REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
+            if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
+            {
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
 	}
     }
 
@@ -12315,16 +12324,14 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
 	    /* FIXME adding one for every branch after the first is probably
 	     * excessive now we have TRIE support. (hv) */
 	    MARK_NAUGHTY(1);
-            REGTAIL(pRExC_state, chain, latest);
+            if (! REGTAIL(pRExC_state, chain, latest)) {
+                /* XXX We could just redo this branch, but figuring out what
+                 * bookkeeping needs to be reset is a pain, and it's likely
+                 * that other branches that goto END will also be too large */
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
 	}
 	chain = latest;
-        if (     chain > (SSize_t) BRANCH_MAX_OFFSET
-            && ! RExC_use_BRANCHJ)
-        {
-            /* XXX We could just redo this branch, but figuring out what
-                * bookkeeping needs to be reset is a pain */
-            REQUIRE_BRANCHJ(flagp, 0);
-        }
 	c++;
     }
     if (chain == 0) {	/* Loop ran zero times. */
@@ -19627,10 +19634,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
 }
 
 /*
-- regtail - set the next-pointer at the end of a node chain of p to val.
+- regtail - set the next-pointer at the end of a node chain of p to val.  If
+            that value won't fit in the space available, instead returns FALSE.
+            (Except asserts if we can't fit in the largest space the regex
+            engine is designed for.)
 - SEE ALSO: regtail_study
 */
-STATIC void
+STATIC bool
 S_regtail(pTHX_ RExC_state_t * pRExC_state,
                 const regnode_offset p,
                 const regnode_offset val,
@@ -19663,11 +19673,21 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
     }
 
     if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+        assert(val - scan <= U32_MAX);
         ARG_SET(REGNODE_p(scan), val - scan);
     }
     else {
+        if (val - scan > U16_MAX) {
+            /* Since not all callers check the return value, populate this with
+             * something that won't loop and will likely lead to a crash if
+             * execution continues */
+            NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+            return FALSE;
+        }
         NEXT_OFF(REGNODE_p(scan)) = val - scan;
     }
+
+    return TRUE;
 }
 
 #ifdef DEBUGGING
@@ -19684,10 +19704,14 @@ that it is purely analytical.
 Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
 to control which is which.
 
+This used to return a value that was ignored.  It was a problem that it is
+#ifdef'd to be another function that didn't return a value.  khw has changed it
+so both currently return a pass/fail return.
+
 */
 /* TODO: All four parms should be const */
 
-STATIC U8
+STATIC bool
 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
                       const regnode_offset val, U32 depth)
 {
@@ -19711,7 +19735,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
 	    bool unfolded_multi_char;	/* Unexamined in this routine */
             if (join_exact(pRExC_state, scan, &min,
                            &unfolded_multi_char, 1, REGNODE_p(val), depth+1))
-                return EXACT;
+                return TRUE; /* Was return EXACT */
 	}
 #endif
         if ( exact ) {
@@ -19761,13 +19785,18 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
         );
     });
     if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+        assert(val - scan <= U32_MAX);
 	ARG_SET(REGNODE_p(scan), val - scan);
     }
     else {
+        if (val - scan > U16_MAX) {
+            NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+            return FALSE;
+        }
 	NEXT_OFF(REGNODE_p(scan)) = val - scan;
     }
 
-    return exact;
+    return TRUE; /* Was 'return exact' */
 }
 #endif
 
diff --git a/t/re/pat.t b/t/re/pat.t
index 7bb215a1ea..c2b9d447a0 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -24,7 +24,7 @@ BEGIN {
 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
 skip_all_without_unicode_tables();
 
-plan tests => 857;  # Update this when adding/deleting tests.
+plan tests => 859;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -2008,6 +2008,53 @@ CODE
     {   # [perl #133921], segfault
         fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\x
         fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \x
+
+fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿ
+
+
+
+
+	ç
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+x{0c!}\;\;çÿ 
+
+        fresh_perl_is('a aú
+
+
+
+
+
+	ç
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+x{1c!}\;\;îçÿp 
     }
 
 } # End of sub run_tests

-- 
Perl5 Master Repository



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About