develooper Front page | perl.perl5.changes | Postings from March 2019

[perl.git] branch blead updated. v5.29.8-119-g6ef7fe5319

From:
Karl Williamson
Date:
March 18, 2019 04:17
Subject:
[perl.git] branch blead updated. v5.29.8-119-g6ef7fe5319
Message ID:
E1h5jiP-0003Sq-BH@git.dc.perl.space
In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/6ef7fe531911e0b41ffcc04c1d6b6ec25a8b1bc9?hp=5fcc329e3d4274eaa2e1973b91ca0808434a3bbe>

- Log -----------------------------------------------------------------
commit 6ef7fe531911e0b41ffcc04c1d6b6ec25a8b1bc9
Author: Karl Williamson <khw@cpan.org>
Date:   Sun Mar 17 22:11:04 2019 -0600

    PATCH: [perl #131551] Too deep regex compilation recursion
    
    This patch, started by Yves Orton, and refined in consultation with Tony
    Cook, imposes a maximum depth of unclosed left parentheses, at which
    point it croaks.  This is to prevent the segfault in the ticket.
    
    The patch adds a variable that can be set to increase or decrease this
    limit at run time (actually regex compilation time) should this be
    desired, and hence our pre-determined limit of 1000 can be changed if
    necessary.

commit 3b89859ad83deaeff7c1ee7911d181ef10236879
Author: Karl Williamson <khw@cpan.org>
Date:   Sun Mar 17 21:59:52 2019 -0600

    regcomp.c: Use mnemonic for flag parameter

-----------------------------------------------------------------------

Summary of changes:
 pod/perldiag.pod    |  9 +++++++++
 pod/perlvar.pod     | 11 +++++++++++
 regcomp.c           | 14 +++++++++++++-
 regcomp.h           |  3 +++
 t/lib/croak/regcomp |  7 +++++++
 5 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index c1d776bb07..ec1edb6e70 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -6285,6 +6285,15 @@ The message attempts to include the name of the called subroutine. If the
 subroutine has been aliased, the subroutine's original name will be shown,
 regardless of what name the caller used.
 
+=item Too many nested open parens in regex; marked by <-- HERE in m/%s/
+
+(F) You have exceeded the number of open C<"("> parentheses that haven't
+been matched by corresponding closing ones.  This limit prevents eating
+up too much memory.  It is initially set to 1000, but may be changed by
+setting C<${^RE_COMPILE_RECURSION_LIMIT}> to some other value.  This may
+need to be done in a BEGIN block before the regular expression pattern
+is compiled.
+
 =item Too many )'s
 
 (A) You've accidentally run your script through B<csh> instead of Perl.
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index d67d4cd8b1..b42cbe2251 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -1249,6 +1249,17 @@ regular expression assertion (see L<perlre>).  May be written to.
 
 This variable was added in Perl 5.005.
 
+=item ${^RE_COMPILE_RECURSION_LIMIT}
+X<${^RE_COMPILE_RECURSION_LIMIT}>
+
+The current value giving the maximum number of open but unclosed
+parenthetical groups there may be at any point during a regular
+expression compilation.  The default is currently 1000 nested groups.
+You may adjust it depending on your needs and the amount of memory
+available.
+
+This variable was added in Perl v5.30.0.
+
 =item ${^RE_DEBUG_FLAGS}
 X<${^RE_DEBUG_FLAGS}>
 
diff --git a/regcomp.c b/regcomp.c
index 5f2c8633f1..816d735f5b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2700,7 +2700,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
         trie_words = newAV();
     });
 
-    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
+    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, GV_ADD);
     assert(re_trie_maxbuff);
     if (!SvIOK(re_trie_maxbuff)) {
         sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
@@ -11009,6 +11009,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     I32 freeze_paren = 0;
     I32 after_freeze = 0;
     I32 num; /* numeric backreferences */
+    SV * max_open;  /* Max number of unclosed parens */
 
     char * parse_start = RExC_parse; /* MJD */
     char * const oregcomp_parse = RExC_parse;
@@ -11018,6 +11019,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     PERL_ARGS_ASSERT_REG;
     DEBUG_PARSE("reg ");
 
+
+    max_open = get_sv(RE_COMPILE_RECURSION_LIMIT, GV_ADD);
+    assert(max_open);
+    if (!SvIOK(max_open)) {
+        sv_setiv(max_open, RE_COMPILE_RECURSION_INIT);
+    }
+    if (depth > 4 * SvIV(max_open)) { /* We increase depth by 4 for each open
+                                         paren */
+        vFAIL("Too many nested open parens");
+    }
+
     *flagp = 0;				/* Tentatively. */
 
     /* Having this true makes it feasible to have a lot fewer tests for the
diff --git a/regcomp.h b/regcomp.h
index 7a4432e3d6..5002e2b38d 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -946,6 +946,9 @@ typedef struct _reg_ac_data reg_ac_data;
 #define RE_TRIE_MAXBUF_NAME "\022E_TRIE_MAXBUF"
 #define RE_DEBUG_FLAGS "\022E_DEBUG_FLAGS"
 
+#define RE_COMPILE_RECURSION_INIT 1000
+#define RE_COMPILE_RECURSION_LIMIT "\022E_COMPILE_RECURSION_LIMIT"
+
 /*
 
 RE_DEBUG_FLAGS is used to control what debug output is emitted
diff --git a/t/lib/croak/regcomp b/t/lib/croak/regcomp
index c72e3d47bf..0ba705e915 100644
--- a/t/lib/croak/regcomp
+++ b/t/lib/croak/regcomp
@@ -63,3 +63,10 @@ my $p00="[\\x59\\N{U+.}]"; qr/$p00/ui;
 EXPECT
 Invalid hexadecimal number in \N{U+...} in regex; marked by <-- HERE in m/[\x59\N{U+. <-- HERE }]/ at - line 1.
 ########
+# NAME ${^RE_COMPILE_RECURSION_LIMIT} [perl #131551]
+BEGIN { ${^RE_COMPILE_RECURSION_LIMIT} = ${^RE_COMPILE_RECURSION_LIMIT} = 2; }
+qr/(a)/;
+qr/((a))/;
+EXPECT
+Too many nested open parens in regex; marked by <-- HERE in m/(( <-- HERE a))/ at - line 3.
+########

-- 
Perl5 Master Repository



nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About