Submitted By: Ken Moffat Date: 2016-03-16 Initial Package Version: 8.38 Upstream Status: Applied Origin: Upstream, backported to 8.38 by Petr Písař at redhat Description: Various fixes, including for CVE-2016-1283 and many other bugs which have been fixed upstream. Many of these bugs were found by fuzzing; upstream is trying to persuade its users to move to pcre2 and giving low priority to further pcre1 maintenance releases. From 3c80e02cd464ea049e117b423fd48fab294c51a9 Mon Sep 17 00:00:00 2001 From: ph10 Date: Thu, 26 Nov 2015 20:29:13 +0000 Subject: [PATCH] Fix auto-callout (?# comment bug. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1611 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Pisar: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index 4d3b313..3360a8b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4699,6 +4699,23 @@ for (;; ptr++) } } + /* Skip over (?# comments. We need to do this here because we want to know if + the next thing is a quantifier, and these comments may come between an item + and its quantifier. */ + + if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK && + ptr[2] == CHAR_NUMBER_SIGN) + { + ptr += 3; + while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; + if (*ptr == CHAR_NULL) + { + *errorcodeptr = ERR18; + goto FAILED; + } + continue; + } + /* See if the next thing is a quantifier. */ is_quantifier = @@ -6529,21 +6546,6 @@ for (;; ptr++) case CHAR_LEFT_PARENTHESIS: ptr++; - /* First deal with comments. Putting this code right at the start ensures - that comments have no bad side effects. */ - - if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) - { - ptr += 2; - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr == CHAR_NULL) - { - *errorcodeptr = ERR18; - goto FAILED; - } - continue; - } - /* Now deal with various "verbs" that can be introduced by '*'.
*/ if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' diff --git a/testdata/testinput2 b/testdata/testinput2 index e2e520f..92e3359 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4217,4 +4217,12 @@ backtracking verbs. --/ /a[[:punct:]b]/BZ +/L(?#(|++ Date: Fri, 27 Nov 2015 17:13:13 +0000 Subject: [PATCH 1/5] Fix negated POSIX class within negated overall class UCP bug. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1612 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index 3360a8b..3670f1e 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -5063,20 +5063,22 @@ for (;; ptr++) ptr = tempptr + 1; continue; - /* For the other POSIX classes (ascii, xdigit) we are going to fall - through to the non-UCP case and build a bit map for characters with - code points less than 256. If we are in a negated POSIX class - within a non-negated overall class, characters with code points - greater than 255 must all match. In the special case where we have - not yet generated any xclass data, and this is the final item in - the overall class, we need do nothing: later on, the opcode + /* For the other POSIX classes (ascii, cntrl, xdigit) we are going + to fall through to the non-UCP case and build a bit map for + characters with code points less than 256. If we are in a negated + POSIX class, characters with code points greater than 255 must + either all match or all not match. In the special case where we + have not yet generated any xclass data, and this is the final item + in the overall class, we need do nothing: later on, the opcode OP_NCLASS will be used to indicate that characters greater than 255 are acceptable. If we have already seen an xclass item or one may follow (we have to assume that it might if this is not the end of - the class), explicitly match all wide codepoints. 
*/ + the class), explicitly list all wide codepoints, which will then + either not match or match, depending on whether the class is or is + not negated. */ default: - if (!negate_class && local_negate && + if (local_negate && (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET)) { *class_uchardata++ = XCL_RANGE; diff --git a/testdata/testinput6 b/testdata/testinput6 index aeb62a0..a178d3d 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -1553,4 +1553,13 @@ \x{200} \x{37e} +/[^[:^ascii:]\d]/8W + a + ~ + 0 + \a + \x{7f} + \x{389} + \x{20ac} + /-- End of testinput6 --/ diff --git a/testdata/testoutput6 b/testdata/testoutput6 index beb85aa..b64dc0d 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -2557,4 +2557,20 @@ No match \x{37e} 0: \x{37e} +/[^[:^ascii:]\d]/8W + a + 0: a + ~ + 0: ~ + 0 +No match + \a + 0: \x{07} + \x{7f} + 0: \x{7f} + \x{389} +No match + \x{20ac} +No match + /-- End of testinput6 --/ -- 2.4.3 From bfc1dfa660c24dc7a75108d934290e50d7db2719 Mon Sep 17 00:00:00 2001 From: ph10 Date: Fri, 27 Nov 2015 17:41:04 +0000 Subject: [PATCH 2/5] Fix bug for isolated \E between an item and its qualifier when auto callout is set. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1613 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index 3670f1e..5786cd3 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4645,9 +4645,10 @@ for (;; ptr++) goto FAILED; } - /* If in \Q...\E, check for the end; if not, we have a literal */ + /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an + isolated \E is ignored. 
*/ - if (inescq && c != CHAR_NULL) + if (c != CHAR_NULL) { if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) { @@ -4655,7 +4656,7 @@ for (;; ptr++) ptr++; continue; } - else + else if (inescq) { if (previous_callout != NULL) { @@ -4670,7 +4671,6 @@ for (;; ptr++) } goto NORMAL_CHAR; } - /* Control does not reach here. */ } /* In extended mode, skip white space and comments. We need a loop in order diff --git a/testdata/testinput2 b/testdata/testinput2 index 92e3359..e8ca4fe 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4225,4 +4225,6 @@ backtracking verbs. --/ /L(?#(|++ Date: Sun, 29 Nov 2015 17:38:25 +0000 Subject: [PATCH 3/5] Give error for regexec with pmatch=NULL and REG_STARTEND set. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1614 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcreposix.c b/pcreposix.c index f024423..dcc13ef 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -364,6 +364,7 @@ start location rather than being passed as a PCRE "starting offset". */ if ((eflags & REG_STARTEND) != 0) { + if (pmatch == NULL) return REG_INVARG; so = pmatch[0].rm_so; eo = pmatch[0].rm_eo; } -- 2.4.3 From e347b40d5bb12f7ef1e632aa649571a107be7d8a Mon Sep 17 00:00:00 2001 From: ph10 Date: Sun, 29 Nov 2015 17:46:23 +0000 Subject: [PATCH 4/5] Allow for up to 32-bit numbers in the ordin() function in pcregrep. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1615 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. 
diff --git a/pcregrep.c b/pcregrep.c index 64986b0..cd53c64 100644 --- a/pcregrep.c +++ b/pcregrep.c @@ -2437,7 +2437,7 @@ return options; static char * ordin(int n) { -static char buffer[8]; +static char buffer[14]; char *p = buffer; sprintf(p, "%d", n); while (*p != 0) p++; -- 2.4.3 From e78ad4264b16988b826bd2939a1781c1165a92d9 Mon Sep 17 00:00:00 2001 From: ph10 Date: Mon, 30 Nov 2015 17:44:45 +0000 Subject: [PATCH 5/5] Fix \Q\E before qualifier bug when auto callouts are enabled. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1616 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index 5786cd3..beed46b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4671,17 +4671,27 @@ for (;; ptr++) } goto NORMAL_CHAR; } + + /* Check for the start of a \Q...\E sequence. We must do this here rather + than later in case it is immediately followed by \E, which turns it into a + "do nothing" sequence. */ + + if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q) + { + inescq = TRUE; + ptr++; + continue; + } } - /* In extended mode, skip white space and comments. We need a loop in order - to check for more white space and more comments after a comment. */ + /* In extended mode, skip white space and comments. */ if ((options & PCRE_EXTENDED) != 0) { - for (;;) + const pcre_uchar *wscptr = ptr; + while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr); + if (c == CHAR_NUMBER_SIGN) { - while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr); - if (c != CHAR_NUMBER_SIGN) break; ptr++; while (*ptr != CHAR_NULL) { @@ -4695,7 +4705,15 @@ for (;; ptr++) if (utf) FORWARDCHAR(ptr); #endif } - c = *ptr; /* Either NULL or the char after a newline */ + } + + /* If we skipped any characters, restart the loop. Otherwise, we didn't see + a comment. 
*/ + + if (ptr > wscptr) + { + ptr--; + continue; } } @@ -7900,16 +7918,6 @@ for (;; ptr++) c = ec; else { - if (escape == ESC_Q) /* Handle start of quoted string */ - { - if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) - ptr += 2; /* avoid empty string */ - else inescq = TRUE; - continue; - } - - if (escape == ESC_E) continue; /* Perl ignores an orphan \E */ - /* For metasequences that actually match a character, we disable the setting of a first character if it hasn't already been set. */ diff --git a/testdata/testinput2 b/testdata/testinput2 index e8ca4fe..3a1134f 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4227,4 +4227,6 @@ backtracking verbs. --/ /(A*)\E+/CBZ +/()\Q\E*]/BCZ + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 09756b8..ac33cc4 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14624,4 +14624,19 @@ No match End ------------------------------------------------------------------ +/()\Q\E*]/BCZ +------------------------------------------------------------------ + Bra + Callout 255 0 7 + Brazero + SCBra 1 + Callout 255 1 0 + KetRmax + Callout 255 7 1 + ] + Callout 255 8 0 + Ket + End +------------------------------------------------------------------ + /-- End of testinput2 --/ -- 2.4.3 From 46ed1a703b067e5b679eacf6500a54dae35f8130 Mon Sep 17 00:00:00 2001 From: ph10 Date: Thu, 3 Dec 2015 17:05:40 +0000 Subject: [PATCH] Fix /x bug when pattern starts with white space and (?-x) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1617 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. 
diff --git a/pcre_compile.c b/pcre_compile.c index beed46b..57719b9 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -7607,39 +7607,15 @@ for (;; ptr++) newoptions = (options | set) & (~unset); /* If the options ended with ')' this is not the start of a nested - group with option changes, so the options change at this level. If this - item is right at the start of the pattern, the options can be - abstracted and made external in the pre-compile phase, and ignored in - the compile phase. This can be helpful when matching -- for instance in - caseless checking of required bytes. - - If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are - definitely *not* at the start of the pattern because something has been - compiled. In the pre-compile phase, however, the code pointer can have - that value after the start, because it gets reset as code is discarded - during the pre-compile. However, this can happen only at top level - if - we are within parentheses, the starting BRA will still be present. At - any parenthesis level, the length value can be used to test if anything - has been compiled at that level. Thus, a test for both these conditions - is necessary to ensure we correctly detect the start of the pattern in - both phases. - + group with option changes, so the options change at this level. If we are not at the pattern start, reset the greedy defaults and the case value for firstchar and reqchar. */ if (*ptr == CHAR_RIGHT_PARENTHESIS) { - if (code == cd->start_code + 1 + LINK_SIZE && - (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE)) - { - cd->external_options = newoptions; - } - else - { - greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); - greedy_non_default = greedy_default ^ 1; - req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0; - } + greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); + greedy_non_default = greedy_default ^ 1; + req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? 
REQ_CASELESS:0; /* Change options at this level, and pass them back for use in subsequent branches. */ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index ac33cc4..6c42897 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -419,7 +419,7 @@ Need char = '>' /(?U)<.*>/I Capturing subpattern count = 0 -Options: ungreedy +No options First char = '<' Need char = '>' abcghinop @@ -443,7 +443,7 @@ Need char = '=' /(?U)={3,}?/I Capturing subpattern count = 0 -Options: ungreedy +No options First char = '=' Need char = '=' abc========def @@ -477,7 +477,7 @@ Failed: lookbehind assertion is not fixed length at offset 12 /(?i)abc/I Capturing subpattern count = 0 -Options: caseless +No options First char = 'a' (caseless) Need char = 'c' (caseless) @@ -489,7 +489,7 @@ No need char /(?i)^1234/I Capturing subpattern count = 0 -Options: anchored caseless +Options: anchored No first char No need char @@ -502,7 +502,7 @@ No need char /(?s).*/I Capturing subpattern count = 0 May match empty string -Options: anchored dotall +Options: anchored No first char No need char @@ -516,7 +516,7 @@ Starting chars: a b c d /(?i)[abcd]/IS Capturing subpattern count = 0 -Options: caseless +No options No first char No need char Subject length lower bound = 1 @@ -524,7 +524,7 @@ Starting chars: A B C D a b c d /(?m)[xy]|(b|c)/IS Capturing subpattern count = 1 -Options: multiline +No options No first char No need char Subject length lower bound = 1 @@ -538,7 +538,7 @@ No need char /(?i)(^a|^b)/Im Capturing subpattern count = 1 -Options: caseless multiline +Options: multiline First char at start or follows newline No need char @@ -1179,7 +1179,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 1 -Options: anchored dotall +Options: anchored No first char No need char @@ -2735,7 +2735,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: caseless 
extended +Options: extended First char = 'a' (caseless) Need char = 'c' (caseless) @@ -2748,7 +2748,7 @@ Need char = 'c' (caseless) End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: caseless extended +Options: extended First char = 'a' (caseless) Need char = 'c' (caseless) @@ -3095,7 +3095,7 @@ Need char = 'b' End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: ungreedy +No options First char = 'x' Need char = 'b' xaaaab @@ -3497,7 +3497,7 @@ Need char = 'c' /(?i)[ab]/IS Capturing subpattern count = 0 -Options: caseless +No options No first char No need char Subject length lower bound = 1 @@ -6299,7 +6299,7 @@ Capturing subpattern count = 3 Named capturing subpatterns: A 2 A 3 -Options: anchored dupnames +Options: anchored Duplicate name status changes No first char No need char -- 2.4.3 From db1fb68feddc9afe6f8822d099fa9ff25e3ea8e7 Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 5 Dec 2015 16:30:14 +0000 Subject: [PATCH] Fix copy named substring bug. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1618 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. 
diff --git a/pcre_get.c b/pcre_get.c index 8094b34..41eda9c 100644 --- a/pcre_get.c +++ b/pcre_get.c @@ -250,6 +250,7 @@ Arguments: code the compiled regex stringname the name of the capturing substring ovector the vector of matched substrings + stringcount number of captured substrings Returns: the number of the first that is set, or the number of the last one if none are set, @@ -258,13 +259,16 @@ Returns: the number of the first that is set, #if defined COMPILE_PCRE8 static int -get_first_set(const pcre *code, const char *stringname, int *ovector) +get_first_set(const pcre *code, const char *stringname, int *ovector, + int stringcount) #elif defined COMPILE_PCRE16 static int -get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector) +get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector, + int stringcount) #elif defined COMPILE_PCRE32 static int -get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector) +get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector, + int stringcount) #endif { const REAL_PCRE *re = (const REAL_PCRE *)code; @@ -295,7 +299,7 @@ if (entrysize <= 0) return entrysize; for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) { int n = GET2(entry, 0); - if (ovector[n*2] >= 0) return n; + if (n < stringcount && ovector[n*2] >= 0) return n; } return GET2(entry, 0); } @@ -402,7 +406,7 @@ pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject, PCRE_UCHAR32 *buffer, int size) #endif { -int n = get_first_set(code, stringname, ovector); +int n = get_first_set(code, stringname, ovector, stringcount); if (n <= 0) return n; #if defined COMPILE_PCRE8 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); @@ -619,7 +623,7 @@ pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject, PCRE_SPTR32 *stringptr) #endif { -int n = get_first_set(code, stringname, ovector); +int n = get_first_set(code, stringname, ovector, 
stringcount); if (n <= 0) return n; #if defined COMPILE_PCRE8 return pcre_get_substring(subject, ovector, stringcount, n, stringptr); diff --git a/testdata/testinput2 b/testdata/testinput2 index 3a1134f..00ffe32 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4229,4 +4229,7 @@ backtracking verbs. --/ /()\Q\E*]/BCZ +/(?)(?J:(?)(?))(?)/ + \O\CC + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 6c42897..ffb4466 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14639,4 +14639,9 @@ No match End ------------------------------------------------------------------ +/(?)(?J:(?)(?))(?)/ + \O\CC +Matched, but too many substrings +copy substring C failed -7 + /-- End of testinput2 --/ -- 2.4.3 From 40363ebc19baeab160abaaa55dc84322a89ac35a Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 5 Dec 2015 16:58:46 +0000 Subject: [PATCH] Fix (by hacking) another length computation issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1619 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index 57719b9..087bf2a 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -7280,7 +7280,7 @@ for (;; ptr++) issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance only mode, we finesse the bug by allowing more memory always. */ - *lengthptr += 2 + 2*LINK_SIZE; + *lengthptr += 4 + 4*LINK_SIZE; /* It is even worse than that. 
The current reference may be to an existing named group with a different number (so apparently not diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 index 9a0a12d..280692e 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -231,7 +231,7 @@ Memory allocation (code space): 73 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 77 +Memory allocation (code space): 93 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index 57e5da0..cdbda74 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -231,7 +231,7 @@ Memory allocation (code space): 155 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 157 +Memory allocation (code space): 189 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 index 748548a..cb37896 100644 --- a/testdata/testoutput11-8 +++ b/testdata/testoutput11-8 @@ -231,7 +231,7 @@ Memory allocation (code space): 45 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 50 +Memory allocation (code space): 62 ------------------------------------------------------------------ 0 30 Bra 3 7 CBra 1 -- 2.4.3 From 4f47274a2eb10131d88145ad7fd0eed4027a0c51 Mon Sep 17 00:00:00 2001 From: ph10 Date: Tue, 8 Dec 2015 11:06:40 +0000 Subject: [PATCH] Fix get_substring_list() bug when \K is used in an assertion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1620 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: ported to 8.38. 
diff --git a/pcre_get.c b/pcre_get.c index 41eda9c..cdd2abc 100644 --- a/pcre_get.c +++ b/pcre_get.c @@ -461,7 +461,10 @@ pcre_uchar **stringlist; pcre_uchar *p; for (i = 0; i < double_count; i += 2) - size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1); + { + size += sizeof(pcre_uchar *) + IN_UCHARS(1); + if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]); + } stringlist = (pcre_uchar **)(PUBL(malloc))(size); if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; @@ -477,7 +480,7 @@ p = (pcre_uchar *)(stringlist + stringcount + 1); for (i = 0; i < double_count; i += 2) { - int len = ovector[i+1] - ovector[i]; + int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0; memcpy(p, subject + ovector[i], IN_UCHARS(len)); *stringlist++ = p; p += len; diff --git a/testdata/testinput2 b/testdata/testinput2 index 00ffe32..967a241 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4232,4 +4232,7 @@ backtracking verbs. --/ /(?)(?J:(?)(?))(?)/ \O\CC +/(?=a\K)/ + ring bpattingbobnd $ 1,oern cou \rb\L + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index ffb4466..5fb28d5 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14644,4 +14644,10 @@ No match Matched, but too many substrings copy substring C failed -7 +/(?=a\K)/ + ring bpattingbobnd $ 1,oern cou \rb\L +Start of matched string is beyond its end - displaying from end to start. + 0: a + 0L + /-- End of testinput2 --/ -- 2.5.0 From 3da5528b47b88c32224cf9d14d8a4e80cd7a0815 Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 6 Feb 2016 16:54:14 +0000 Subject: [PATCH] Fix pcretest bad behaviour for callout in lookbehind. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1625 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. 
diff --git a/pcretest.c b/pcretest.c index 488e419..63869fd 100644 --- a/pcretest.c +++ b/pcretest.c @@ -2250,7 +2250,7 @@ data is not zero. */ static int callout(pcre_callout_block *cb) { FILE *f = (first_callout | callout_extra)? outfile : NULL; -int i, pre_start, post_start, subject_length; +int i, current_position, pre_start, post_start, subject_length; if (callout_extra) { @@ -2280,14 +2280,19 @@ printed lengths of the substrings. */ if (f != NULL) fprintf(f, "--->"); +/* If a lookbehind is involved, the current position may be earlier than the +match start. If so, use the match start instead. */ + +current_position = (cb->current_position >= cb->start_match)? + cb->current_position : cb->start_match; + PCHARS(pre_start, cb->subject, 0, cb->start_match, f); PCHARS(post_start, cb->subject, cb->start_match, - cb->current_position - cb->start_match, f); + current_position - cb->start_match, f); PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL); -PCHARSV(cb->subject, cb->current_position, - cb->subject_length - cb->current_position, f); +PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f); if (f != NULL) fprintf(f, "\n"); @@ -5740,3 +5745,4 @@ return yield; } /* End of pcretest.c */ + diff --git a/testdata/testinput2 b/testdata/testinput2 index 967a241..086e0f4 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4235,4 +4235,8 @@ backtracking verbs. --/ /(?=a\K)/ ring bpattingbobnd $ 1,oern cou \rb\L +/(?<=((?C)0))/ + 9010 + abcd + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 5fb28d5..d414a72 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14650,4 +14650,19 @@ Start of matched string is beyond its end - displaying from end to start. 
0: a 0L +/(?<=((?C)0))/ + 9010 +--->9010 + 0 ^ 0 + 0 ^ 0 + 0: + 1: 0 + abcd +--->abcd + 0 ^ 0 + 0 ^ 0 + 0 ^ 0 + 0 ^ 0 +No match + /-- End of testinput2 --/ -- 2.5.0 From 943a5105b9fe2842851003f692c7077a6cdbeefe Mon Sep 17 00:00:00 2001 From: ph10 Date: Wed, 10 Feb 2016 19:13:17 +0000 Subject: [PATCH] Fix workspace overflow for (*ACCEPT) with deeply nested parentheses. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1631 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index b9a239e..5019854 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2014 University of Cambridge + Copyright (c) 1997-2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -560,6 +560,7 @@ static const char error_texts[] = /* 85 */ "parentheses are too deeply nested (stack check)\0" "digits missing in \\x{} or \\o{}\0" + "regular expression is too complicated\0" ; /* Table to identify digits and hex digits. This is used when compiling @@ -4591,7 +4592,8 @@ for (;; ptr++) if (code > cd->start_workspace + cd->workspace_size - WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ { - *errorcodeptr = ERR52; + *errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)? 
+ ERR52 : ERR87; goto FAILED; } @@ -6626,8 +6628,21 @@ for (;; ptr++) cd->had_accept = TRUE; for (oc = cd->open_caps; oc != NULL; oc = oc->next) { - *code++ = OP_CLOSE; - PUT2INC(code, 0, oc->number); + if (lengthptr != NULL) + { +#ifdef COMPILE_PCRE8 + *lengthptr += 1 + IMM2_SIZE; +#elif defined COMPILE_PCRE16 + *lengthptr += 2 + IMM2_SIZE; +#elif defined COMPILE_PCRE32 + *lengthptr += 4 + IMM2_SIZE; +#endif + } + else + { + *code++ = OP_CLOSE; + PUT2INC(code, 0, oc->number); + } } setverb = *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; diff --git a/pcre_internal.h b/pcre_internal.h index f7a5ee7..dbfe80e 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2014 University of Cambridge + Copyright (c) 1997-2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -2289,7 +2289,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, - ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERRCOUNT }; + ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERRCOUNT }; /* JIT compiling modes. The function list is indexed by them. */ diff --git a/pcreposix.c b/pcreposix.c index dcc13ef..55b6ddc 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2014 University of Cambridge + Copyright (c) 1997-2016 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -173,7 +173,8 @@ static const int eint[] = { REG_BADPAT, /* group name must start with a non-digit */ /* 85 */ REG_BADPAT, /* parentheses too deeply nested (stack check) */ - REG_BADPAT /* missing digits in \x{} or \o{} */ + REG_BADPAT, /* missing digits in \x{} or \o{} */ + REG_BADPAT /* pattern too complicated */ }; /* Table of texts corresponding to POSIX error codes */ diff --git a/testdata/testinput11 b/testdata/testinput11 index ac9d228..6f0989a 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -138,4 +138,6 @@ is required for these tests. --/ /.((?2)(?R)\1)()/B +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ + /-- End of testinput11 --/ diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 index 280692e..3c485da 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -765,4 +765,7 @@ Memory allocation (code space): 14 25 End ------------------------------------------------------------------ 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: regular expression is too complicated at offset 490 + /-- End of testinput11 --/ diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index cdbda74..e19518d 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -765,4 +765,7 @@ Memory allocation (code space): 28 25 End ------------------------------------------------------------------ +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: missing ) at offset 509 + /-- End of testinput11 --/ diff --git a/testdata/testoutput11-8 b/testdata/testoutput11-8 index cb37896..5a4fbb2 100644 --- a/testdata/testoutput11-8 +++ b/testdata/testoutput11-8 @@ -765,4 +765,7 @@ Memory allocation (code space): 10 38 End ------------------------------------------------------------------ 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: missing ) at offset 509 + /-- End of testinput11 --/ -- 2.5.0 From b7537308b7c758f33c347cb0bec62754c43c271f Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 27 Feb 2016 17:38:11 +0000 Subject: [PATCH] Yet another duplicate name bugfix by overestimating the memory needed (i.e. another hack - PCRE2 has this "properly" fixed). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1636 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcre_compile.c b/pcre_compile.c index 5019854..4ffea0c 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -7311,7 +7311,12 @@ for (;; ptr++) so far in order to get the number. If the name is not found, leave the value of recno as 0 for a forward reference. */ - else + /* This patch (removing "else") fixes a problem when a reference is + to multiple identically named nested groups from within the nest. + Once again, it is not the "proper" fix, and it results in an + over-allocation of memory. */ + + /* else */ { ng = cd->named_groups; for (i = 0; i < cd->names_found; i++, ng++) diff --git a/testdata/testinput2 b/testdata/testinput2 index 086e0f4..c805f5f 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4239,4 +4239,6 @@ backtracking verbs. 
--/ 9010 abcd +/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index d414a72..800a72f 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14665,4 +14665,6 @@ Start of matched string is beyond its end - displaying from end to start. 0 ^ 0 No match +/((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ + /-- End of testinput2 --/ -- 2.5.0 From 0fc2edb79b3815c6511fd75c36a57893e4acaee6 Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 27 Feb 2016 17:55:24 +0000 Subject: [PATCH] Fix pcretest loop for global matching with an ovector size less than 2. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1637 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. diff --git a/pcretest.c b/pcretest.c index 63869fd..78ef517 100644 --- a/pcretest.c +++ b/pcretest.c @@ -5617,6 +5617,12 @@ while (!done) break; } + if (use_size_offsets < 2) + { + fprintf(outfile, "Cannot do global matching with an ovector size < 2\n"); + break; + } + /* If we have matched an empty string, first check to see if we are at the end of the subject. If so, the /g loop is over. Otherwise, mimic what Perl's /g options does. This turns out to be rather cunning. First we set -- 2.5.0 From b3db1b7de5cfaa026ec2bc4a393129461a0f5c57 Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 27 Feb 2016 18:44:41 +0000 Subject: [PATCH] Fix non-diagnosis of missing assertion after (?(?C). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1638 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.38. 
diff --git a/pcre_compile.c b/pcre_compile.c index 4ffea0c..254c629 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -485,7 +485,7 @@ static const char error_texts[] = "lookbehind assertion is not fixed length\0" "malformed number or name after (?(\0" "conditional group contains more than two branches\0" - "assertion expected after (?(\0" + "assertion expected after (?( or (?(?C)\0" "(?R or (?[+-]digits must be followed by )\0" /* 30 */ "unknown POSIX class name\0" @@ -6771,6 +6771,15 @@ for (;; ptr++) for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; if (ptr[i] == CHAR_RIGHT_PARENTHESIS) tempptr += i + 1; + + /* tempptr should now be pointing to the opening parenthesis of the + assertion condition. */ + + if (*tempptr != CHAR_LEFT_PARENTHESIS) + { + *errorcodeptr = ERR28; + goto FAILED; + } } /* For conditions that are assertions, check the syntax, and then exit diff --git a/testdata/testinput2 b/testdata/testinput2 index c805f5f..75e402e 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4241,4 +4241,6 @@ backtracking verbs. 
--/ /((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ +/\N(?(?C)0?!.)*/ + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 800a72f..5e88d1a 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -555,13 +555,13 @@ Failed: malformed number or name after (?( at offset 4 Failed: malformed number or name after (?( at offset 4 /(?(?i))/ -Failed: assertion expected after (?( at offset 3 +Failed: assertion expected after (?( or (?(?C) at offset 3 /(?(abc))/ Failed: reference to non-existent subpattern at offset 7 /(?(?.*!.*)?)" -Failed: assertion expected after (?( at offset 3 +Failed: assertion expected after (?( or (?(?C) at offset 3 "X((?2)()*+){2}+"BZ ------------------------------------------------------------------ @@ -14667,4 +14667,7 @@ No match /((?J)(?'R'(?'R'(?'R'(?'R'(?'R'(?|(\k'R'))))))))/ +/\N(?(?C)0?!.)*/ +Failed: assertion expected after (?( or (?(?C) at offset 4 + /-- End of testinput2 --/ -- 2.5.0