From ff4e2b11ab19d0c806a3dc09308d1b393971b8aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= Date: Mon, 14 Nov 2016 20:05:31 +0100 Subject: [PATCH] Fix error message for unclosed \N{ in regcomp An unclosed \N{ that made it through to the regex engine rather than being handled by the lexer would erroneously trigger the error for "\N{NAME} must be resolved by the lexer". This separates the check for the missing trailing } and issues the correct error message for this. --- regcomp.c | 8 +++++--- t/re/re_tests | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/regcomp.c b/regcomp.c index ac664326f01..332cf00482e 100644 --- a/regcomp.c +++ b/regcomp.c @@ -12005,13 +12005,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_parse++; /* Skip past the '{' */ - if (! (endbrace = strchr(RExC_parse, '}')) /* no trailing brace */ - || ! (endbrace == RExC_parse /* nothing between the {} */ + if (! (endbrace = strchr(RExC_parse, '}'))) { /* no trailing brace */ + vFAIL2("Missing right brace on \\%c{}", 'N'); + } + else if(!(endbrace == RExC_parse /* nothing between the {} */ || (endbrace - RExC_parse >= 2 /* U+ (bad hex is checked... */ && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better error msg) */ { - if (endbrace) RExC_parse = endbrace; /* position msg's '<--HERE' */ + RExC_parse = endbrace; /* position msg's '<--HERE' */ vFAIL("\\N{NAME} must be resolved by the lexer"); } diff --git a/t/re/re_tests b/t/re/re_tests index 046beaa193b..1797ddc09d9 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1478,7 +1478,10 @@ abc\N abc\n n [\N{U+}] - c - Invalid hexadecimal number \N{U+4AG3} - c - Invalid hexadecimal number [\N{U+4AG3}] - c - Invalid hexadecimal number -abc\N{def - c - \\N{NAME} must be resolved by the lexer +abc\N{def} - c - \\N{NAME} must be resolved by the lexer +abc\N{U+4AG3 - c - Missing right brace on \\N{} +abc\N{def - c - Missing right brace on \\N{} +abc\N{ - c - Missing right brace on \\N{} # Verify that under /x that still cant have space before left brace /abc\N {U+41}/x - c - Missing braces From c1c28ce6ba90ee05aa96b11ad551a6063680f3b9 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 25 Mar 2017 15:00:22 -0600 Subject: [PATCH] regcomp.c: Convert some strchr to memchr This allows things to work properly in the face of embedded NULs. See the branch merge message for more information. (cherry picked from commit 43b2f4ef399e2fd7240b4eeb0658686ad95f8e62) --- regcomp.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/regcomp.c b/regcomp.c index 431006e8551..4ee48ede423 100644 --- a/regcomp.c +++ b/regcomp.c @@ -12023,7 +12023,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_parse++; /* Skip past the '{' */ - if (! (endbrace = strchr(RExC_parse, '}'))) { /* no trailing brace */ + endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse); + if (! endbrace) { /* no trailing brace */ vFAIL2("Missing right brace on \\%c{}", 'N'); } else if(!(endbrace == RExC_parse /* nothing between the {} */ @@ -12692,9 +12693,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) else { STRLEN length; char name = *RExC_parse; - char * endbrace; + char * endbrace = NULL; RExC_parse += 2; - endbrace = strchr(RExC_parse, '}'); + if (RExC_parse < RExC_end) { + endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse); + } if (! endbrace) { vFAIL2("Missing right brace on \\%c{}", name); @@ -16228,7 +16231,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, vFAIL2("Empty \\%c", (U8)value); if (*RExC_parse == '{') { const U8 c = (U8)value; - e = strchr(RExC_parse, '}'); + e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse); if (!e) { RExC_parse++; vFAIL2("Missing right brace on \\%c{}", c);