From: Paul Eggert
Date: Fri, 16 Mar 2012 21:17:55 +0000 (-0700)
Subject: regex: diagnose too-large repeat counts in EREs
X-Git-Tag: v0.1~821
X-Git-Url: http://erislabs.net/gitweb/?p=gnulib.git;a=commitdiff_plain;h=04ff3c185ce44b905653be2935a1794b3c888ce1

regex: diagnose too-large repeat counts in EREs

Previously, the code did not diagnose the too-large repeat count
in EREs like 'b{1000000000}'; instead, it silently treated the ERE
as if it were 'b\{1000000000}', which is unexpected.
* lib/regcomp.c (parse_dup_op): Fail with REG_ESIZE if a repeat count
is too large. REG_ESIZE is used nowhere else, and the diagnostic
is a reasonable one for this problem. Another option would be to
create a new REG_OVERFLOW error for repeat counts that are too large.
(fetch_number): Return RE_DUP_MAX + 1, not REG_ERROR, if the repeat
count is too large, so that the caller can distinguish the two cases.
* lib/regex.h (_REG_ESIZE): Document that this is now a generic
"Too large" return code, and that repeat counts are one example of this.
---

diff --git a/ChangeLog b/ChangeLog
index 54e3b5d90..128acdadc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,20 @@
 2012-03-16 Paul Eggert
 
+        regex: diagnose too-large repeat counts in EREs
+        Previously, the code did not diagnose the too-large repeat count
+        in EREs like 'b{1000000000}'; instead, it silently treated the ERE
+        as if it were 'b\{1000000000}', which is unexpected.
+        * lib/regcomp.c (parse_dup_op): Fail with REG_ESIZE if a repeat count
+        is too large. REG_ESIZE is used nowhere else, and the diagnostic
+        is a reasonable one for this problem. Another option would be to
+        create a new REG_OVERFLOW error for repeat counts that are too large.
+        (fetch_number): Return RE_DUP_MAX + 1, not REG_ERROR, if the repeat
+        count is too large, so that the caller can distinguish the two cases.
+        * lib/regex.h (_REG_ESIZE): Document that this is now a generic
+        "Too large" return code, and that repeat counts are one example of this.
+
+2012-03-16 Paul Eggert
+
         doc: some glibc x32 integer width issues
         * doc/posix-headers/sys_types.texi (sys/types.h):
         * doc/posix-headers/time.texi (time.h):
diff --git a/lib/regcomp.c b/lib/regcomp.c
index e6d9c999d..3841a0a7b 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -2571,6 +2571,12 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
           *err = REG_BADBR;
           return NULL;
         }
+
+      if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0))
+        {
+          *err = REG_ESIZE;
+          return NULL;
+        }
     }
   else
     {
@@ -3751,6 +3757,7 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
 /* This is intended for the expressions like "a{1,3}".
    Fetch a number from 'input', and return the number.
    Return REG_MISSING if the number field is empty like "{,1}".
+   Return RE_DUP_MAX + 1 if the number field is too large.
    Return REG_ERROR if an error occurred. */
 
 static Idx
@@ -3769,8 +3776,9 @@ fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
       num = ((token->type != CHARACTER || c < '0' || '9' < c
               || num == REG_ERROR)
              ? REG_ERROR
-             : ((num == REG_MISSING) ? c - '0' : num * 10 + c - '0'));
-      num = (num > RE_DUP_MAX) ? REG_ERROR : num;
+             : num == REG_MISSING
+             ? c - '0'
+             : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
     }
   return num;
 }
diff --git a/lib/regex.h b/lib/regex.h
index 0c3b420d7..c1cd613e2 100644
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -304,7 +304,7 @@ extern reg_syntax_t re_syntax_options;
 /* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
    the counter as a 2-byte signed integer. This is no longer true, so
    RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
-   ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined.
+   ((SIZE_MAX - 9) / 10) if _REGEX_LARGE_OFFSETS is defined.
    However, there would be a huge performance problem if someone
    actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
    its historical value. */
@@ -375,7 +375,7 @@ typedef enum
 
   /* Error codes we've added. */
   _REG_EEND,            /* Premature end. */
-  _REG_ESIZE,           /* Compiled pattern bigger than 2^16 bytes. */
+  _REG_ESIZE,           /* Too large (e.g., repeat count too large). */
   _REG_ERPAREN          /* Unmatched ) or \); not returned from regcomp. */
 } reg_errcode_t;
 