X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=9fe099c7125fbfdae837436764d2fdcfaee0f7eb;hb=b9a0d0cb0e9efd45b1b46de42125155bf2c67f21;hp=e10a3565f2fdefc3d8412ef94c260fd6b2e91da3;hpb=7ae0247f5b1d1fcfebca9220862348b33df0d07f;p=gnulib.git diff --git a/regex.c b/regex.c index e10a3565f..9fe099c71 100644 --- a/regex.c +++ b/regex.c @@ -23,7 +23,9 @@ - structure the opcode space into opcode+flag. - merge with glibc's regex.[ch]. - replace (succeed_n + jump_n + set_number_at) with something that doesn't - need to modify the compiled regexp. + need to modify the compiled regexp so that re_match can be reentrant. + - get rid of on_failure_jump_smart by doing the optimization in re_comp + rather than at run-time, so that re_match can be reentrant. */ /* AIX requires this to be the first thing in the file. */ @@ -122,8 +124,17 @@ # include "charset.h" # include "category.h" +# ifdef malloc +# undef malloc +# endif # define malloc xmalloc +# ifdef realloc +# undef realloc +# endif # define realloc xrealloc +# ifdef free +# undef free +# endif # define free xfree /* Converts the pointer to the char to BEG-based offset from the start. */ @@ -1487,9 +1498,8 @@ do { \ if (reg == -1) \ { \ /* It's a counter. */ \ - /* Here, we discard `const', which makes re_match non-reentrant. \ - Gcc gives a warning for it, which is good. */ \ - unsigned char *ptr = POP_FAILURE_POINTER (); \ + /* Here, we discard `const', making re_match non-reentrant. */ \ + unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \ reg = POP_FAILURE_INT (); \ STORE_NUMBER (ptr, reg); \ DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \ @@ -1981,30 +1991,29 @@ re_iswctype (ch, cc) int ch; re_wctype_t cc; { - boolean ret = false; - switch (cc) { - case RECC_ALNUM: ret = ISALNUM (ch); - case RECC_ALPHA: ret = ISALPHA (ch); - case RECC_BLANK: ret = ISBLANK (ch); - case RECC_CNTRL: ret = ISCNTRL (ch); - case RECC_DIGIT: ret = ISDIGIT (ch); - case RECC_GRAPH: ret = ISGRAPH (ch); - case RECC_LOWER: ret = ISLOWER (ch); - case RECC_PRINT: ret = ISPRINT (ch); - case RECC_PUNCT: ret = ISPUNCT (ch); - case RECC_SPACE: ret = ISSPACE (ch); - case RECC_UPPER: ret = ISUPPER (ch); - case RECC_XDIGIT: ret = ISXDIGIT (ch); - case RECC_ASCII: ret = IS_REAL_ASCII (ch); - case RECC_NONASCII: ret = !IS_REAL_ASCII (ch); - case RECC_UNIBYTE: ret = ISUNIBYTE (ch); - case RECC_MULTIBYTE: ret = !ISUNIBYTE (ch); - case RECC_WORD: ret = ISWORD (ch); - case RECC_ERROR: ret = false; + case RECC_ALNUM: return ISALNUM (ch); + case RECC_ALPHA: return ISALPHA (ch); + case RECC_BLANK: return ISBLANK (ch); + case RECC_CNTRL: return ISCNTRL (ch); + case RECC_DIGIT: return ISDIGIT (ch); + case RECC_GRAPH: return ISGRAPH (ch); + case RECC_LOWER: return ISLOWER (ch); + case RECC_PRINT: return ISPRINT (ch); + case RECC_PUNCT: return ISPUNCT (ch); + case RECC_SPACE: return ISSPACE (ch); + case RECC_UPPER: return ISUPPER (ch); + case RECC_XDIGIT: return ISXDIGIT (ch); + case RECC_ASCII: return IS_REAL_ASCII (ch); + case RECC_NONASCII: return !IS_REAL_ASCII (ch); + case RECC_UNIBYTE: return ISUNIBYTE (ch); + case RECC_MULTIBYTE: return !ISUNIBYTE (ch); + case RECC_WORD: return ISWORD (ch); + case RECC_ERROR: return false; + default: + abort(); } - return ret; } /* Return a bit-pattern to use in the range-table bits to match multibyte @@ -2013,21 +2022,20 @@ static int re_wctype_to_bit (cc) re_wctype_t cc; { - int ret = 0; - switch (cc) { case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH: - case RECC_MULTIBYTE: ret = BIT_MULTIBYTE; - case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: ret = BIT_WORD; - case RECC_LOWER: ret = BIT_LOWER; - case RECC_UPPER: ret = BIT_UPPER; - case RECC_PUNCT: ret = BIT_PUNCT; - case RECC_SPACE: ret = BIT_SPACE; + case RECC_MULTIBYTE: return BIT_MULTIBYTE; + case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD; + case RECC_LOWER: return BIT_LOWER; + case RECC_UPPER: return BIT_UPPER; + case RECC_PUNCT: return BIT_PUNCT; + case RECC_SPACE: return BIT_SPACE; case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: - case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: ret = 0; + case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; + default: + abort(); } - return ret; } #endif @@ -3149,20 +3157,21 @@ regex_compile (pattern, size, syntax, bufp) case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - if (syntax & RE_NO_BK_REFS) - goto normal_char; + { + regnum_t reg; - c1 = c - '0'; + if (syntax & RE_NO_BK_REFS) + goto normal_backslash; - if (c1 > regnum) - FREE_STACK_RETURN (REG_ESUBREG); + reg = c - '0'; - /* Can't back reference to a subexpression if inside of it. */ - if (group_in_compile_stack (compile_stack, (regnum_t) c1)) - goto normal_char; + /* Can't back reference to a subexpression before its end. */ + if (reg > regnum || group_in_compile_stack (compile_stack, reg)) + FREE_STACK_RETURN (REG_ESUBREG); - laststart = b; - BUF_PUSH_2 (duplicate, c1); + laststart = b; + BUF_PUSH_2 (duplicate, reg); + } break; @@ -4263,7 +4272,7 @@ mutually_exclusive_p (bufp, p1, p2) { register re_wchar_t c = (re_opcode_t) *p2 == endline ? '\n' - : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); + : RE_STRING_CHAR (p2 + 2, pend - p2 - 2); if ((re_opcode_t) *p1 == exactn) { @@ -4308,13 +4317,11 @@ mutually_exclusive_p (bufp, p1, p2) break; case charset: - case charset_not: { if ((re_opcode_t) *p1 == exactn) /* Reuse the code above. */ return mutually_exclusive_p (bufp, p2, p1); - /* It is hard to list up all the character in charset P2 if it includes multibyte character. Give up in such case. */ @@ -4330,7 +4337,7 @@ mutually_exclusive_p (bufp, p1, p2) P2 is ASCII, it is enough to test only bitmap table of P1. */ - if (*p1 == *p2) + if ((re_opcode_t) *p1 == charset) { int idx; /* We win if the charset inside the loop @@ -4349,8 +4356,7 @@ mutually_exclusive_p (bufp, p1, p2) return 1; } } - else if ((re_opcode_t) *p1 == charset - || (re_opcode_t) *p1 == charset_not) + else if ((re_opcode_t) *p1 == charset_not) { int idx; /* We win if the charset_not inside the loop lists @@ -4369,7 +4375,24 @@ mutually_exclusive_p (bufp, p1, p2) } } } + break; + case charset_not: + switch (SWITCH_ENUM_CAST (*p1)) + { + case exactn: + case charset: + /* Reuse the code above. */ + return mutually_exclusive_p (bufp, p2, p1); + case charset_not: + /* When we have two charset_not, it's very unlikely that + they don't overlap. The union of the two sets of excluded + chars should cover all possible chars, which, as a matter of + fact, is virtually impossible in multibyte buffers. */ + ; + } + break; + case wordend: case notsyntaxspec: return ((re_opcode_t) *p1 == syntaxspec @@ -5276,9 +5299,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) mcnt, p + mcnt); { re_char *p1 = p; /* Next operation. */ - /* Please don't add casts to try and shut up GCC. */ - unsigned char *p2 = p + mcnt; /* Destination of the jump. */ - unsigned char *p3 = p - 3; /* Location of the opcode. */ + /* Here, we discard `const', making re_match non-reentrant. */ + unsigned char *p2 = (unsigned char*) p + mcnt; /* Jump dest. */ + unsigned char *p3 = (unsigned char*) p - 3; /* opcode location. */ p -= 3; /* Reset so that we will re-execute the instruction once it's been changed. */ @@ -5328,8 +5351,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* Originally, mcnt is how many times we HAVE to succeed. */ if (mcnt != 0) { - /* Please don't add a cast to try and shut up GCC. */ - unsigned char *p2 = p + 2; /* Location of the counter. */ + /* Here, we discard `const', making re_match non-reentrant. */ + unsigned char *p2 = (unsigned char*) p + 2; /* counter loc. */ mcnt--; p += 4; PUSH_NUMBER (p2, mcnt); @@ -5347,8 +5370,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* Originally, this is how many times we CAN jump. */ if (mcnt != 0) { - /* Please don't add a cast to try and shut up GCC. */ - unsigned char *p2 = p + 2; /* Location of the counter. */ + /* Here, we discard `const', making re_match non-reentrant. */ + unsigned char *p2 = (unsigned char*) p + 2; /* counter loc. */ mcnt--; PUSH_NUMBER (p2, mcnt); goto unconditional_jump; @@ -5364,8 +5387,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); EXTRACT_NUMBER_AND_INCR (mcnt, p); - /* Please don't add a cast to try and shut up GCC. */ - p2 = p + mcnt; + /* Here, we discard `const', making re_match non-reentrant. */ + p2 = (unsigned char*) p + mcnt; /* Signedness doesn't matter since we only copy MCNT's bits . */ EXTRACT_NUMBER_AND_INCR (mcnt, p); DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt);