X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=0983c17e795175b4835eb099396df47795301168;hb=2c6d91f6bcfffcaa7320af7ea4e2a9c970c0966e;hp=a6c186c5c13cc5833162a1c34a1692db06c84212;hpb=213afad199577109f44b050b0b90d6e6ea9a262c;p=gnulib.git diff --git a/regex.c b/regex.c index a6c186c5c..0983c17e7 100644 --- a/regex.c +++ b/regex.c @@ -2,7 +2,7 @@ 0.12. (Implements POSIX draft P10003.2/D11.2, except for internationalization features.) - Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. + Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,11 +27,15 @@ #undef _GNU_SOURCE #define _GNU_SOURCE +#ifdef emacs /* Converts the pointer to the char to BEG-based offset from the start. */ #define PTR_TO_OFFSET(d) \ POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \ ? (d) - string1 : (d) - (string2 - size1)) -#define POS_AS_IN_BUFFER(p) ((p) + 1) +#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) +#else +#define PTR_TO_OFFSET(d) 0 +#endif #ifdef HAVE_CONFIG_H #include @@ -1874,7 +1878,12 @@ regex_compile (pattern, size, syntax, bufp) compile_stack_type compile_stack; /* Points to the current (ending) position in the pattern. */ +#ifdef AIX + /* `const' makes AIX compiler fail. */ + char *p = pattern; +#else const char *p = pattern; +#endif const char *pend = pattern + size; /* How to translate the characters in the pattern. */ @@ -2894,8 +2903,12 @@ regex_compile (pattern, size, syntax, bufp) p1 = p - 1; /* P1 points the head of C. */ #ifdef emacs if (bufp->multibyte) - /* Set P to the next character boundary. */ - p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1; + { + c = STRING_CHAR (p1, pend - p1); + c = TRANSLATE (c); + /* Set P to the next character boundary. */ + p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1; + } #endif /* If no exactn currently being built. */ if (!pending_exact @@ -2907,14 +2920,14 @@ regex_compile (pattern, size, syntax, bufp) || *pending_exact >= (1 << BYTEWIDTH) - (p - p1) /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' + || (p != pend && (*p == '*' || *p == '^')) || ((syntax & RE_BK_PLUS_QM) - ? *p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) + ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') + : p != pend && (*p == '+' || *p == '?')) || ((syntax & RE_INTERVALS) && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) + ? p != pend && *p == '{' + : p + 1 < pend && p[0] == '\\' && p[1] == '{'))) { /* Start building a new exactn. */ @@ -2924,16 +2937,23 @@ regex_compile (pattern, size, syntax, bufp) pending_exact = b - 1; } - /* Here, C may translated, therefore C may not equal to *P1. */ - while (1) +#ifdef emacs + if (! SINGLE_BYTE_CHAR_P (c)) + { + unsigned char work[4], *str; + int i = CHAR_STRING (c, work, str); + int j; + for (j = 0; j < i; j++) + { + BUF_PUSH (str[j]); + (*pending_exact)++; + } + } + else +#endif { BUF_PUSH (c); (*pending_exact)++; - if (++p1 == p) - break; - - /* Rest of multibyte form should be copied literally. */ - c = *(unsigned char *)p1; } break; } /* switch (c) */ @@ -3303,9 +3323,11 @@ re_compile_fastmap (bufp) case charset_not: - /* Chars beyond end of map must be allowed. End of map is - `127' if bufp->multibyte is nonzero. */ - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + /* Chars beyond end of bitmap are possible matches. + All the single-byte codes can occur in multibyte buffers. + So any that are not listed in the charset + are possible matches, even in multibyte buffers. */ + simple_char_max = (1 << BYTEWIDTH); for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; j < simple_char_max; j++) fastmap[j] = 1; @@ -3332,7 +3354,9 @@ re_compile_fastmap (bufp) case wordchar: - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + /* All the single-byte codes can occur in multibyte buffers, + and they may have word syntax. So do consider them. */ + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (SYNTAX (j) == Sword) fastmap[j] = 1; @@ -3345,7 +3369,9 @@ re_compile_fastmap (bufp) case notwordchar: - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + /* All the single-byte codes can occur in multibyte buffers, + and they may not have word syntax. So do consider them. */ + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (SYNTAX (j) != Sword) fastmap[j] = 1; @@ -3361,21 +3387,13 @@ re_compile_fastmap (bufp) { int fastmap_newline = fastmap['\n']; - /* `.' matches anything (but if bufp->multibyte is - nonzero, matches `\000' .. `\127' and possible multibyte - character) ... */ + /* `.' matches anything, except perhaps newline. + Even in a multibyte buffer, it should match any + conceivable byte value for the fastmap. */ if (bufp->multibyte) - { - simple_char_max = 0x80; - - for (j = 0x80; j < 0xA0; j++) - if (BASE_LEADING_CODE_P (j)) - fastmap[j] = 1; - match_any_multibyte_characters = true; - } - else - simple_char_max = (1 << BYTEWIDTH); + match_any_multibyte_characters = true; + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) fastmap[j] = 1; @@ -3433,7 +3451,7 @@ re_compile_fastmap (bufp) case categoryspec: k = *p++; - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (CHAR_HAS_CATEGORY (j, k)) fastmap[j] = 1; @@ -3447,7 +3465,7 @@ re_compile_fastmap (bufp) case notcategoryspec: k = *p++; - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (!CHAR_HAS_CATEGORY (j, k)) fastmap[j] = 1; @@ -3710,13 +3728,13 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) range = total_size - startpos; /* If the search isn't to be a backwards one, don't waste time in a - search for a pattern that must be anchored. */ + search for a pattern anchored at beginning of buffer. */ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) { if (startpos > 0) return -1; else - range = 1; + range = 0; } #ifdef emacs @@ -3724,8 +3742,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) don't keep searching past point. */ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) { - range = PT - startpos; - if (range <= 0) + range = PT_BYTE - BEGV_BYTE - startpos; + if (range < 0) return -1; } #endif /* emacs */ @@ -3742,8 +3760,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) #ifdef emacs gl_state.object = re_match_object; { - int charpos - = SYNTAX_TABLE_BYTE_TO_CHAR (startpos > 0 ? startpos : startpos + 1); + int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); + int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); } @@ -3786,8 +3804,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) /* Written out as an if-else to avoid testing `translate' inside the loop. */ - if (RE_TRANSLATE_P (translate)) -{ + if (RE_TRANSLATE_P (translate)) + { if (multibyte) while (range > lim) { @@ -3807,12 +3825,18 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) else while (range > lim && !fastmap[(unsigned char) - RE_TRANSLATE (translate, (unsigned char) *d++)]) - range--; + RE_TRANSLATE (translate, (unsigned char) *d)]) + { + d++; + range--; + } } else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; + while (range > lim && !fastmap[(unsigned char) *d]) + { + d++; + range--; + } startpos += irange - range; } @@ -4057,8 +4081,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) #ifdef emacs int charpos; + int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); gl_state.object = re_match_object; - charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); #endif @@ -4526,9 +4551,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) do { PREFETCH (); - if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d++) + if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d) != (unsigned char) *p++) goto fail; + d++; } while (--mcnt); } @@ -5351,7 +5377,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); #ifdef emacs - charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 ? pos1 : 1); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); UPDATE_SYNTAX_TABLE (charpos); #endif s1 = SYNTAX (c1);