X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=3f951afe637af260675c552be6fcf58b3d4401c9;hb=f979eb0420f34053899f0333b5db42816d294fbc;hp=bbbfcc9fe5440abda86adfaa1127682df18b3514;hpb=6f65addffdf5ac2d1dfa08252cb2b545ba0f08a7;p=gnulib.git diff --git a/regex.c b/regex.c index bbbfcc9fe..3f951afe6 100644 --- a/regex.c +++ b/regex.c @@ -2,7 +2,7 @@ 0.12. (Implements POSIX draft P10003.2/D11.2, except for internationalization features.) - Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. + Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,11 +27,15 @@ #undef _GNU_SOURCE #define _GNU_SOURCE +#ifdef emacs /* Converts the pointer to the char to BEG-based offset from the start. */ #define PTR_TO_OFFSET(d) \ POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \ ? (d) - string1 : (d) - (string2 - size1)) -#define POS_AS_IN_BUFFER(p) ((p) + 1) +#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) +#else +#define PTR_TO_OFFSET(d) 0 +#endif #ifdef HAVE_CONFIG_H #include @@ -1874,7 +1878,12 @@ regex_compile (pattern, size, syntax, bufp) compile_stack_type compile_stack; /* Points to the current (ending) position in the pattern. */ +#ifdef AIX + /* `const' makes AIX compiler fail. */ + char *p = pattern; +#else const char *p = pattern; +#endif const char *pend = pattern + size; /* How to translate the characters in the pattern. */ @@ -2894,8 +2903,12 @@ regex_compile (pattern, size, syntax, bufp) p1 = p - 1; /* P1 points the head of C. */ #ifdef emacs if (bufp->multibyte) - /* Set P to the next character boundary. */ - p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1; + { + c = STRING_CHAR (p1, pend - p1); + c = TRANSLATE (c); + /* Set P to the next character boundary. */ + p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1; + } #endif /* If no exactn currently being built. */ if (!pending_exact @@ -2907,14 +2920,14 @@ regex_compile (pattern, size, syntax, bufp) || *pending_exact >= (1 << BYTEWIDTH) - (p - p1) /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' + || (p != pend && (*p == '*' || *p == '^')) || ((syntax & RE_BK_PLUS_QM) - ? *p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) + ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') + : p != pend && (*p == '+' || *p == '?')) || ((syntax & RE_INTERVALS) && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) + ? p != pend && *p == '{' + : p + 1 < pend && p[0] == '\\' && p[1] == '{'))) { /* Start building a new exactn. */ @@ -2924,16 +2937,23 @@ regex_compile (pattern, size, syntax, bufp) pending_exact = b - 1; } - /* Here, C may translated, therefore C may not equal to *P1. */ - while (1) +#ifdef emacs + if (! SINGLE_BYTE_CHAR_P (c)) + { + unsigned char work[4], *str; + int i = CHAR_STRING (c, work, str); + int j; + for (j = 0; j < i; j++) + { + BUF_PUSH (str[j]); + (*pending_exact)++; + } + } + else +#endif { BUF_PUSH (c); (*pending_exact)++; - if (++p1 == p) - break; - - /* Rest of multibyte form should be copied literally. */ - c = *(unsigned char *)p1; } break; } /* switch (c) */ @@ -3303,9 +3323,11 @@ re_compile_fastmap (bufp) case charset_not: - /* Chars beyond end of map must be allowed. End of map is - `127' if bufp->multibyte is nonzero. */ - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + /* Chars beyond end of bitmap are possible matches. + All the single-byte codes can occur in multibyte buffers. + So any that are not listed in the charset + are possible matches, even in multibyte buffers. */ + simple_char_max = (1 << BYTEWIDTH); for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; j < simple_char_max; j++) fastmap[j] = 1; @@ -3332,7 +3354,9 @@ re_compile_fastmap (bufp) case wordchar: - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + /* All the single-byte codes can occur in multibyte buffers, + and they may have word syntax. So do consider them. */ + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (SYNTAX (j) == Sword) fastmap[j] = 1; @@ -3345,7 +3369,9 @@ re_compile_fastmap (bufp) case notwordchar: - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + /* All the single-byte codes can occur in multibyte buffers, + and they may not have word syntax. So do consider them. */ + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (SYNTAX (j) != Sword) fastmap[j] = 1; @@ -3361,21 +3387,13 @@ re_compile_fastmap (bufp) { int fastmap_newline = fastmap['\n']; - /* `.' matches anything (but if bufp->multibyte is - nonzero, matches `\000' .. `\127' and possible multibyte - character) ... */ + /* `.' matches anything, except perhaps newline. + Even in a multibyte buffer, it should match any + conceivable byte value for the fastmap. */ if (bufp->multibyte) - { - simple_char_max = 0x80; - - for (j = 0x80; j < 0xA0; j++) - if (BASE_LEADING_CODE_P (j)) - fastmap[j] = 1; - match_any_multibyte_characters = true; - } - else - simple_char_max = (1 << BYTEWIDTH); + match_any_multibyte_characters = true; + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) fastmap[j] = 1; @@ -3433,7 +3451,7 @@ re_compile_fastmap (bufp) case categoryspec: k = *p++; - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (CHAR_HAS_CATEGORY (j, k)) fastmap[j] = 1; @@ -3447,7 +3465,7 @@ re_compile_fastmap (bufp) case notcategoryspec: k = *p++; - simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); + simple_char_max = (1 << BYTEWIDTH); for (j = 0; j < simple_char_max; j++) if (!CHAR_HAS_CATEGORY (j, k)) fastmap[j] = 1; @@ -3742,8 +3760,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) #ifdef emacs gl_state.object = re_match_object; { - int charpos - = SYNTAX_TABLE_BYTE_TO_CHAR (startpos > 0 ? startpos : startpos + 1); + int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); + int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); } @@ -3807,12 +3825,18 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) else while (range > lim && !fastmap[(unsigned char) - RE_TRANSLATE (translate, (unsigned char) *d++)]) - range--; + RE_TRANSLATE (translate, (unsigned char) *d)]) + { + d++; + range--; + } } else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; + while (range > lim && !fastmap[(unsigned char) *d]) + { + d++; + range--; + } startpos += irange - range; } @@ -4057,8 +4081,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) #ifdef emacs int charpos; + int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); gl_state.object = re_match_object; - charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); #endif @@ -4526,9 +4551,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) do { PREFETCH (); - if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d++) + if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d) != (unsigned char) *p++) goto fail; + d++; } while (--mcnt); } @@ -4981,6 +5007,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) on_failure: DEBUG_PRINT1 ("EXECUTING on_failure_jump"); +#if defined (WINDOWSNT) && defined (emacs) + QUIT; +#endif + EXTRACT_NUMBER_AND_INCR (mcnt, p); DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); @@ -5021,6 +5051,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* A smart repeat ends with `maybe_pop_jump'. We change it to either `pop_failure_jump' or `jump'. */ case maybe_pop_jump: +#if defined (WINDOWSNT) && defined (emacs) + QUIT; +#endif EXTRACT_NUMBER_AND_INCR (mcnt, p); DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); { @@ -5241,6 +5274,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* Unconditionally jump (without popping any failure points). */ case jump: unconditional_jump: +#if defined (WINDOWSNT) && defined (emacs) + QUIT; +#endif EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); p += mcnt; /* Do the jump. */ @@ -5351,7 +5387,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); #ifdef emacs - charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 ? pos1 : 1); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); UPDATE_SYNTAX_TABLE (charpos); #endif s1 = SYNTAX (c1); @@ -5644,6 +5680,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* We goto here if a matching operation fails. */ fail: +#if defined (WINDOWSNT) && defined (emacs) + QUIT; +#endif if (!FAIL_STACK_EMPTY ()) { /* A restart point is known. Restore to that state. */ DEBUG_PRINT1 ("\nFAIL:\n");