X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=74fcf9c45ebb593a9c53c923d18fe00e4cc1301f;hb=ade79def12b42c40afa2d4bd47989ee40df7fe98;hp=d16bd60381bdf82be7c2203b1bde3916c9de014d;hpb=f711379c33b61c8258767154bebed5be41355a8e;p=gnulib.git diff --git a/lib/regex.c b/lib/regex.c index d16bd6038..74fcf9c45 100644 --- a/lib/regex.c +++ b/lib/regex.c @@ -2,25 +2,22 @@ version 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the internationalization features.) - Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc. + Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc. - NOTE: The canonical source of this file is maintained with the GNU C Library. - Bugs can be reported to bug-glibc@prep.ai.mit.edu. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. - - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - USA. 
*/ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ /* AIX requires this to be the first thing in the file. */ #if defined _AIX && !defined REGEX_MALLOC @@ -49,16 +46,41 @@ # include #endif -#define WIDE_CHAR_SUPPORT \ - defined _LIBC || (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) +#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) /* For platform which support the ISO C amendement 1 functionality we support user defined character classes. */ -#if WIDE_CHAR_SUPPORT +#if defined _LIBC || WIDE_CHAR_SUPPORT +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ # include <wchar.h> # include <wctype.h> #endif +#ifdef _LIBC +/* We have to keep the namespace clean. */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) +
+#define btowc __btowc +#endif + /* This is for other GNU distributions with internationalized messages. */ #if HAVE_LIBINTL_H || defined _LIBC # include @@ -110,8 +132,12 @@ char *realloc (); # ifndef INHIBIT_STRING_HEADER # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC # include -# if !defined bzero && !defined _LIBC -# define bzero(s, n) (memset (s, '\0', n), (s)) +# ifndef bzero +# ifndef _LIBC +# define bzero(s, n) (memset (s, '\0', n), (s)) +# else +# define bzero(s, n) __bzero (s, n) +# endif # endif # else # include @@ -181,7 +207,7 @@ init_syntax_once () #endif /* not emacs */ /* Get the interface, including the syntax bits. */ -#include "regex.h" +#include /* isalpha etc. are used for the character classes. */ #include @@ -195,7 +221,8 @@ init_syntax_once () STDC_HEADERS is defined, then autoconf has verified that the ctype macros don't need to be guarded with references to isascii. ... Defining isascii to 1 should let any compiler worth its salt - eliminate the && through constant folding." */ + eliminate the && through constant folding." + Solaris defines some of these symbols so we must undefine them first. */ #undef ISASCII #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) @@ -227,6 +254,12 @@ init_syntax_once () #define ISUPPER(c) (ISASCII (c) && isupper (c)) #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) +#ifdef _tolower +# define TOLOWER(c) _tolower(c) +#else +# define TOLOWER(c) tolower(c) +#endif + #ifndef NULL # define NULL (void *)0 #endif @@ -971,6 +1004,9 @@ re_set_syntax (syntax) #endif /* DEBUG */ return ret; } +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif /* This table gives an error message for each of the error codes listed in regex.h. Obviously the order here has to be same as there. 
@@ -1688,7 +1724,7 @@ typedef struct } \ } -#if WIDE_CHAR_SUPPORT +#if defined _LIBC || WIDE_CHAR_SUPPORT /* The GNU C library provides support for user-defined character classes and the functions from ISO C amendement 1. */ # ifdef CHARCLASS_NAME_MAX @@ -1699,7 +1735,11 @@ typedef struct # define CHAR_CLASS_MAX_LENGTH 256 # endif -# define IS_CHAR_CLASS(string) wctype (string) +# ifdef _LIBC +# define IS_CHAR_CLASS(string) __wctype (string) +# else +# define IS_CHAR_CLASS(string) wctype (string) +# endif #else # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ @@ -2176,25 +2216,28 @@ regex_compile (pattern, size, syntax, bufp) for (;;) { PATFETCH (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) + if ((c == ':' && *p == ']') || p == pend) break; - str[c1++] = c; + if (c1 < CHAR_CLASS_MAX_LENGTH) + str[c1++] = c; + else + /* This is in any case an invalid class name. */ + str[0] = '\0'; } str[c1] = '\0'; - /* If isn't a word bracketed by `[:' and:`]': + /* If isn't a word bracketed by `[:' and `:]': undo the ending character, the letters, and leave the leading `:' and `[' (but set bits for them). 
*/ if (c == ':' && *p == ']') { -#if WIDE_CHAR_SUPPORT +#if defined _LIBC || WIDE_CHAR_SUPPORT boolean is_lower = STREQ (str, "lower"); boolean is_upper = STREQ (str, "upper"); wctype_t wt; int ch; - wt = wctype (str); + wt = IS_CHAR_CLASS (str); if (wt == 0) FREE_STACK_RETURN (REG_ECTYPE); @@ -2206,8 +2249,13 @@ regex_compile (pattern, size, syntax, bufp) for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) { +# ifdef _LIBC + if (__iswctype (__btowc (ch), wt)) + SET_LIST_BIT (ch); +# else if (iswctype (btowc (ch), wt)) SET_LIST_BIT (ch); +# endif if (translate && (is_upper || is_lower) && (ISUPPER (ch) || ISLOWER (ch))) @@ -2691,7 +2739,7 @@ regex_compile (pattern, size, syntax, bufp) case 'w': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; laststart = b; BUF_PUSH (wordchar); @@ -2699,7 +2747,7 @@ regex_compile (pattern, size, syntax, bufp) case 'W': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; laststart = b; BUF_PUSH (notwordchar); @@ -2707,37 +2755,37 @@ regex_compile (pattern, size, syntax, bufp) case '<': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; BUF_PUSH (wordbeg); break; case '>': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; BUF_PUSH (wordend); break; case 'b': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; BUF_PUSH (wordbound); break; case 'B': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; BUF_PUSH (notwordbound); break; case '`': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; BUF_PUSH (begbuf); break; case '\'': - if (re_syntax_options & RE_NO_GNU_OPS) + if (syntax & RE_NO_GNU_OPS) goto normal_char; BUF_PUSH (endbuf); break; @@ -3375,6 +3423,9 @@ re_compile_fastmap (bufp) RESET_FAIL_STACK (); return 0; } /* re_compile_fastmap */ +#ifdef _LIBC +weak_alias 
(__re_compile_fastmap, re_compile_fastmap) +#endif /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use @@ -3410,6 +3461,9 @@ re_set_registers (bufp, regs, num_regs, starts, ends) regs->start = regs->end = (regoff_t *) 0; } } +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif /* Searching routines. */ @@ -3426,6 +3480,9 @@ re_search (bufp, string, size, startpos, range, regs) return re_search_2 (bufp, NULL, 0, string, size, startpos, range, regs, size); } +#ifdef _LIBC +weak_alias (__re_search, re_search) +#endif /* Using the compiled pattern in BUFP->buffer, first tries to match the @@ -3479,7 +3536,11 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) /* If the search isn't to be a backwards one, don't waste time in a search for a pattern that must be anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) + if (bufp->used > 0 && range > 0 + && ((re_opcode_t) bufp->buffer[0] == begbuf + /* `begline' is like `begbuf' if it cannot match at newlines. */ + || ((re_opcode_t) bufp->buffer[0] == begline + && !bufp->newline_anchor))) { if (startpos > 0) return -1; @@ -3582,6 +3643,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) } return -1; } /* re_search_2 */ +#ifdef _LIBC +weak_alias (__re_search_2, re_search_2) +#endif /* This converts PTR, a pointer into one of the search strings `string1' and `string2' into an offset from the beginning of that string. 
*/ @@ -3683,6 +3747,9 @@ re_match (bufp, string, size, pos, regs) # endif return result; } +# ifdef _LIBC +weak_alias (__re_match, re_match) +# endif #endif /* not emacs */ static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p, @@ -3728,6 +3795,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) #endif return result; } +#ifdef _LIBC +weak_alias (__re_match_2, re_match_2) +#endif /* This is a separate function so that we can force an alloca cleanup afterwards. */ @@ -5421,6 +5491,9 @@ re_compile_pattern (pattern, length, bufp) return NULL; return gettext (re_error_msgid[(int) ret]); } +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif /* Entry points compatible with 4.2 BSD regex library. We don't define them unless specifically requested. */ @@ -5453,12 +5526,12 @@ re_comp (s) { re_comp_buf.buffer = (unsigned char *) malloc (200); if (re_comp_buf.buffer == NULL) - return gettext (re_error_msgid[(int) REG_ESPACE]); + return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); re_comp_buf.allocated = 200; re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); if (re_comp_buf.fastmap == NULL) - return gettext (re_error_msgid[(int) REG_ESPACE]); + return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); } /* Since `re_exec' always passes NULL for the `regs' argument, we @@ -5506,7 +5579,8 @@ re_exec (s) REG_EXTENDED bit in CFLAGS is set; otherwise, to RE_SYNTAX_POSIX_BASIC; `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' and `fastmap_accurate' to zero; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; `re_nsub' to the number of subexpressions in PATTERN. PATTERN is the address of the pattern string. @@ -5545,11 +5619,8 @@ regcomp (preg, pattern, cflags) preg->allocated = 0; preg->used = 0; - /* Don't bother to use a fastmap when searching. 
This simplifies the - REG_NEWLINE case: if we used a fastmap, we'd have to put all the - characters after newlines into the fastmap. This way, we just try - every character. */ - preg->fastmap = 0; + /* Try to allocate space for the fastmap. */ + preg->fastmap = (char *) malloc (1 << BYTEWIDTH); if (cflags & REG_ICASE) { @@ -5563,7 +5634,7 @@ regcomp (preg, pattern, cflags) /* Map uppercase characters to corresponding lowercase ones. */ for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? tolower (i) : i; + preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; } else preg->translate = NULL; @@ -5589,8 +5660,24 @@ regcomp (preg, pattern, cflags) unmatched close-group: both are REG_EPAREN. */ if (ret == REG_ERPAREN) ret = REG_EPAREN; + if (ret == REG_NOERROR && preg->fastmap) + { + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. */ + if (re_compile_fastmap (preg) == -2) + { + /* Some error occurred while computing the fastmap, just forget + about it. */ + free (preg->fastmap); + preg->fastmap = NULL; + } + } + return (int) ret; } +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif /* regexec searches for a given pattern, specified by PREG, in the @@ -5634,10 +5721,10 @@ regexec (preg, string, nmatch, pmatch, eflags) if (want_reg_info) { regs.num_regs = nmatch; - regs.start = TALLOC (nmatch, regoff_t); - regs.end = TALLOC (nmatch, regoff_t); - if (regs.start == NULL || regs.end == NULL) + regs.start = TALLOC (nmatch * 2, regoff_t); + if (regs.start == NULL) return (int) REG_NOMATCH; + regs.end = regs.start + nmatch; } /* Perform the searching operation. */ @@ -5661,12 +5748,14 @@ regexec (preg, string, nmatch, pmatch, eflags) /* If we needed the temporary register info, free the space now. */ free (regs.start); - free (regs.end); } /* We want zero return to mean success, unlike `re_search'. */ return ret >= 0 ? 
(int) REG_NOERROR : (int) REG_NOMATCH; } +#ifdef _LIBC +weak_alias (__regexec, regexec) +#endif /* Returns a message corresponding to an error code, ERRCODE, returned @@ -5712,6 +5801,9 @@ regerror (errcode, preg, errbuf, errbuf_size) return msg_size; } +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif /* Free dynamically allocated space used by PREG. */ @@ -5736,5 +5828,8 @@ regfree (preg) free (preg->translate); preg->translate = NULL; } +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif #endif /* not emacs */