version 0.12.
(Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
+ Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
- Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
- the C library, however. The master source lives in /gd/gnu/lib.
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-NOTE: The canonical source of this file is maintained with the
-GNU C Library. Bugs can be reported to bug-glibc@prep.ai.mit.edu.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
-later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
/* AIX requires this to be the first thing in the file. */
-#if defined (_AIX) && !defined (REGEX_MALLOC)
+#if defined _AIX && !defined REGEX_MALLOC
#pragma alloca
#endif
#define _GNU_SOURCE
#ifdef HAVE_CONFIG_H
-#include <config.h>
+# include <config.h>
#endif
-#if defined(STDC_HEADERS) && !defined(emacs)
-#include <stddef.h>
+#ifndef PARAMS
+# if defined __GNUC__ || (defined __STDC__ && __STDC__)
+# define PARAMS(args) args
+# else
+# define PARAMS(args) ()
+# endif /* GCC. */
+#endif /* Not PARAMS. */
+
+#if defined STDC_HEADERS && !defined emacs
+# include <stddef.h>
#else
/* We need this for `regex.h', and perhaps for the Emacs include files. */
-#include <sys/types.h>
+# include <sys/types.h>
#endif
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+
/* For platform which support the ISO C amendement 1 functionality we
support user defined character classes. */
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
-# include <wctype.h>
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
# include <wchar.h>
+# include <wctype.h>
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+#define btowc __btowc
#endif
/* This is for other GNU distributions with internationalized messages. */
-#if HAVE_LIBINTL_H || defined (_LIBC)
+#if HAVE_LIBINTL_H || defined _LIBC
# include <libintl.h>
#else
# define gettext(msgid) (msgid)
#ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
strings. */
-#define gettext_noop(String) String
+# define gettext_noop(String) String
#endif
/* The `emacs' switch turns on certain matching commands
that make sense only in Emacs. */
#ifdef emacs
-#include "lisp.h"
-#include "buffer.h"
-#include "syntax.h"
+# include "lisp.h"
+# include "buffer.h"
+# include "syntax.h"
#else /* not emacs */
/* If we are not linking with Emacs proper,
we can't use the relocating allocator
even if config.h says that we can. */
-#undef REL_ALLOC
+# undef REL_ALLOC
-#if defined (STDC_HEADERS) || defined (_LIBC)
-#include <stdlib.h>
-#else
+# if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+# else
char *malloc ();
char *realloc ();
-#endif
+# endif
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
If nothing else has been done, use the method below. */
-#ifdef INHIBIT_STRING_HEADER
-#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
-#if !defined (bzero) && !defined (bcopy)
-#undef INHIBIT_STRING_HEADER
-#endif
-#endif
-#endif
+# ifdef INHIBIT_STRING_HEADER
+# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+# if !defined bzero && !defined bcopy
+# undef INHIBIT_STRING_HEADER
+# endif
+# endif
+# endif
/* This is the normal way of making sure we have a bcopy and a bzero.
This is used in most programs--a few other programs avoid this
by defining INHIBIT_STRING_HEADER. */
-#ifndef INHIBIT_STRING_HEADER
-#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
-#include <string.h>
-#ifndef bcmp
-#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
-#endif
-#ifndef bcopy
-#define bcopy(s, d, n) memcpy ((d), (s), (n))
-#endif
-#ifndef bzero
-#define bzero(s, n) memset ((s), 0, (n))
-#endif
-#else
-#include <strings.h>
-#endif
-#endif
+# ifndef INHIBIT_STRING_HEADER
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
+# endif
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
+# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
+# endif
+# endif
+# endif
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
commands in re_match_2. */
-#ifndef Sword
-#define Sword 1
-#endif
+# ifndef Sword
+# define Sword 1
+# endif
-#ifdef SWITCH_ENUM_BUG
-#define SWITCH_ENUM_CAST(x) ((int)(x))
-#else
-#define SWITCH_ENUM_CAST(x) (x)
-#endif
+# ifdef SWITCH_ENUM_BUG
+# define SWITCH_ENUM_CAST(x) ((int)(x))
+# else
+# define SWITCH_ENUM_CAST(x) (x)
+# endif
-#ifdef SYNTAX_TABLE
+/* How many characters in the character set. */
+# define CHAR_SET_SIZE 256
-extern char *re_syntax_table;
+# ifdef SYNTAX_TABLE
-#else /* not SYNTAX_TABLE */
+extern char *re_syntax_table;
-/* How many characters in the character set. */
-#define CHAR_SET_SIZE 256
+# else /* not SYNTAX_TABLE */
static char re_syntax_table[CHAR_SET_SIZE];
done = 1;
}
-#endif /* not SYNTAX_TABLE */
+# endif /* not SYNTAX_TABLE */
-#define SYNTAX(c) re_syntax_table[c]
+# define SYNTAX(c) re_syntax_table[c]
#endif /* not emacs */
\f
/* Get the interface, including the syntax bits. */
-#include "regex.h"
+#include <regex.h>
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
STDC_HEADERS is defined, then autoconf has verified that the ctype
macros don't need to be guarded with references to isascii. ...
Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding." */
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
-#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
-#define ISASCII(c) 1
+#undef ISASCII
+#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
#else
-#define ISASCII(c) isascii(c)
+# define ISASCII(c) isascii(c)
#endif
#ifdef isblank
-#define ISBLANK(c) (ISASCII (c) && isblank (c))
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
-#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
#else
-#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
#endif
+#undef ISPRINT
#define ISPRINT(c) (ISASCII (c) && isprint (c))
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
#ifndef NULL
-#define NULL (void *)0
+# define NULL (void *)0
#endif
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
(Per Bothner suggested the basic approach.) */
#undef SIGN_EXTEND_CHAR
#if __STDC__
-#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else /* not __STDC__ */
/* As in Harbison and Steele. */
-#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
\f
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
#ifdef REGEX_MALLOC
-#define REGEX_ALLOCATE malloc
-#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
-#define REGEX_FREE free
+# define REGEX_ALLOCATE malloc
+# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE free
#else /* not REGEX_MALLOC */
/* Emacs already defines alloca, sometimes. */
-#ifndef alloca
+# ifndef alloca
/* Make alloca work the best possible way. */
-#ifdef __GNUC__
-#define alloca __builtin_alloca
-#else /* not __GNUC__ */
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#else /* not __GNUC__ or HAVE_ALLOCA_H */
-#if 0 /* It is a bad idea to declare alloca. We always cast the result. */
-#ifndef _AIX /* Already did AIX, up at the top. */
-char *alloca ();
-#endif /* not _AIX */
-#endif
-#endif /* not HAVE_ALLOCA_H */
-#endif /* not __GNUC__ */
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# else /* not __GNUC__ */
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# endif /* HAVE_ALLOCA_H */
+# endif /* not __GNUC__ */
-#endif /* not alloca */
+# endif /* not alloca */
-#define REGEX_ALLOCATE alloca
+# define REGEX_ALLOCATE alloca
/* Assumes a `char *destination' variable. */
-#define REGEX_REALLOCATE(source, osize, nsize) \
+# define REGEX_REALLOCATE(source, osize, nsize) \
(destination = (char *) alloca (nsize), \
- bcopy (source, destination, osize), \
- destination)
+ memcpy (destination, source, osize))
/* No need to do anything to free, after alloca. */
-#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
#endif /* not REGEX_MALLOC */
/* Define how to allocate the failure stack. */
-#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
+#if defined REL_ALLOC && defined REGEX_MALLOC
-#define REGEX_ALLOCATE_STACK(size) \
+# define REGEX_ALLOCATE_STACK(size) \
r_alloc (&failure_stack_ptr, (size))
-#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
r_re_alloc (&failure_stack_ptr, (nsize))
-#define REGEX_FREE_STACK(ptr) \
+# define REGEX_FREE_STACK(ptr) \
r_alloc_free (&failure_stack_ptr)
#else /* not using relocating allocator */
-#ifdef REGEX_MALLOC
+# ifdef REGEX_MALLOC
-#define REGEX_ALLOCATE_STACK malloc
-#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
-#define REGEX_FREE_STACK free
+# define REGEX_ALLOCATE_STACK malloc
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE_STACK free
-#else /* not REGEX_MALLOC */
+# else /* not REGEX_MALLOC */
-#define REGEX_ALLOCATE_STACK alloca
+# define REGEX_ALLOCATE_STACK alloca
-#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything. */
-#define REGEX_FREE_STACK(arg)
+# define REGEX_FREE_STACK(arg)
-#endif /* not REGEX_MALLOC */
+# endif /* not REGEX_MALLOC */
#endif /* not using relocating allocator */
#define false 0
#define true 1
-static int re_match_2_internal ();
+static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop));
\f
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
*dest += temp << 8;
}
-#ifndef EXTRACT_MACROS /* To debug the macros. */
-#undef EXTRACT_NUMBER
-#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
-#endif /* not EXTRACT_MACROS */
+# ifndef EXTRACT_MACROS /* To debug the macros. */
+# undef EXTRACT_NUMBER
+# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+# endif /* not EXTRACT_MACROS */
#endif /* DEBUG */
*source += 2;
}
-#ifndef EXTRACT_MACROS
-#undef EXTRACT_NUMBER_AND_INCR
-#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+# ifndef EXTRACT_MACROS
+# undef EXTRACT_NUMBER_AND_INCR
+# define EXTRACT_NUMBER_AND_INCR(dest, src) \
extract_number_and_incr (&dest, &src)
-#endif /* not EXTRACT_MACROS */
+# endif /* not EXTRACT_MACROS */
#endif /* DEBUG */
\f
#ifdef DEBUG
/* We use standard I/O for debugging. */
-#include <stdio.h>
+# include <stdio.h>
/* It is useful to test things that ``must'' be true when debugging. */
-#include <assert.h>
+# include <assert.h>
static int debug = 0;
-#define DEBUG_STATEMENT(e) e
-#define DEBUG_PRINT1(x) if (debug) printf (x)
-#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+# define DEBUG_STATEMENT(e) e
+# define DEBUG_PRINT1(x) if (debug) printf (x)
+# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
if (debug) print_partial_compiled_pattern (s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
if (debug) print_double_string (w, s1, sz1, s2, sz2)
case wordend:
printf ("/wordend");
-#ifdef emacs
+# ifdef emacs
case before_dot:
printf ("/before_dot");
break;
mcnt = *p++;
printf ("/%d", mcnt);
break;
-#endif /* emacs */
+# endif /* emacs */
case wordchar:
printf ("/wordchar");
#else /* not DEBUG */
-#undef assert
-#define assert(e)
+# undef assert
+# define assert(e)
-#define DEBUG_STATEMENT(e)
-#define DEBUG_PRINT1(x)
-#define DEBUG_PRINT2(x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+# define DEBUG_STATEMENT(e)
+# define DEBUG_PRINT1(x)
+# define DEBUG_PRINT2(x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
#endif /* not DEBUG */
\f
#endif /* DEBUG */
return ret;
}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
\f
/* This table gives an error message for each of the error codes listed
in regex.h. Obviously the order here has to be same as there.
/* When using GNU C, we are not REALLY using the C alloca, no matter
what config.h may say. So don't take precautions for it. */
#ifdef __GNUC__
-#undef C_ALLOCA
+# undef C_ALLOCA
#endif
/* The match routines may not allocate if (1) they would do it with malloc
Note that if REL_ALLOC is defined, matching would not use malloc for the
failure stack, but we would still use it for the register vectors;
so REL_ALLOC should not affect this. */
-#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
-#undef MATCH_MAY_ALLOCATE
+#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
+# undef MATCH_MAY_ALLOCATE
#endif
\f
when matching. If this number is exceeded, we allocate more
space, so it is not a hard limit. */
#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+# define INIT_FAILURE_ALLOC 5
#endif
/* Roughly the maximum number of failure points on the stack. Would be
#ifdef INT_IS_16BIT
-#if defined (MATCH_MAY_ALLOCATE)
+# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
long int re_max_failures = 4000;
-#else
+# else
long int re_max_failures = 2000;
-#endif
+# endif
union fail_stack_elt
{
#else /* not INT_IS_16BIT */
-#if defined (MATCH_MAY_ALLOCATE)
+# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
int re_max_failures = 20000;
-#else
+# else
int re_max_failures = 2000;
-#endif
+# endif
union fail_stack_elt
{
Do `return -2' if the alloc fails. */
#ifdef MATCH_MAY_ALLOCATE
-#define INIT_FAIL_STACK() \
+# define INIT_FAIL_STACK() \
do { \
fail_stack.stack = (fail_stack_elt_t *) \
- REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
\
if (fail_stack.stack == NULL) \
return -2; \
fail_stack.avail = 0; \
} while (0)
-#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
#else
-#define INIT_FAIL_STACK() \
+# define INIT_FAIL_STACK() \
do { \
fail_stack.avail = 0; \
} while (0)
-#define RESET_FAIL_STACK()
+# define RESET_FAIL_STACK()
#endif
/* Used to omit pushing failure point id's when we're not debugging. */
#ifdef DEBUG
-#define DEBUG_PUSH PUSH_FAILURE_INT
-#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
+# define DEBUG_PUSH PUSH_FAILURE_INT
+# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#else
-#define DEBUG_PUSH(item)
-#define DEBUG_POP(item_addr)
+# define DEBUG_PUSH(item)
+# define DEBUG_POP(item_addr)
#endif
if we ever fail back to it.
Requires variables fail_stack, regstart, regend, reg_info, and
- num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
- declared.
+ num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
+ be declared.
Does `return FAILURE_CODE' if runs out of memory. */
/* Can't be int, since there is not a shred of a guarantee that int \
is wide enough to hold a value of something to which pointer can \
be assigned */ \
- s_reg_t this_reg; \
+ active_reg_t this_reg; \
\
DEBUG_STATEMENT (failure_id++); \
DEBUG_STATEMENT (nfailure_points_pushed++); \
DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
\
- DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
\
/* Ensure we have enough space allocated for what we will push. */ \
for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
this_reg++) \
{ \
- DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
DEBUG_STATEMENT (num_regs_pushed++); \
\
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
PUSH_FAILURE_POINTER (regstart[this_reg]); \
\
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
PUSH_FAILURE_POINTER (regend[this_reg]); \
\
- DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" info: %p\n ", \
+ reg_info[this_reg].word.pointer); \
DEBUG_PRINT2 (" match_null=%d", \
REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
PUSH_FAILURE_ELT (reg_info[this_reg].word); \
} \
\
- DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
PUSH_FAILURE_INT (lowest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
PUSH_FAILURE_INT (highest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing pattern 0x%x:\n", pattern_place); \
+ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
PUSH_FAILURE_POINTER (pattern_place); \
\
- DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
size2); \
DEBUG_PRINT1 ("'\n"); \
/* Individual items aside from the registers. */
#ifdef DEBUG
-#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
#else
-#define NUM_NONREG_ITEMS 4
+# define NUM_NONREG_ITEMS 4
#endif
/* We push at most this many items on the stack. */
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
- DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
- s_reg_t this_reg; \
+ DEBUG_STATEMENT (unsigned failure_id;) \
+ active_reg_t this_reg; \
const unsigned char *string_temp; \
\
assert (!FAIL_STACK_EMPTY ()); \
if (string_temp != NULL) \
str = (const char *) string_temp; \
\
- DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT2 (" Popping string %p: `", str); \
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
DEBUG_PRINT1 ("'\n"); \
\
pat = (unsigned char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" Popping pattern 0x%x:\n", pat); \
+ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
\
/* Restore register info. */ \
high_reg = (active_reg_t) POP_FAILURE_INT (); \
- DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
\
low_reg = (active_reg_t) POP_FAILURE_INT (); \
- DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
\
if (1) \
for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
{ \
- DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
\
reg_info[this_reg].word = POP_FAILURE_ELT (); \
- DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" info: %p\n", \
+ reg_info[this_reg].word.pointer); \
\
regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
\
regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
} \
else \
{ \
string passed to us by the user to an unsigned char that we can use
as an array index (in, e.g., `translate'). */
#ifndef PATFETCH
-#define PATFETCH(c) \
+# define PATFETCH(c) \
do {if (p == pend) return REG_EEND; \
c = (unsigned char) *p++; \
if (translate) c = (unsigned char) translate[c]; \
`char *', to avoid warnings when a string constant is passed. But
when we use a character as a subscript we must make it unsigned. */
#ifndef TRANSLATE
-#define TRANSLATE(d) \
+# define TRANSLATE(d) \
(translate ? (char) translate[(unsigned char) (d)] : (d))
#endif
MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
reallocating to 0 bytes. Such thing is not going to work too well.
You have been warned!! */
-#if defined(_MSC_VER) && !defined(WIN32)
+#if defined _MSC_VER && !defined WIN32
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
The REALLOC define eliminates a flurry of conversion warnings,
but is not required. */
-#define MAX_BUF_SIZE 65500L
-#define REALLOC(p,s) realloc ((p), (size_t) (s))
+# define MAX_BUF_SIZE 65500L
+# define REALLOC(p,s) realloc ((p), (size_t) (s))
#else
-#define MAX_BUF_SIZE (1L << 16)
-#define REALLOC(p,s) realloc ((p), (s))
+# define MAX_BUF_SIZE (1L << 16)
+# define REALLOC(p,s) realloc ((p), (s))
#endif
/* Extend the buffer by twice its current size via realloc and
} \
}
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
and the functions from ISO C amendement 1. */
# ifdef CHARCLASS_NAME_MAX
# define CHAR_CLASS_MAX_LENGTH 256
# endif
-# define IS_CHAR_CLASS(string) wctype (string)
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
#else
# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
/* Always count groups, whether or not bufp->no_sub is set. */
bufp->re_nsub = 0;
-#if !defined (emacs) && !defined (SYNTAX_TABLE)
+#if !defined emacs && !defined SYNTAX_TABLE
/* Initialize the syntax table. */
init_syntax_once ();
#endif
for (;;)
{
PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
+ if ((c == ':' && *p == ']') || p == pend)
break;
- str[c1++] = c;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
}
str[c1] = '\0';
- /* If isn't a word bracketed by `[:' and:`]':
+ /* If isn't a word bracketed by `[:' and `:]':
undo the ending character, the letters, and leave
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || WIDE_CHAR_SUPPORT
boolean is_lower = STREQ (str, "lower");
boolean is_upper = STREQ (str, "upper");
wctype_t wt;
int ch;
- wt = wctype (str);
+ wt = IS_CHAR_CLASS (str);
if (wt == 0)
FREE_STACK_RETURN (REG_ECTYPE);
for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
{
+# ifdef _LIBC
+ if (__iswctype (__btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# else
if (iswctype (btowc (ch), wt))
SET_LIST_BIT (ch);
+# endif
if (translate && (is_upper || is_lower)
&& (ISUPPER (ch) || ISLOWER (ch)))
if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_backslash;
- else
- FREE_STACK_RETURN (REG_ERPAREN);
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
handle_close:
if (fixup_alt_jump)
/* See similar code for backslashed left paren above. */
if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_char;
- else
- FREE_STACK_RETURN (REG_ERPAREN);
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
/* Since we just checked for an empty stack above, this
``can't happen''. */
case 'w':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (wordchar);
case 'W':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (notwordchar);
case '<':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbeg);
break;
case '>':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordend);
break;
case 'b':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbound);
break;
case 'B':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (notwordbound);
break;
case '`':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (begbuf);
break;
case '\'':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (endbuf);
break;
{
fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
-#ifdef emacs
+# ifdef emacs
if (! fail_stack.stack)
fail_stack.stack
= (fail_stack_elt_t *) xmalloc (fail_stack.size
= (fail_stack_elt_t *) xrealloc (fail_stack.stack,
(fail_stack.size
* sizeof (fail_stack_elt_t)));
-#else /* not emacs */
+# else /* not emacs */
if (! fail_stack.stack)
fail_stack.stack
= (fail_stack_elt_t *) malloc (fail_stack.size
= (fail_stack_elt_t *) realloc (fail_stack.stack,
(fail_stack.size
* sizeof (fail_stack_elt_t)));
-#endif /* not emacs */
+# endif /* not emacs */
}
regex_grow_registers (num_regs);
#ifndef REGEX_MALLOC
char *destination;
#endif
- /* We don't push any register information onto the failure stack. */
- unsigned num_regs = 0;
register char *fastmap = bufp->fastmap;
unsigned char *pattern = bufp->buffer;
RESET_FAIL_STACK ();
return 0;
} /* re_compile_fastmap */
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
\f
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
regs->start = regs->end = (regoff_t *) 0;
}
}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
\f
/* Searching routines. */
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
/* Using the compiled pattern in BUFP->buffer, first tries to match the
/* If the search isn't to be a backwards one, don't waste time in a
search for a pattern that must be anchored. */
- if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+ if (bufp->used > 0 && range > 0
+ && ((re_opcode_t) bufp->buffer[0] == begbuf
+ /* `begline' is like `begbuf' if it cannot match at newlines. */
+ || ((re_opcode_t) bufp->buffer[0] == begline
+ && !bufp->newline_anchor)))
{
if (startpos > 0)
return -1;
val = re_match_2_internal (bufp, string1, size1, string2, size2,
startpos, regs, stop);
#ifndef REGEX_MALLOC
-#ifdef C_ALLOCA
+# ifdef C_ALLOCA
alloca (0);
-#endif
+# endif
#endif
if (val >= 0)
}
return -1;
} /* re_search_2 */
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
\f
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
-#define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
-#define FREE_VARIABLES() \
+# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
+# define FREE_VARIABLES() \
do { \
REGEX_FREE_STACK (fail_stack.stack); \
FREE_VAR (regstart); \
FREE_VAR (reg_info_dummy); \
} while (0)
#else
-#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
+# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
#endif /* not MATCH_MAY_ALLOCATE */
/* These values must meet several constraints. They must not be valid
{
int result = re_match_2_internal (bufp, NULL, 0, string, size,
pos, regs, size);
-#ifndef REGEX_MALLOC
-#ifdef C_ALLOCA
+# ifndef REGEX_MALLOC
+# ifdef C_ALLOCA
alloca (0);
-#endif
-#endif
+# endif
+# endif
return result;
}
+# ifdef _LIBC
+weak_alias (__re_match, re_match)
+# endif
#endif /* not emacs */
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
int result = re_match_2_internal (bufp, string1, size1, string2, size2,
pos, regs, stop);
#ifndef REGEX_MALLOC
-#ifdef C_ALLOCA
+# ifdef C_ALLOCA
alloca (0);
-#endif
+# endif
#endif
return result;
}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
past them. */
if (translate
? bcmp_translate (d, d2, mcnt, translate)
- : bcmp (d, d2, mcnt))
+ : memcmp (d, d2, mcnt))
goto fail;
d += mcnt, d2 += mcnt;
DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
/* It doesn't matter what we push for the string here. What
the code at `fail' tests is the value for the pattern. */
- PUSH_FAILURE_POINT (0, 0, -2);
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
goto unconditional_jump;
DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
/* See comments just above at `dummy_failure_jump' about the
two zeroes. */
- PUSH_FAILURE_POINT (0, 0, -2);
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
break;
/* Have to succeed matching what follows at least n times.
return NULL;
return gettext (re_error_msgid[(int) ret]);
}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
\f
/* Entry points compatible with 4.2 BSD regex library. We don't define
them unless specifically requested. */
-#if defined (_REGEX_RE_COMP) || defined (_LIBC)
+#if defined _REGEX_RE_COMP || defined _LIBC
/* BSD has one and only one pattern buffer. */
static struct re_pattern_buffer re_comp_buf;
{
re_comp_buf.buffer = (unsigned char *) malloc (200);
if (re_comp_buf.buffer == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
re_comp_buf.allocated = 200;
re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
if (re_comp_buf.fastmap == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
}
/* Since `re_exec' always passes NULL for the `regs' argument, we
REG_EXTENDED bit in CFLAGS is set; otherwise, to
RE_SYNTAX_POSIX_BASIC;
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' and `fastmap_accurate' to zero;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
`re_nsub' to the number of subexpressions in PATTERN.
PATTERN is the address of the pattern string.
preg->allocated = 0;
preg->used = 0;
- /* Don't bother to use a fastmap when searching. This simplifies the
- REG_NEWLINE case: if we used a fastmap, we'd have to put all the
- characters after newlines into the fastmap. This way, we just try
- every character. */
- preg->fastmap = 0;
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
if (cflags & REG_ICASE)
{
unmatched close-group: both are REG_EPAREN. */
if (ret == REG_ERPAREN) ret = REG_EPAREN;
+ if (ret == REG_NOERROR && preg->fastmap)
+ {
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. */
+ if (re_compile_fastmap (preg) == -2)
+ {
+ /* Some error occured while computing the fastmap, just forget
+ about it. */
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+ }
+
return (int) ret;
}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
/* regexec searches for a given pattern, specified by PREG, in the
if (want_reg_info)
{
regs.num_regs = nmatch;
- regs.start = TALLOC (nmatch, regoff_t);
- regs.end = TALLOC (nmatch, regoff_t);
- if (regs.start == NULL || regs.end == NULL)
+ regs.start = TALLOC (nmatch * 2, regoff_t);
+ if (regs.start == NULL)
return (int) REG_NOMATCH;
+ regs.end = regs.start + nmatch;
}
/* Perform the searching operation. */
/* If we needed the temporary register info, free the space now. */
free (regs.start);
- free (regs.end);
}
/* We want zero return to mean success, unlike `re_search'. */
return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
/* Returns a message corresponding to an error code, ERRCODE, returned
{
if (msg_size > errbuf_size)
{
- strncpy (errbuf, msg, errbuf_size - 1);
+#if defined HAVE_MEMPCPY || defined _LIBC
+ *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
+#else
+ memcpy (errbuf, msg, errbuf_size - 1);
errbuf[errbuf_size - 1] = 0;
+#endif
}
else
- strcpy (errbuf, msg);
+ memcpy (errbuf, msg, msg_size);
}
return msg_size;
}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
/* Free dynamically allocated space used by PREG. */
free (preg->translate);
preg->translate = NULL;
}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
#endif /* not emacs */