/* TODO:
- structure the opcode space into opcode+flag.
- merge with glibc's regex.[ch].
- - replace succeed_n + jump_n with a combined operation so that the counter
- can simply be decremented when popping the failure_point without having
- to stack up failure_count entries.
- */
+ - replace (succeed_n + jump_n + set_number_at) with something that doesn't
+ need to modify the compiled regexp so that re_match can be reentrant.
+ - get rid of on_failure_jump_smart by doing the optimization in re_comp
+ rather than at run-time, so that re_match can be reentrant.
+*/
/* AIX requires this to be the first thing in the file. */
#if defined _AIX && !defined REGEX_MALLOC
/* Whether to use ISO C Amendment 1 wide char functions.
Those should not be used for Emacs since it uses its own. */
+#if defined _LIBC
+#define WIDE_CHAR_SUPPORT 1
+#else
#define WIDE_CHAR_SUPPORT \
- (defined _LIBC || HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
+ (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
+#endif
/* For platform which support the ISO C amendement 1 functionality we
support user defined character classes. */
# include "charset.h"
# include "category.h"
+# ifdef malloc
+# undef malloc
+# endif
# define malloc xmalloc
+# ifdef realloc
+# undef realloc
+# endif
# define realloc xrealloc
+# ifdef free
+# undef free
+# endif
# define free xfree
/* Converts the pointer to the char to BEG-based offset from the start. */
is followed by a range table:
2 bytes of flags for character sets (low 8 bits, high 8 bits)
See RANGE_TABLE_WORK_BITS below.
- 2 bytes, the number of pairs that follow
+ 2 bytes, the number of pairs that follow (upto 32767)
pairs, each 2 multibyte characters,
each multibyte character represented as 3 bytes. */
charset,
static void
extract_number (dest, source)
int *dest;
- unsigned char *source;
+ re_char *source;
{
int temp = SIGN_EXTEND_CHAR (*(source + 1));
*dest = *source & 0377;
static void
extract_number_and_incr (destination, source)
int *destination;
- unsigned char **source;
+ re_char **source;
{
extract_number (destination, *source);
*source += 2;
#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
do \
{ \
- int range_start, range_end; \
- unsigned char *p; \
- unsigned char *range_table_end \
+ re_wchar_t range_start, range_end; \
+ re_char *p; \
+ re_char *range_table_end \
= CHARSET_RANGE_TABLE_END ((range_table), (count)); \
\
for (p = (range_table); p < range_table_end; p += 2 * 3) \
{ \
/* Number of ranges in range table. */ \
int count; \
- unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \
- \
+ re_char *range_table = CHARSET_RANGE_TABLE (charset); \
+ \
EXTRACT_NUMBER_AND_INCR (count, range_table); \
CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
} \
void
print_partial_compiled_pattern (start, end)
- unsigned char *start;
- unsigned char *end;
+ re_char *start;
+ re_char *end;
{
int mcnt, mcnt2;
- unsigned char *p = start;
- unsigned char *pend = end;
+ re_char *p = start;
+ re_char *pend = end;
if (start == NULL)
{
print_compiled_pattern (bufp)
struct re_pattern_buffer *bufp;
{
- unsigned char *buffer = bufp->buffer;
+ re_char *buffer = bufp->buffer;
print_partial_compiled_pattern (buffer, buffer + bufp->used);
printf ("%ld bytes used/%ld bytes allocated.\n",
/* Roughly the maximum number of failure points on the stack. Would be
exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
This is a variable only so users of regex can assign to it; we never
- change it ourselves. */
+ change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE
+ before using it, so it should probably be a byte-count instead. */
# if defined MATCH_MAY_ALLOCATE
/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. In order for a larger
union fail_stack_elt
{
- const unsigned char *pointer;
+ re_char *pointer;
/* This should be the biggest `int' that's no bigger than a pointer. */
long integer;
};
size_t frame; /* Offset of the cur constructed frame. */
} fail_stack_type;
-#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1)))
-/* Push pointer POINTER on FAIL_STACK.
- Return 1 if was able to do so and 0 if ran out of memory allocating
- space to do so. */
-#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
- ((FAIL_STACK_FULL () \
- && !GROW_FAIL_STACK (FAIL_STACK)) \
- ? 0 \
- : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
- 1))
-#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
-
/* Push a pointer value onto the failure stack.
Assumes the variable `fail_stack'. Probably should only
be called from within `PUSH_FAILURE_POINT'. */
#define PUSH_FAILURE_POINTER(item) \
- fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+ fail_stack.stack[fail_stack.avail++].pointer = (item)
/* This pushes an integer-valued item onto the failure stack.
Assumes the variable `fail_stack'. Probably should only
PUSH_FAILURE_INT (num); \
} while (0)
-#define PUSH_FAILURE_COUNT(ptr) \
+/* Change the counter's value to VAL, but make sure that it will
+ be reset when backtracking. */
+#define PUSH_NUMBER(ptr,val) \
do { \
char *destination; \
int c; \
ENSURE_FAIL_STACK(3); \
EXTRACT_NUMBER (c, ptr); \
- DEBUG_PRINT3 (" Push counter %p = %d\n", ptr, c); \
+ DEBUG_PRINT4 (" Push number %p = %d -> %d\n", ptr, c, val); \
PUSH_FAILURE_INT (c); \
PUSH_FAILURE_POINTER (ptr); \
PUSH_FAILURE_INT (-1); \
+ STORE_NUMBER (ptr, val); \
} while (0)
/* Pop a saved register off the stack. */
if (reg == -1) \
{ \
/* It's a counter. */ \
+ /* Here, we discard `const', making re_match non-reentrant. */ \
unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \
reg = POP_FAILURE_INT (); \
STORE_NUMBER (ptr, reg); \
/* Estimate the size of data pushed by a typical failure stack entry.
An estimate is all we need, because all we use this for
is to choose a limit for how big to make the failure stack. */
-
+/* BEWARE, the value `20' is hard-coded in emacs.c:main(). */
#define TYPICAL_FAILURE_SIZE 20
/* How many items can still be added to the stack without overflowing it. */
while (fail_stack.frame < fail_stack.avail) \
POP_FAILURE_REG_OR_COUNT (); \
\
- pat = (unsigned char *) POP_FAILURE_POINTER (); \
+ pat = POP_FAILURE_POINTER (); \
DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
\
/* If the saved string location is NULL, it came from an \
on_failure_keep_string_jump opcode, and we want to throw away the \
saved NULL, thus retaining our current position in the string. */ \
- str = (re_char *) POP_FAILURE_POINTER (); \
+ str = POP_FAILURE_POINTER (); \
DEBUG_PRINT2 (" Popping string %p: `", str); \
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
DEBUG_PRINT1 ("'\n"); \
int arg, unsigned char *end));
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
int arg1, int arg2, unsigned char *end));
-static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern,
- const unsigned char *p,
+static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
+ re_char *p,
reg_syntax_t syntax));
-static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p,
- const unsigned char *pend,
+static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
+ re_char *pend,
reg_syntax_t syntax));
-static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p));
-static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
+static re_char *skip_one_char _RE_ARGS ((re_char *p));
+static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
char *fastmap, const int multibyte));
/* Fetch the next character in the uncompiled pattern---translating it
- if necessary. Also cast from a signed character in the constant
- string passed to us by the user to an unsigned char that we can use
- as an array index (in, e.g., `translate'). */
+ if necessary. */
#define PATFETCH(c) \
do { \
PATFETCH_RAW (c); \
/* Make sure we have at least N more bytes of space in buffer. */
#define GET_BUFFER_SPACE(n) \
- while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
+ while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \
EXTEND_BUFFER ()
/* Make sure we have one more byte of buffer space and then add C to it. */
#endif
#define EXTEND_BUFFER() \
do { \
- unsigned char *old_buffer = bufp->buffer; \
+ re_char *old_buffer = bufp->buffer; \
if (bufp->allocated == MAX_BUF_SIZE) \
return REG_ESIZE; \
bufp->allocated <<= 1; \
if (bufp->allocated > MAX_BUF_SIZE) \
bufp->allocated = MAX_BUF_SIZE; \
- bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+ RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
if (bufp->buffer == NULL) \
return REG_ESPACE; \
/* If the buffer moved, move all the pointers into it. */ \
/* Set the bit for character C in a list. */
-#define SET_LIST_BIT(c) \
- (b[((unsigned char) (c)) / BYTEWIDTH] \
- |= 1 << (((unsigned char) c) % BYTEWIDTH))
+#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
/* Get the next unsigned number in the uncompiled pattern. */
# define CHAR_CLASS_MAX_LENGTH 256
# endif
typedef wctype_t re_wctype_t;
+typedef wchar_t re_wchar_t;
# define re_wctype wctype
# define re_iswctype iswctype
# define re_wctype_to_bit(cc) 0
# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
# define btowc(c) c
-/* Character classes' indices. */
+/* Character classes. */
typedef enum { RECC_ERROR = 0,
RECC_ALNUM, RECC_ALPHA, RECC_WORD,
RECC_GRAPH, RECC_PRINT,
RECC_ASCII, RECC_UNIBYTE
} re_wctype_t;
+typedef int re_wchar_t;
+
/* Map a string to the char class it names (if any). */
static re_wctype_t
-re_wctype (string)
- unsigned char *string;
+re_wctype (str)
+ re_char *str;
{
+ const char *string = str;
if (STREQ (string, "alnum")) return RECC_ALNUM;
else if (STREQ (string, "alpha")) return RECC_ALPHA;
else if (STREQ (string, "word")) return RECC_WORD;
case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
case RECC_WORD: return ISWORD (ch);
case RECC_ERROR: return false;
+ default:
+ abort();
}
}
case RECC_SPACE: return BIT_SPACE;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
+ default:
+ abort();
}
}
#endif
if (immediate_quit) QUIT; \
} while (0)
#else
-# define IMMEDIATE_QUIT_CHECK (0)
+# define IMMEDIATE_QUIT_CHECK ((void)0)
#endif
\f
#ifndef MATCH_MAY_ALLOCATE
reg_syntax_t syntax;
struct re_pattern_buffer *bufp;
{
- /* We fetch characters from PATTERN here. Even though PATTERN is
- `char *' (i.e., signed), we declare these variables as unsigned, so
- they can be reliably used as array indices. */
- register unsigned int c, c1;
+ /* We fetch characters from PATTERN here. */
+ register re_wchar_t c, c1;
/* A random temporary spot in PATTERN. */
re_char *p1;
boolean simple = skip_one_char (laststart) == b;
unsigned int startoffset = 0;
re_opcode_t ofj =
+ /* Check if the loop can match the empty string. */
(simple || !analyse_first (laststart, b, NULL, 0)) ?
on_failure_jump : on_failure_jump_loop;
assert (skip_one_char (laststart) <= b);
if (SINGLE_BYTE_CHAR_P (c))
/* ... into bitmap. */
{
- unsigned this_char;
+ re_wchar_t this_char;
int range_start = c, range_end = c1;
/* If the start is after the end, the range is empty. */
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
- if (syntax & RE_NO_BK_REFS)
- goto normal_char;
+ {
+ regnum_t reg;
- c1 = c - '0';
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_backslash;
- if (c1 > regnum)
- FREE_STACK_RETURN (REG_ESUBREG);
+ reg = c - '0';
- /* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack (compile_stack, (regnum_t) c1))
- goto normal_char;
+ /* Can't back reference to a subexpression before its end. */
+ if (reg > regnum || group_in_compile_stack (compile_stack, reg))
+ FREE_STACK_RETURN (REG_ESUBREG);
- laststart = b;
- BUF_PUSH_2 (duplicate, c1);
+ laststart = b;
+ BUF_PUSH_2 (duplicate, reg);
+ }
break;
static boolean
at_begline_loc_p (pattern, p, syntax)
- const unsigned char *pattern, *p;
+ re_char *pattern, *p;
reg_syntax_t syntax;
{
- const unsigned char *prev = p - 2;
+ re_char *prev = p - 2;
boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
return
static boolean
at_endline_loc_p (p, pend, syntax)
- const unsigned char *p, *pend;
+ re_char *p, *pend;
reg_syntax_t syntax;
{
- const unsigned char *next = p;
+ re_char *next = p;
boolean next_backslash = *next == '\\';
- const unsigned char *next_next = p + 1 < pend ? p + 1 : 0;
+ re_char *next_next = p + 1 < pend ? p + 1 : 0;
return
/* Before a subexpression? */
Return 1 if p..pend might match the empty string.
Return 0 if p..pend matches at least one char.
- Return -1 if p..pend matches at least one char, but fastmap was not
- updated accurately.
- Return -2 if an error occurred. */
+ Return -1 if fastmap was not updated accurately. */
static int
analyse_first (p, pend, fastmap, multibyte)
- unsigned char *p, *pend;
+ re_char *p, *pend;
char *fastmap;
const int multibyte;
{
int j, k;
boolean not;
-#ifdef MATCH_MAY_ALLOCATE
- fail_stack_type fail_stack;
-#endif
-#ifndef REGEX_MALLOC
- char *destination;
-#endif
-
-#if defined REL_ALLOC && defined REGEX_MALLOC
- /* This holds the pointer to the failure stack, when
- it is allocated relocatably. */
- fail_stack_elt_t *failure_stack_ptr;
-#endif
-
- /* Assume that each path through the pattern can be null until
- proven otherwise. We set this false at the bottom of switch
- statement, to which we get only if a particular path doesn't
- match the empty string. */
- boolean path_can_be_null = true;
/* If all elements for base leading-codes in fastmap is set, this
flag is set true. */
assert (p);
- INIT_FAIL_STACK ();
-
/* The loop below works as follows:
- It has a working-list kept in the PATTERN_STACK and which basically
starts by only containing a pointer to the first operation.
so that `p' is monotonically increasing. More to the point, we
never set `p' (or push) anything `<= p1'. */
- while (1)
+ while (p < pend)
{
/* `p1' is used as a marker of how far back a `on_failure_jump'
can go without being ignored. It is normally equal to `p'
3..9: <body>
10: on_failure_jump 3
as used for the *? operator. */
- unsigned char *p1 = p;
-
- if (p >= pend)
- {
- if (path_can_be_null)
- return (RESET_FAIL_STACK (), 1);
-
- /* We have reached the (effective) end of pattern. */
- if (PATTERN_STACK_EMPTY ())
- return (RESET_FAIL_STACK (), 0);
-
- p = (unsigned char*) POP_PATTERN_OP ();
- path_can_be_null = true;
- continue;
- }
-
- /* We should never be about to go beyond the end of the pattern. */
- assert (p < pend);
+ re_char *p1 = p;
switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
{
case succeed:
- p = pend;
+ return 1;
continue;
case duplicate:
/* We could put all the chars except for \n (and maybe \0)
but we don't bother since it is generally not worth it. */
if (!fastmap) break;
- return (RESET_FAIL_STACK (), -1);
+ return -1;
case charset_not:
#else /* emacs */
/* This match depends on text properties. These end with
aborting optimizations. */
- return (RESET_FAIL_STACK (), -1);
+ return -1;
case categoryspec:
case notcategoryspec:
EXTRACT_NUMBER_AND_INCR (j, p);
if (p + j <= p1)
; /* Backward jump to be ignored. */
- else if (!PUSH_PATTERN_OP (p + j, fail_stack))
- return (RESET_FAIL_STACK (), -2);
+ else
+ { /* We have to look down both arms.
+ We first go down the "straight" path so as to minimize
+ stack usage when going through alternatives. */
+ int r = analyse_first (p, pend, fastmap, multibyte);
+ if (r) return r;
+ p += j;
+ }
continue;
/* Getting here means we have found the possible starting
characters for one path of the pattern -- and that the empty
- string does not match. We need not follow this path further.
- Instead, look at the next alternative (remembered on the
- stack), or quit if no more. The test at the top of the loop
- does these things. */
- path_can_be_null = false;
- p = pend;
+ string does not match. We need not follow this path further. */
+ return 0;
} /* while p */
- return (RESET_FAIL_STACK (), 0);
+ /* We reached the end without matching anything. */
+ return 1;
+
} /* analyse_first */
\f
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
fastmap, RE_MULTIBYTE_P (bufp));
bufp->can_be_null = (analysis != 0);
- if (analysis < -1)
- return analysis;
return 0;
} /* re_compile_fastmap */
\f
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
- if (re_compile_fastmap (bufp) == -2)
- return -2;
+ re_compile_fastmap (bufp);
/* See whether the pattern is anchored. */
anchored_start = (bufp->buffer[0] == begline);
if (fastmap && startpos < total_size && !bufp->can_be_null)
{
register re_char *d;
- register unsigned int buf_ch;
+ register re_wchar_t buf_ch;
d = POS_ADDR_VSTRING (startpos);
/* If the operation is a match against one or more chars,
return a pointer to the next operation, else return NULL. */
-static unsigned char *
+static re_char *
skip_one_char (p)
- unsigned char *p;
+ re_char *p;
{
switch (SWITCH_ENUM_CAST (*p++))
{
case endline:
case exactn:
{
- register unsigned int c
+ register re_wchar_t c
= (re_opcode_t) *p2 == endline ? '\n'
- : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
+ : RE_STRING_CHAR (p2 + 2, pend - p2 - 2);
if ((re_opcode_t) *p1 == exactn)
{
break;
case charset:
- case charset_not:
{
if ((re_opcode_t) *p1 == exactn)
/* Reuse the code above. */
return mutually_exclusive_p (bufp, p2, p1);
-
/* It is hard to list up all the character in charset
P2 if it includes multibyte character. Give up in
such case. */
P2 is ASCII, it is enough to test only bitmap
table of P1. */
- if (*p1 == *p2)
+ if ((re_opcode_t) *p1 == charset)
{
int idx;
/* We win if the charset inside the loop
return 1;
}
}
- else if ((re_opcode_t) *p1 == charset
- || (re_opcode_t) *p1 == charset_not)
+ else if ((re_opcode_t) *p1 == charset_not)
{
int idx;
/* We win if the charset_not inside the loop lists
}
}
}
+ break;
+ case charset_not:
+ switch (SWITCH_ENUM_CAST (*p1))
+ {
+ case exactn:
+ case charset:
+ /* Reuse the code above. */
+ return mutually_exclusive_p (bufp, p2, p1);
+ case charset_not:
+ /* When we have two charset_not, it's very unlikely that
+ they don't overlap. The union of the two sets of excluded
+ chars should cover all possible chars, which, as a matter of
+ fact, is virtually impossible in multibyte buffers. */
+ ;
+ }
+ break;
+
case wordend:
case notsyntaxspec:
return ((re_opcode_t) *p1 == syntaxspec
{
/* General temporaries. */
int mcnt;
+ size_t reg;
boolean not;
- unsigned char *p1;
/* Just past the end of the corresponding string. */
re_char *end1, *end2;
re_char *dfail;
/* Where we are in the pattern, and the end of the pattern. */
- unsigned char *p = bufp->buffer;
- register unsigned char *pend = p + bufp->used;
+ re_char *p = bufp->buffer;
+ re_char *pend = p + bufp->used;
/* We use this to map every character in the string. */
RE_TRANSLATE_TYPE translate = bufp->translate;
/* Initialize subexpression text positions to -1 to mark ones that no
start_memory/stop_memory has been seen for. Also initialize the
register information struct. */
- for (mcnt = 1; mcnt < num_regs; mcnt++)
- regstart[mcnt] = regend[mcnt] = NULL;
+ for (reg = 1; reg < num_regs; reg++)
+ regstart[reg] = regend[reg] = NULL;
/* We move `string1' into `string2' if the latter's empty -- but not if
`string1' is null. */
DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
- for (mcnt = 1; mcnt < num_regs; mcnt++)
+ for (reg = 1; reg < num_regs; reg++)
{
- best_regstart[mcnt] = regstart[mcnt];
- best_regend[mcnt] = regend[mcnt];
+ best_regstart[reg] = regstart[reg];
+ best_regend[reg] = regend[reg];
}
}
goto fail;
dend = ((d >= string1 && d <= end1)
? end_match_1 : end_match_2);
- for (mcnt = 1; mcnt < num_regs; mcnt++)
+ for (reg = 1; reg < num_regs; reg++)
{
- regstart[mcnt] = best_regstart[mcnt];
- regend[mcnt] = best_regend[mcnt];
+ regstart[reg] = best_regstart[reg];
+ regend[reg] = best_regend[reg];
}
}
} /* d != end_match_2 */
/* Go through the first `min (num_regs, regs->num_regs)'
registers, since that is all we initialized. */
- for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+ for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
{
- if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
- regs->start[mcnt] = regs->end[mcnt] = -1;
+ if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
+ regs->start[reg] = regs->end[reg] = -1;
else
{
- regs->start[mcnt]
- = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
- regs->end[mcnt]
- = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+ regs->start[reg]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
+ regs->end[reg]
+ = (regoff_t) POINTER_TO_OFFSET (regend[reg]);
}
}
we (re)allocated the registers, this is the case,
because we always allocate enough to have at least one
-1 at the end. */
- for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
- regs->start[mcnt] = regs->end[mcnt] = -1;
+ for (reg = num_regs; reg < regs->num_regs; reg++)
+ regs->start[reg] = regs->end[reg] = -1;
} /* regs && !bufp->no_sub */
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
case anychar:
{
int buf_charlen;
- unsigned int buf_ch;
+ re_wchar_t buf_ch;
DEBUG_PRINT1 ("EXECUTING anychar.\n");
/* Start of actual range_table, or end of bitmap if there is no
range table. */
- unsigned char *range_table;
+ re_char *range_table;
/* Nonzero if there is a range table. */
int range_table_exists;
DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
mcnt, p + mcnt);
{
- unsigned char *p1 = p; /* Next operation. */
- unsigned char *p2 = p + mcnt; /* Destination of the jump. */
+ re_char *p1 = p; /* Next operation. */
+ /* Here, we discard `const', making re_match non-reentrant. */
+ unsigned char *p2 = (unsigned char*) p + mcnt; /* Jump dest. */
+ unsigned char *p3 = (unsigned char*) p - 3; /* opcode location. */
p -= 3; /* Reset so that we will re-execute the
instruction once it's been changed. */
{
/* Use a fast `on_failure_keep_string_jump' loop. */
DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
- *p = (unsigned char) on_failure_keep_string_jump;
+ *p3 = (unsigned char) on_failure_keep_string_jump;
STORE_NUMBER (p2 - 2, mcnt + 3);
}
else
{
/* Default to a safe `on_failure_jump' loop. */
DEBUG_PRINT1 (" smart default => slow loop.\n");
- *p = (unsigned char) on_failure_jump;
+ *p3 = (unsigned char) on_failure_jump;
}
DEBUG_STATEMENT (debug -= 2);
}
/* Have to succeed matching what follows at least n times.
After that, handle like `on_failure_jump'. */
case succeed_n:
+ /* Signedness doesn't matter since we only compare MCNT to 0. */
EXTRACT_NUMBER (mcnt, p + 2);
DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
/* Originally, mcnt is how many times we HAVE to succeed. */
if (mcnt != 0)
{
+ /* Here, we discard `const', making re_match non-reentrant. */
+ unsigned char *p2 = (unsigned char*) p + 2; /* counter loc. */
mcnt--;
- p += 2;
- PUSH_FAILURE_COUNT (p);
- DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
- STORE_NUMBER_AND_INCR (p, mcnt);
+ p += 4;
+ PUSH_NUMBER (p2, mcnt);
}
else
/* The two bytes encoding mcnt == 0 are two no_op opcodes. */
break;
case jump_n:
+ /* Signedness doesn't matter since we only compare MCNT to 0. */
EXTRACT_NUMBER (mcnt, p + 2);
DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
/* Originally, this is how many times we CAN jump. */
if (mcnt != 0)
{
+ /* Here, we discard `const', making re_match non-reentrant. */
+ unsigned char *p2 = (unsigned char*) p + 2; /* counter loc. */
mcnt--;
- PUSH_FAILURE_COUNT (p + 2);
- STORE_NUMBER (p + 2, mcnt);
+ PUSH_NUMBER (p2, mcnt);
goto unconditional_jump;
}
/* If don't have to jump any more, skip over the rest of command. */
case set_number_at:
{
+ unsigned char *p2; /* Location of the counter. */
DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- p1 = p + mcnt;
+ /* Here, we discard `const', making re_match non-reentrant. */
+ p2 = (unsigned char*) p + mcnt;
+ /* Signedness doesn't matter since we only copy MCNT's bits . */
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
- PUSH_FAILURE_COUNT (p1);
- STORE_NUMBER (p1, mcnt);
+ DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt);
+ PUSH_NUMBER (p2, mcnt);
break;
}
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- int c1, c2, s1, s2;
+ re_wchar_t c1, c2;
+ int s1, s2;
#ifdef emacs
int offset = PTR_TO_OFFSET (d - 1);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- int c1, c2, s1, s2;
+ re_wchar_t c1, c2;
+ int s1, s2;
#ifdef emacs
int offset = PTR_TO_OFFSET (d);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- int c1, c2, s1, s2;
+ re_wchar_t c1, c2;
+ int s1, s2;
#ifdef emacs
int offset = PTR_TO_OFFSET (d) - 1;
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
}
#endif
{
- int c, len;
+ int len;
+ re_wchar_t c;
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
PREFETCH ();
{
- int c, len;
+ int len;
+ re_wchar_t c;
+
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
IMMEDIATE_QUIT_CHECK;
if (!FAIL_STACK_EMPTY ())
{
- re_char *str;
- unsigned char *pat;
+ re_char *str, *pat;
/* A restart point is known. Restore to that state. */
DEBUG_PRINT1 ("\nFAIL:\n");
POP_FAILURE_POINT (str, pat);
while (p1 < p1_end && p2 < p2_end)
{
int p1_charlen, p2_charlen;
- int p1_ch, p2_ch;
+ re_wchar_t p1_ch, p2_ch;
p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
int
regcomp (preg, pattern, cflags)
- regex_t *preg;
- const char *pattern;
+ regex_t *__restrict preg;
+ const char *__restrict pattern;
int cflags;
{
reg_errcode_t ret;
int
regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *preg;
- const char *string;
+ const regex_t *__restrict preg;
+ const char *__restrict string;
size_t nmatch;
regmatch_t pmatch[];
int eflags;