0.12. (Implements POSIX draft P10003.2/D11.2, except for
internationalization features.)
- Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#undef _GNU_SOURCE
#define _GNU_SOURCE
+#ifdef emacs
/* Converts the pointer to the char to BEG-based offset from the start. */
#define PTR_TO_OFFSET(d) \
POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \
? (d) - string1 : (d) - (string2 - size1))
-#define POS_AS_IN_BUFFER(p) ((p) + 1)
+#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
+#else
+#define PTR_TO_OFFSET(d) 0
+#endif
#ifdef HAVE_CONFIG_H
#include <config.h>
#include "category.h"
#define malloc xmalloc
+#define realloc xrealloc
#define free xfree
#else /* not emacs */
#define PATFETCH(c) \
do {if (p == pend) return REG_EEND; \
c = (unsigned char) *p++; \
- if (translate) c = RE_TRANSLATE (translate, c); \
+ if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \
} while (0)
#endif
when we use a character as a subscript we must make it unsigned. */
#ifndef TRANSLATE
#define TRANSLATE(d) \
- (translate ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
+ (RE_TRANSLATE_P (translate) \
+ ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
#endif
compile_stack_type compile_stack;
/* Points to the current (ending) position in the pattern. */
+#ifdef AIX
+ /* The AIX compiler fails if P is declared `const'. */
+ char *p = pattern;
+#else
const char *p = pattern;
+#endif
const char *pend = pattern + size;
/* How to translate the characters in the pattern. */
p1 = p - 1; /* P1 points the head of C. */
#ifdef emacs
if (bufp->multibyte)
- /* Set P to the next character boundary. */
- p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+ {
+ c = STRING_CHAR (p1, pend - p1);
+ c = TRANSLATE (c);
+ /* Set P to the next character boundary. */
+ p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+ }
#endif
/* If no exactn currently being built. */
if (!pending_exact
|| *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
/* If followed by a repetition operator. */
- || *p == '*' || *p == '^'
+ || (p != pend && (*p == '*' || *p == '^'))
|| ((syntax & RE_BK_PLUS_QM)
- ? *p == '\\' && (p[1] == '+' || p[1] == '?')
- : (*p == '+' || *p == '?'))
+ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : p != pend && (*p == '+' || *p == '?'))
|| ((syntax & RE_INTERVALS)
&& ((syntax & RE_NO_BK_BRACES)
- ? *p == '{'
- : (p[0] == '\\' && p[1] == '{'))))
+ ? p != pend && *p == '{'
+ : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
{
/* Start building a new exactn. */
pending_exact = b - 1;
}
- /* Here, C may translated, therefore C may not equal to *P1. */
- while (1)
+#ifdef emacs
+ if (! SINGLE_BYTE_CHAR_P (c))
+ {
+ unsigned char work[4], *str;
+ int i = CHAR_STRING (c, work, str);
+ int j;
+ for (j = 0; j < i; j++)
+ {
+ BUF_PUSH (str[j]);
+ (*pending_exact)++;
+ }
+ }
+ else
+#endif
{
BUF_PUSH (c);
(*pending_exact)++;
- if (++p1 == p)
- break;
-
- /* Rest of multibyte form should be copied literally. */
- c = *(unsigned char *)p1;
}
break;
} /* switch (c) */
case charset_not:
- /* Chars beyond end of map must be allowed. End of map is
- `127' if bufp->multibyte is nonzero. */
- simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+ /* Chars beyond end of bitmap are possible matches.
+ All the single-byte codes can occur in multibyte buffers.
+ So any that are not listed in the charset
+ are possible matches, even in multibyte buffers. */
+ simple_char_max = (1 << BYTEWIDTH);
for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
j < simple_char_max; j++)
fastmap[j] = 1;
case wordchar:
- simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+ /* All the single-byte codes can occur in multibyte buffers,
+ and they may have word syntax. So do consider them. */
+ simple_char_max = (1 << BYTEWIDTH);
for (j = 0; j < simple_char_max; j++)
if (SYNTAX (j) == Sword)
fastmap[j] = 1;
case notwordchar:
- simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+ /* All the single-byte codes can occur in multibyte buffers,
+ and some of them may lack word syntax. So do consider them. */
+ simple_char_max = (1 << BYTEWIDTH);
for (j = 0; j < simple_char_max; j++)
if (SYNTAX (j) != Sword)
fastmap[j] = 1;
{
int fastmap_newline = fastmap['\n'];
- /* `.' matches anything (but if bufp->multibyte is
- nonzero, matches `\000' .. `\127' and possible multibyte
- character) ... */
+ /* `.' matches anything, except perhaps newline.
+ Even in a multibyte buffer, it should match any
+ conceivable byte value for the fastmap. */
if (bufp->multibyte)
- {
- simple_char_max = 0x80;
-
- for (j = 0x80; j < 0xA0; j++)
- if (BASE_LEADING_CODE_P (j))
- fastmap[j] = 1;
- match_any_multibyte_characters = true;
- }
- else
- simple_char_max = (1 << BYTEWIDTH);
+ match_any_multibyte_characters = true;
+ simple_char_max = (1 << BYTEWIDTH);
for (j = 0; j < simple_char_max; j++)
fastmap[j] = 1;
case categoryspec:
k = *p++;
- simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+ simple_char_max = (1 << BYTEWIDTH);
for (j = 0; j < simple_char_max; j++)
if (CHAR_HAS_CATEGORY (j, k))
fastmap[j] = 1;
case notcategoryspec:
k = *p++;
- simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+ simple_char_max = (1 << BYTEWIDTH);
for (j = 0; j < simple_char_max; j++)
if (!CHAR_HAS_CATEGORY (j, k))
fastmap[j] = 1;
range = total_size - startpos;
/* If the search isn't to be a backwards one, don't waste time in a
- search for a pattern that must be anchored. */
+ search for a pattern anchored at beginning of buffer. */
if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
{
if (startpos > 0)
return -1;
else
- range = 1;
+ range = 0;
}
#ifdef emacs
don't keep searching past point. */
if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
{
- range = PT - startpos;
- if (range <= 0)
+ range = PT_BYTE - BEGV_BYTE - startpos;
+ if (range < 0)
return -1;
}
#endif /* emacs */
anchored_start = 1;
#ifdef emacs
- SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
- POS_AS_IN_BUFFER (startpos > 0
- ? startpos - 1 : startpos),
- 1);
+ gl_state.object = re_match_object;
+ {
+ int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+ int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
+
+ SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+ }
#endif
/* Loop through the string, looking for a place to start matching. */
/* Written out as an if-else to avoid testing `translate'
inside the loop. */
- if (translate)
+ if (RE_TRANSLATE_P (translate))
{
if (multibyte)
while (range > lim)
else
while (range > lim
&& !fastmap[(unsigned char)
- RE_TRANSLATE (translate, (unsigned char) *d++)])
- range--;
+ RE_TRANSLATE (translate, (unsigned char) *d)])
+ {
+ d++;
+ range--;
+ }
}
else
- while (range > lim && !fastmap[(unsigned char) *d++])
- range--;
+ while (range > lim && !fastmap[(unsigned char) *d])
+ {
+ d++;
+ range--;
+ }
startpos += irange - range;
}
: size1 - startpos);
buf_ch = STRING_CHAR (d, room);
- if (translate)
+ if (RE_TRANSLATE_P (translate))
buf_ch = RE_TRANSLATE (translate, buf_ch);
if (! (buf_ch >= 0400
int result;
#ifdef emacs
- SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
- POS_AS_IN_BUFFER (pos > 0 ? pos - 1 : pos),
- 1);
+ int charpos;
+ int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+ gl_state.object = re_match_object;
+ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
+ SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
#endif
result = re_match_2_internal (bufp, string1, size1, string2, size2,
- pos, regs, stop);
+ pos, regs, stop);
alloca (0);
return result;
}
/* This is written out as an if-else so we don't waste time
testing `translate' inside the loop. */
- if (translate)
+ if (RE_TRANSLATE_P (translate))
{
#ifdef emacs
if (multibyte)
do
{
PREFETCH ();
- if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d++)
+ if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
!= (unsigned char) *p++)
goto fail;
+ d++;
}
while (--mcnt);
}
else
#endif /* not emacs */
{
- buf_ch = *d;
+ buf_ch = (unsigned char) *d;
buf_charlen = 1;
}
/* Compare that many; failure if mismatch, else move
past them. */
- if (translate
+ if (RE_TRANSLATE_P (translate)
? bcmp_translate (d, d2, mcnt, translate)
: bcmp (d, d2, mcnt))
goto fail;
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
#ifdef emacs
- charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 ? pos1 : 1);
+ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
UPDATE_SYNTAX_TABLE (charpos);
#endif
s1 = SYNTAX (c1);