+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN (REG_BADRPT);
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* 1 means that zero (respectively, many) matches are allowed. */
+ boolean zero_times_ok = 0, many_times_ok = 0;
+ boolean greedy = 1;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ if ((syntax & RE_FRUGAL)
+ && c == '?' && (zero_times_ok || many_times_ok))
+ greedy = 0;
+ else
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+ }
+
+ if (p == pend)
+ break;
+ else if (*p == '*'
+ || (!(syntax & RE_BK_PLUS_QM)
+ && (*p == '+' || *p == '?')))
+ ;
+ else if (syntax & RE_BK_PLUS_QM && *p == '\\')
+ {
+ if (p+1 == pend)
+ FREE_STACK_RETURN (REG_EESCAPE);
+ if (p[1] == '+' || p[1] == '?')
+ PATFETCH (c); /* Gobble up the backslash. */
+ else
+ break;
+ }
+ else
+ break;
+ /* If we get here, we found another repeat character. */
+ PATFETCH (c);
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart || laststart == b)
+ break;
+
+ /* Now we know whether or not zero matches are allowed
+ and also whether or not two or more matches are allowed. */
+ if (greedy)
+ {
+ if (many_times_ok)
+ {
+ boolean simple = skip_one_char (laststart) == b;
+ unsigned int startoffset = 0;
+ re_opcode_t ofj =
+ /* Check if the loop can match the empty string. */
+ (simple || !analyse_first (laststart, b, NULL, 0))
+ ? on_failure_jump : on_failure_jump_loop;
+ assert (skip_one_char (laststart) <= b);
+
+ if (!zero_times_ok && simple)
+ { /* Since simple * loops can be made faster by using
+ on_failure_keep_string_jump, we turn simple P+
+ into PP* if P is simple. */
+ unsigned char *p1, *p2;
+ startoffset = b - laststart;
+ GET_BUFFER_SPACE (startoffset);
+ p1 = b; p2 = laststart;
+ while (p2 < p1)
+ *b++ = *p2++;
+ zero_times_ok = 1;
+ }
+
+ GET_BUFFER_SPACE (6);
+ if (!zero_times_ok)
+ /* A + loop. */
+ STORE_JUMP (ofj, b, b + 6);
+ else
+ /* Simple * loops can use on_failure_keep_string_jump
+ depending on what follows. But since we don't know
+ that yet, we leave the decision up to
+ on_failure_jump_smart. */
+ INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
+ laststart + startoffset, b + 6);
+ b += 3;
+ STORE_JUMP (jump, b, laststart + startoffset);
+ b += 3;
+ }
+ else
+ {
+ /* A simple ? pattern. */
+ assert (zero_times_ok);
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, laststart, b + 3);
+ b += 3;
+ }
+ }
+ else /* not greedy */
+ { /* I wish the greedy and non-greedy cases could be merged. */
+
+ GET_BUFFER_SPACE (7); /* We might use less. */
+ if (many_times_ok)
+ {
+ boolean emptyp = analyse_first (laststart, b, NULL, 0);
+
+ /* The non-greedy multiple match looks like
+ a repeat..until: we only need a conditional jump
+ at the end of the loop. */
+ if (emptyp) BUF_PUSH (no_op);
+ STORE_JUMP (emptyp ? on_failure_jump_nastyloop
+ : on_failure_jump, b, laststart);
+ b += 3;
+ if (zero_times_ok)
+ {
+ /* The repeat...until naturally matches one or more.
+ To also match zero times, we need to first jump to
+ the end of the loop (its conditional jump). */
+ INSERT_JUMP (jump, laststart, b);
+ b += 3;
+ }
+ }
+ else
+ {
+ /* non-greedy a?? */
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ }
+ pending_exact = 0;
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ boolean escaped_char = false;
+ const unsigned char *p2 = p;
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Don't translate yet. The range TRANSLATE(X..Y) cannot
+ always be determined from TRANSLATE(X) and TRANSLATE(Y)
+ So the translation is done later in a loop. Example:
+ (let ((case-fold-search t)) (string-match "[A-_]" "A")) */
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c);
+ escaped_char = true;
+ }
+ else
+ {
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p2 != p1)
+ break;
+ }
+
+ /* What should we do for the character which is
+ greater than 0x7F, but not BASE_LEADING_CODE_P?
+ XXX */
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ if (!escaped_char &&
+ syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ {
+ /* Leave room for the null. */
+ unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
+ const unsigned char *class_beg;
+
+ PATFETCH (c);
+ c1 = 0;
+ class_beg = p;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If it is a word bracketed by `[:' and `:]', look up
+ the named class and set the bits for its members.
+ Otherwise (the else branch below): undo the ending
+ character and the letters, and leave the leading `:'
+ and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ re_wchar_t ch;
+ re_wctype_t cc;
+
+ cc = re_wctype (str);
+
+ if (cc == 0)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Most character classes in a multibyte match
+ just set a flag. Exceptions are is_blank,
+ is_digit, is_cntrl, and is_xdigit, since
+ they can only match ASCII characters. We
+ don't need to handle them for multibyte.
+ They are distinguished by a negative wctype. */
+
+ if (multibyte)
+ SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
+ re_wctype_to_bit (cc));
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+ {
+ int translated = TRANSLATE (ch);
+ if (re_iswctype (btowc (ch), cc))
+ SET_LIST_BIT (translated);
+ }
+
+ /* Repeat the loop. */
+ continue;
+ }
+ else
+ {
+ /* Go back to right after the "[:". */
+ p = class_beg;
+ SET_LIST_BIT ('[');
+
+ /* Because the `:' may start a range, we
+ can't simply set its bit and repeat the loop.
+ Instead, just set C to `:' and handle it below. */
+ c = ':';
+ }
+ }
+
+ if (p < pend && p[0] == '-' && p[1] != ']')
+ {
+
+ /* Discard the `-'. */
+ PATFETCH (c1);
+
+ /* Fetch the character which ends the range. */
+ PATFETCH (c1);
+
+ if (SINGLE_BYTE_CHAR_P (c))
+ {
+ if (! SINGLE_BYTE_CHAR_P (c1))
+ {
+ /* Handle a range starting with a
+ character of less than 256, and ending
+ with a character of not less than 256.
+ Split that into two ranges, the low one
+ ending at 0377, and the high one
+ starting at the smallest character in
+ the charset of C1 and ending at C1. */
+ int charset = CHAR_CHARSET (c1);
+ re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
+
+ SET_RANGE_TABLE_WORK_AREA (range_table_work,
+ c2, c1);
+ c1 = 0377;
+ }
+ }
+ else if (!SAME_CHARSET_P (c, c1))
+ FREE_STACK_RETURN (REG_ERANGE);
+ }
+ else
+ /* Range from C to C. */
+ c1 = c;
+
+ /* Set the range ... */
+ if (SINGLE_BYTE_CHAR_P (c))
+ /* ... into bitmap. */
+ {
+ re_wchar_t this_char;
+ re_wchar_t range_start = c, range_end = c1;
+
+ /* If the start is after the end, the range is empty. */
+ if (range_start > range_end)
+ {
+ if (syntax & RE_NO_EMPTY_RANGES)
+ FREE_STACK_RETURN (REG_ERANGE);
+ /* Else, repeat the loop. */
+ }
+ else
+ {
+ for (this_char = range_start; this_char <= range_end;
+ this_char++)
+ SET_LIST_BIT (TRANSLATE (this_char));
+ }
+ }
+ else
+ /* ... into range table. */
+ SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+
+ /* Build real range table from work area. */
+ if (RANGE_TABLE_WORK_USED (range_table_work)
+ || RANGE_TABLE_WORK_BITS (range_table_work))
+ {
+ int i;
+ int used = RANGE_TABLE_WORK_USED (range_table_work);
+
+ /* Allocate space for COUNT + RANGE_TABLE. Needs two
+ bytes for flags, two for COUNT, and three bytes for
+ each character. */
+ GET_BUFFER_SPACE (4 + used * 3);
+
+ /* Indicate the existence of range table. */
+ laststart[1] |= 0x80;
+
+ /* Store the character class flag bits into the range table.
+ If not in emacs, these flag bits are always 0. */
+ *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
+ *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
+
+ STORE_NUMBER_AND_INCR (b, used / 2);
+ for (i = 0; i < used; i++)
+ STORE_CHARACTER_AND_INCR
+ (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
+ }
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;