+#ifdef MBS_SUPPORT
+ /* We assume a charset(_not) structure as a wchar_t array.
+ charset[0] = (re_opcode_t) charset(_not)
+ charset[1] = l (= length of char_classes)
+ charset[2] = m (= length of collating_symbols)
+ charset[3] = n (= length of equivalence_classes)
+ charset[4] = o (= length of char_ranges)
+ charset[5] = p (= length of chars)
+
+ charset[6] = char_class (wctype_t)
+ charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
+ ...
+ charset[l+5] = char_class (wctype_t)
+
+ charset[l+6] = collating_symbol (wchar_t)
+ ...
+ charset[l+m+5] = collating_symbol (wchar_t)
+ ifdef _LIBC we use the index if
+ _NL_COLLATE_SYMB_EXTRAMB instead of
+ wchar_t string.
+
+ charset[l+m+6] = equivalence_classes (wchar_t)
+ ...
+ charset[l+m+n+5] = equivalence_classes (wchar_t)
+ ifdef _LIBC we use the index in
+ _NL_COLLATE_WEIGHT instead of
+ wchar_t string.
+
+ charset[l+m+n+6] = range_start
+ charset[l+m+n+7] = range_end
+ ...
+ charset[l+m+n+2o+4] = range_start
+ charset[l+m+n+2o+5] = range_end
+ ifdef _LIBC we use the value looked up
+ in _NL_COLLATE_COLLSEQ instead of
+ wchar_t character.
+
+ charset[l+m+n+2o+6] = char
+ ...
+ charset[l+m+n+2o+p+5] = char
+
+ */
+
+ /* We need at least 6 spaces: the opcode, the length of
+ char_classes, the length of collating_symbols, the length of
+ equivalence_classes, the length of char_ranges, the length of
+ chars. */
+ GET_BUFFER_SPACE (6);
+
+ /* Save b as laststart. And We use laststart as the pointer
+ to the first element of the charset here.
+ In other words, laststart[i] indicates charset[i]. */
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Push the length of char_classes, the length of
+ collating_symbols, the length of equivalence_classes, the
+ length of char_ranges and the length of chars. */
+ BUF_PUSH_3 (0, 0, 0);
+ BUF_PUSH_2 (0, 0);
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-6] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ {
+ BUF_PUSH('\n');
+ laststart[5]++; /* Update the length of characters */
+ }
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+ PATFETCH (c1);
+ BUF_PUSH(c1);
+ laststart[5]++; /* Update the length of chars */
+ range_start = c1;
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN (REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret;
+ /* Allocate the space for range_start and range_end. */
+ GET_BUFFER_SPACE (2);
+ /* Update the pointer to indicate end of buffer. */
+ b += 2;
+ ret = compile_range (range_start, &p, pend, translate,
+ syntax, b, laststart);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
+ }
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+ /* Allocate the space for range_start and range_end. */
+ GET_BUFFER_SPACE (2);
+ /* Update the pointer to indicate end of buffer. */
+ b += 2;
+ ret = compile_range (c, &p, pend, translate, syntax, b,
+ laststart);
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but store them as character). */
+ if (c == ':' && *p == ']')
+ {
+ wctype_t wt;
+ uintptr_t alignedp;
+
+ /* Query the character class as wctype_t. */
+ wt = IS_CHAR_CLASS (str);
+ if (wt == 0)
+ FREE_STACK_RETURN (REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ /* Allocate the space for character class. */
+ GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
+ /* Update the pointer to indicate end of buffer. */
+ b += CHAR_CLASS_SIZE;
+ /* Move data which follow character classes
+ not to violate the data. */
+ insert_space(CHAR_CLASS_SIZE,
+ laststart + 6 + laststart[1],
+ b - 1);
+ alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
+ + __alignof__(wctype_t) - 1)
+ & ~(uintptr_t)(__alignof__(wctype_t) - 1);
+ /* Store the character class. */
+ *((wctype_t*)alignedp) = wt;
+ /* Update length of char_classes */
+ laststart[1] += CHAR_CLASS_SIZE;
+
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ BUF_PUSH ('[');
+ BUF_PUSH (':');
+ laststart[5] += 2; /* Update the length of characters */
+ range_start = ':';
+ had_char_class = false;
+ }
+ }
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
+ || *p == '.'))
+ {
+ CHAR_TYPE str[128]; /* Should be large enough. */
+ CHAR_TYPE delim = *p; /* '=' or '.' */
+# ifdef _LIBC
+ uint32_t nrules =
+ _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+# endif
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[=' or '[[.'. */
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if ((c == delim && *p == ']') || p == pend)
+ break;
+ if (c1 < sizeof (str) - 1)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ if (c == delim && *p == ']' && str[0] != '\0')
+ {
+ unsigned int i, offset;
+ /* If we have no collation data we use the default
+ collation in which each character is in a class
+ by itself. It also means that ASCII is the
+ character set and therefore we cannot have character
+ with more than one byte in the multibyte
+ representation. */
+
+ /* If not defined _LIBC, we push the name and
+ `\0' for the sake of matching performance. */
+ int datasize = c1 + 1;
+
+# ifdef _LIBC
+ int32_t idx = 0;
+ if (nrules == 0)
+# endif
+ {
+ if (c1 != 1)
+ FREE_STACK_RETURN (REG_ECOLLATE);
+ }
+# ifdef _LIBC
+ else
+ {
+ const int32_t *table;
+ const int32_t *weights;
+ const int32_t *extra;
+ const int32_t *indirect;
+ wint_t *cp;
+
+ /* This #include defines a local function! */
+# include <locale/weightwc.h>
+
+ if(delim == '=')
+ {
+ /* We push the index for equivalence class. */
+ cp = (wint_t*)str;
+
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_TABLEWC);
+ weights = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_WEIGHTWC);
+ extra = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_EXTRAWC);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTWC);
+
+ idx = findidx ((const wint_t**)&cp);
+ if (idx == 0 || cp < (wint_t*) str + c1)
+ /* This is no valid character. */
+ FREE_STACK_RETURN (REG_ECOLLATE);
+
+ str[0] = (wchar_t)idx;
+ }
+ else /* delim == '.' */
+ {
+ /* We push collation sequence value
+ for collating symbol. */
+ int32_t table_size;
+ const int32_t *symb_table;
+ const unsigned char *extra;
+ int32_t idx;
+ int32_t elem;
+ int32_t second;
+ int32_t hash;
+ char char_str[c1];
+
+ /* We have to convert the name to a single-byte
+ string. This is possible since the names
+ consist of ASCII characters and the internal
+ representation is UCS4. */
+ for (i = 0; i < c1; ++i)
+ char_str[i] = str[i];
+
+ table_size =
+ _NL_CURRENT_WORD (LC_COLLATE,
+ _NL_COLLATE_SYMB_HASH_SIZEMB);
+ symb_table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_EXTRAMB);
+
+ /* Locate the character in the hashing table. */
+ hash = elem_hash (char_str, c1);
+
+ idx = 0;
+ elem = hash % table_size;
+ second = hash % (table_size - 2);
+ while (symb_table[2 * elem] != 0)
+ {
+ /* First compare the hashing value. */
+ if (symb_table[2 * elem] == hash
+ && c1 == extra[symb_table[2 * elem + 1]]
+ && memcmp (str,
+ &extra[symb_table[2 * elem + 1]
+ + 1], c1) == 0)
+ {
+ /* Yep, this is the entry. */
+ idx = symb_table[2 * elem + 1];
+ idx += 1 + extra[idx];
+ break;
+ }
+
+ /* Next entry. */
+ elem += second;
+ }
+
+ if (symb_table[2 * elem] != 0)
+ {
+ /* Compute the index of the byte sequence
+ in the table. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~4;
+
+ str[0] = (wchar_t) idx + 4;
+ }
+ else if (symb_table[2 * elem] == 0 && c1 == 1)
+ {
+ /* No valid character. Match it as a
+ single byte character. */
+ had_char_class = false;
+ BUF_PUSH(str[0]);
+ /* Update the length of characters */
+ laststart[5]++;
+ range_start = str[0];
+
+ /* Throw away the ] at the end of the
+ collating symbol. */
+ PATFETCH (c);
+ /* exit from the switch block. */
+ continue;
+ }
+ else
+ FREE_STACK_RETURN (REG_ECOLLATE);
+ }
+ datasize = 1;
+ }
+# endif
+ /* Throw away the ] at the end of the equivalence
+ class (or collating symbol). */
+ PATFETCH (c);
+
+ /* Allocate the space for the equivalence class
+ (or collating symbol) (and '\0' if needed). */
+ GET_BUFFER_SPACE(datasize);
+ /* Update the pointer to indicate end of buffer. */
+ b += datasize;
+
+ if (delim == '=')
+ { /* equivalence class */
+ /* Calculate the offset of char_ranges,
+ which is next to equivalence_classes. */
+ offset = laststart[1] + laststart[2]
+ + laststart[3] +6;
+ /* Insert space. */
+ insert_space(datasize, laststart + offset, b - 1);
+
+ /* Write the equivalence_class and \0. */
+ for (i = 0 ; i < datasize ; i++)
+ laststart[offset + i] = str[i];
+
+ /* Update the length of equivalence_classes. */
+ laststart[3] += datasize;
+ had_char_class = true;
+ }
+ else /* delim == '.' */
+ { /* collating symbol */
+ /* Calculate the offset of the equivalence_classes,
+ which is next to collating_symbols. */
+ offset = laststart[1] + laststart[2] + 6;
+ /* Insert space and write the collationg_symbol
+ and \0. */
+ insert_space(datasize, laststart + offset, b-1);
+ for (i = 0 ; i < datasize ; i++)
+ laststart[offset + i] = str[i];
+
+ /* In re_match_2_internal if range_start < -1, we
+ assume -range_start is the offset of the
+ collating symbol which is specified as
+ the character of the range start. So we assign
+ -(laststart[1] + laststart[2] + 6) to
+ range_start. */
+ range_start = -(laststart[1] + laststart[2] + 6);
+ /* Update the length of collating_symbol. */
+ laststart[2] += datasize;
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ BUF_PUSH ('[');
+ BUF_PUSH (delim);
+ laststart[5] += 2; /* Update the length of characters */
+ range_start = delim;
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ BUF_PUSH(c);
+ laststart[5]++; /* Update the length of characters */
+ range_start = c;
+ }
+ }
+
+#else /* not MBS_SUPPORT */