+\f
+#if WIDE_CHAR_SUPPORT
+/* The GNU C library provides support for user-defined character classes
+   and the functions from ISO C Amendment 1.  In this configuration the
+   regex-level names are plain aliases for the wide-character API.  */
+# ifdef CHARCLASS_NAME_MAX
+#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen, but some implementations might still lack
+   CHARCLASS_NAME_MAX.  Use a reasonable default value.  */
+#  define CHAR_CLASS_MAX_LENGTH 256
+# endif
+typedef wctype_t re_wctype_t;
+typedef wchar_t re_wchar_t;
+# define re_wctype wctype
+# define re_iswctype iswctype
+/* Every class maps to an empty bit pattern in this configuration.  */
+# define re_wctype_to_bit(cc) 0
+#else
+/* Length of the longest class name recognized by re_wctype.  */
+# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'.  */
+/* Without wide-character support a character converts to itself.  The
+   argument is parenthesized so that an expression passed as C keeps its
+   grouping once the macro is expanded inside a larger expression.  */
+# define btowc(c) (c)
+
+/* Character classes.
+   The enumerators number consecutively from RECC_ERROR, which is 0 and
+   doubles as the "not a class" value.  */
+typedef enum
+{
+  RECC_ERROR = 0,
+  RECC_ALNUM,
+  RECC_ALPHA,
+  RECC_WORD,
+  RECC_GRAPH,
+  RECC_PRINT,
+  RECC_LOWER,
+  RECC_UPPER,
+  RECC_PUNCT,
+  RECC_CNTRL,
+  RECC_DIGIT,
+  RECC_XDIGIT,
+  RECC_BLANK,
+  RECC_SPACE,
+  RECC_MULTIBYTE,
+  RECC_NONASCII,
+  RECC_ASCII,
+  RECC_UNIBYTE
+} re_wctype_t;
+
+/* Character type used when wide-character support is not compiled in.  */
+typedef int re_wchar_t;
+
+/* Map a string to the char class it names (if any).
+
+   STR is the class name to look up, such as "alpha".
+   Returns the RECC_* enumerator for that name, or RECC_ERROR when STR
+   is not one of the names listed in the table below.  */
+static re_wctype_t
+re_wctype (str)
+     re_char *str;
+{
+  /* Recognized names and the class each one stands for.  The names are
+     all distinct, so the search order does not affect the result.  */
+  static const struct
+  {
+    const char *name;
+    re_wctype_t cc;
+  } classes[] =
+    {
+      { "alnum", RECC_ALNUM },
+      { "alpha", RECC_ALPHA },
+      { "word", RECC_WORD },
+      { "ascii", RECC_ASCII },
+      { "nonascii", RECC_NONASCII },
+      { "graph", RECC_GRAPH },
+      { "lower", RECC_LOWER },
+      { "print", RECC_PRINT },
+      { "punct", RECC_PUNCT },
+      { "space", RECC_SPACE },
+      { "upper", RECC_UPPER },
+      { "unibyte", RECC_UNIBYTE },
+      { "multibyte", RECC_MULTIBYTE },
+      { "digit", RECC_DIGIT },
+      { "xdigit", RECC_XDIGIT },
+      { "cntrl", RECC_CNTRL },
+      { "blank", RECC_BLANK }
+    };
+  /* re_char is declared elsewhere in the file; the cast makes the
+     pointer conversion explicit in case its element type is not plain
+     char (TODO confirm against the typedef).  */
+  const char *string = (const char *) str;
+  unsigned int i;
+
+  for (i = 0; i < sizeof classes / sizeof classes[0]; i++)
+    if (STREQ (string, classes[i].name))
+      return classes[i].cc;
+
+  /* Name the failure value rather than returning a bare 0.  */
+  return RECC_ERROR;
+}
+
+/* Test whether character CH belongs to character class CC.
+   RECC_ERROR contains no character.  A CC that is not one of the
+   enumerators handled below aborts the program.  */
+static boolean
+re_iswctype (ch, cc)
+     int ch;
+     re_wctype_t cc;
+{
+  boolean member = false;
+
+  switch (cc)
+    {
+    case RECC_ERROR:
+      break;
+    case RECC_ALNUM:
+      member = ISALNUM (ch);
+      break;
+    case RECC_ALPHA:
+      member = ISALPHA (ch);
+      break;
+    case RECC_WORD:
+      member = ISWORD (ch);
+      break;
+    case RECC_GRAPH:
+      member = ISGRAPH (ch);
+      break;
+    case RECC_PRINT:
+      member = ISPRINT (ch);
+      break;
+    case RECC_LOWER:
+      member = ISLOWER (ch);
+      break;
+    case RECC_UPPER:
+      member = ISUPPER (ch);
+      break;
+    case RECC_PUNCT:
+      member = ISPUNCT (ch);
+      break;
+    case RECC_CNTRL:
+      member = ISCNTRL (ch);
+      break;
+    case RECC_DIGIT:
+      member = ISDIGIT (ch);
+      break;
+    case RECC_XDIGIT:
+      member = ISXDIGIT (ch);
+      break;
+    case RECC_BLANK:
+      member = ISBLANK (ch);
+      break;
+    case RECC_SPACE:
+      member = ISSPACE (ch);
+      break;
+    /* The next two pairs are each other's complement.  */
+    case RECC_MULTIBYTE:
+      member = !ISUNIBYTE (ch);
+      break;
+    case RECC_NONASCII:
+      member = !IS_REAL_ASCII (ch);
+      break;
+    case RECC_ASCII:
+      member = IS_REAL_ASCII (ch);
+      break;
+    case RECC_UNIBYTE:
+      member = ISUNIBYTE (ch);
+      break;
+    default:
+      abort ();
+    }
+
+  return member;
+}
+
+/* Translate class CC into the bit pattern stored in the range-table
+   bits for matching multibyte characters of that class.  Classes with
+   no corresponding bit yield 0; a CC outside the enumeration aborts.  */
+static int
+re_wctype_to_bit (cc)
+     re_wctype_t cc;
+{
+  int bits = 0;
+
+  switch (cc)
+    {
+    case RECC_MULTIBYTE:
+    case RECC_NONASCII:
+    case RECC_GRAPH:
+    case RECC_PRINT:
+      bits = BIT_MULTIBYTE;
+      break;
+
+    case RECC_WORD:
+    case RECC_ALNUM:
+    case RECC_ALPHA:
+      bits = BIT_WORD;
+      break;
+
+    case RECC_LOWER:
+      bits = BIT_LOWER;
+      break;
+
+    case RECC_UPPER:
+      bits = BIT_UPPER;
+      break;
+
+    case RECC_PUNCT:
+      bits = BIT_PUNCT;
+      break;
+
+    case RECC_SPACE:
+      bits = BIT_SPACE;
+      break;
+
+    /* These classes contribute no bit; BITS keeps its initial 0.  */
+    case RECC_ERROR:
+    case RECC_ASCII:
+    case RECC_UNIBYTE:
+    case RECC_DIGIT:
+    case RECC_XDIGIT:
+    case RECC_CNTRL:
+    case RECC_BLANK:
+      break;
+
+    default:
+      abort ();
+    }
+
+  return bits;
+}
+#endif
+\f
+/* Filling in the work area of a range. */
+
+/* Actually extend the space in WORK_AREA.
+
+   WORK_AREA->allocated grows by the size of 16 ints and
+   WORK_AREA->table is allocated (or reallocated) to the new size.
+   The function returns nothing, so a null WORK_AREA->table afterwards
+   is the only failure indication a caller can see.  When growing an
+   existing table fails, the old block is released before the pointer
+   is cleared so that it is not leaked.  */
+
+static void
+extend_range_table_work_area (work_area)
+     struct range_table_work_area *work_area;
+{
+  int *grown;
+
+  work_area->allocated += 16 * sizeof (int);
+  if (work_area->table)
+    {
+      grown = (int *) realloc (work_area->table, work_area->allocated);
+      /* A failed realloc leaves the old block allocated.  Storing the
+         null result straight into TABLE would drop the last pointer to
+         that block, so free it here first.  */
+      if (!grown)
+        free (work_area->table);
+    }
+  else
+    grown = (int *) malloc (work_area->allocated);
+
+  work_area->table = grown;
+}
+
+#ifdef emacs
+
+/* Carefully find the ranges of codes that are equivalent
+ under case conversion to the range start..end when passed through
+ TRANSLATE. Handle the case where non-letters can come in between
+ two upper-case letters (which happens in Latin-1).
+ Also handle the case of groups of more than 2 case-equivalent chars.
+
+ The basic method is to look at consecutive characters and see
+ if they can form a run that can be handled as one.
+
+ Each resulting range is appended to WORK_AREA->table as a
+ (first, last) pair, advancing WORK_AREA->used by two.
+
+ Returns -1 if successful, REG_ESPACE if ran out of space.
+ NOTE(review): no statement below returns REG_ESPACE explicitly;
+ presumably EXTEND_RANGE_TABLE (defined elsewhere) returns it from
+ this function when the table cannot be grown -- confirm. */
+
+static int
+set_image_of_range_1 (work_area, start, end, translate)
+ RE_TRANSLATE_TYPE translate;
+ struct range_table_work_area *work_area;
+ re_wchar_t start, end;
+{
+ /* `one_case' indicates a character, or a run of characters,
+ each of which is an isolate (no case-equivalents).
+ This includes all ASCII non-letters.
+
+ `two_case' indicates a character, or a run of characters,
+ each of which has two case-equivalent forms.
+ This includes all ASCII letters.
+
+ `strange' indicates a character that has more than one
+ case-equivalent. */
+
+ enum case_type {one_case, two_case, strange};
+
+ /* Describe the run that is in progress,
+ which the next character can try to extend.
+ If run_type is strange, that means there really is no run.
+ If run_type is one_case, then run_start...run_end is the run.
+ If run_type is two_case, then the run is run_start...run_end,
+ and the case-equivalents end at run_eqv_end. */
+
+ enum case_type run_type = strange;
+ /* These three are only read while run_type is one_case or two_case,
+ and they are assigned whenever a run is started or extended. */
+ int run_start, run_end, run_eqv_end;
+
+ Lisp_Object eqv_table;
+
+ /* If RE_TRANSLATE_P rejects TRANSLATE, record START..END itself as
+ the only range and stop. */
+ if (!RE_TRANSLATE_P (translate))
+ {
+ EXTEND_RANGE_TABLE (work_area, 2);
+ work_area->table[work_area->used++] = (start);
+ work_area->table[work_area->used++] = (end);
+ return -1;
+ }
+
+ /* NOTE(review): extras[2] of the TRANSLATE char-table is used below as
+ the table that maps a character to its case-equivalent; confirm
+ this slot assignment against the code that builds the table. */
+ eqv_table = XCHAR_TABLE (translate)->extras[2];
+
+ /* Visit every character of START..END in increasing order; START
+ itself serves as the loop variable. */
+ for (; start <= end; start++)
+ {
+ enum case_type this_type;
+ /* What the equivalence table maps the current character to. */
+ int eqv = RE_TRANSLATE (eqv_table, start);
+ int minchar, maxchar;
+
+ /* Classify this character */
+ if (eqv == start)
+ this_type = one_case;
+ else if (RE_TRANSLATE (eqv_table, eqv) == start)
+ this_type = two_case;
+ else
+ this_type = strange;
+
+ /* Order the character and its equivalent: MINCHAR <= MAXCHAR. */
+ if (start < eqv)
+ minchar = start, maxchar = eqv;
+ else
+ minchar = eqv, maxchar = start;
+
+ /* Can this character extend the run in progress?
+ It cannot if it is strange, if its type differs from the run's,
+ if MINCHAR does not directly follow run_end, or -- for a
+ two_case run -- if MAXCHAR does not directly follow
+ run_eqv_end. */
+ if (this_type == strange || this_type != run_type
+ || !(minchar == run_end + 1
+ && (run_type == two_case
+ ? maxchar == run_eqv_end + 1 : 1)))
+ {
+ /* No, end the run.
+ Record each of its equivalent ranges. */
+ if (run_type == one_case)
+ {
+ EXTEND_RANGE_TABLE (work_area, 2);
+ work_area->table[work_area->used++] = run_start;
+ work_area->table[work_area->used++] = run_end;
+ }
+ else if (run_type == two_case)
+ {
+ /* Two pairs: the run itself, then the equivalents of its
+ two end points. */
+ EXTEND_RANGE_TABLE (work_area, 4);
+ work_area->table[work_area->used++] = run_start;
+ work_area->table[work_area->used++] = run_end;
+ work_area->table[work_area->used++]
+ = RE_TRANSLATE (eqv_table, run_start);
+ work_area->table[work_area->used++]
+ = RE_TRANSLATE (eqv_table, run_end);
+ }
+ /* No run is in progress any more. */
+ run_type = strange;
+ }
+
+ if (this_type == strange)
+ {
+ /* For a strange character, add each of its equivalents, one
+ by one. Don't start a range.
+ The loop follows the chain of equivalents beginning at EQV
+ and stops as soon as the chain comes back to START, so
+ START itself is not recorded by this loop.
+ NOTE(review): confirm that is intended. */
+ do
+ {
+ EXTEND_RANGE_TABLE (work_area, 2);
+ work_area->table[work_area->used++] = eqv;
+ work_area->table[work_area->used++] = eqv;
+ eqv = RE_TRANSLATE (eqv_table, eqv);
+ }
+ while (eqv != start);
+ }
+
+ /* Add this char to the run, or start a new run. */
+ else if (run_type == strange)
+ {
+ /* Initialize a new range. */
+ run_type = this_type;
+ run_start = start;
+ run_end = start;
+ run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
+ }
+ else
+ {
+ /* Extend a running range.
+ The test above established minchar == run_end + 1, so this
+ advances run_end by one. */
+ run_end = minchar;
+ run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
+ }
+ }
+
+ /* If a run is still in progress at the end, finish it now
+ by recording its equivalent ranges. */
+ if (run_type == one_case)
+ {
+ EXTEND_RANGE_TABLE (work_area, 2);
+ work_area->table[work_area->used++] = run_start;
+ work_area->table[work_area->used++] = run_end;
+ }
+ else if (run_type == two_case)
+ {
+ EXTEND_RANGE_TABLE (work_area, 4);
+ work_area->table[work_area->used++] = run_start;
+ work_area->table[work_area->used++] = run_end;
+ work_area->table[work_area->used++]
+ = RE_TRANSLATE (eqv_table, run_start);
+ work_area->table[work_area->used++]
+ = RE_TRANSLATE (eqv_table, run_end);
+ }
+
+ /* -1 is this function's success value (see the header comment). */
+ return -1;
+}
+
+#endif /* emacs */
+
+/* Record the image of the range start..end when passed through
+ TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
+ and is not even necessarily contiguous.
+ Normally we approximate it with the smallest contiguous range that contains
+ all the chars we need. However, for Latin-1 we go to extra effort
+ to do a better job.
+
+ This function is not called for ASCII ranges.
+
+ Returns -1 if successful, REG_ESPACE if ran out of space. */
+
+static int
+set_image_of_range (work_area, start, end, translate)
+ RE_TRANSLATE_TYPE translate;
+ struct range_table_work_area *work_area;
+ re_wchar_t start, end;
+{
+ re_wchar_t cmin, cmax;