X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex_internal.h;h=9bbc6ac3b9edbbc10a078109da2dd3b67937ddd4;hb=9aa4a0e39ea568b5481f100645106e1ac1d46883;hp=3c17fc3c9d305e2386151b0573bf84151f8e7609;hpb=1e5cfc92d3a783d911169d1704ae6e37072c327c;p=gnulib.git diff --git a/lib/regex_internal.h b/lib/regex_internal.h index 3c17fc3c9..9bbc6ac3b 100644 --- a/lib/regex_internal.h +++ b/lib/regex_internal.h @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -27,29 +27,28 @@ #include #include -#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +#ifdef _LIBC # include +#else +# include "localcharset.h" #endif #if defined HAVE_LOCALE_H || defined _LIBC # include #endif -#if defined HAVE_WCHAR_H || defined _LIBC -# include -#endif /* HAVE_WCHAR_H || _LIBC */ -#if defined HAVE_WCTYPE_H || defined _LIBC -# include -#endif /* HAVE_WCTYPE_H || _LIBC */ + +#include +#include +#include #if defined _LIBC # include #else -# define __libc_lock_define(CLASS,NAME) # define __libc_lock_init(NAME) do { } while (0) # define __libc_lock_lock(NAME) do { } while (0) # define __libc_lock_unlock(NAME) do { } while (0) #endif /* In case that the system doesn't have isblank(). */ -#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +#if !defined _LIBC && !HAVE_DECL_ISBLANK && !defined isblank # define isblank(ch) ((ch) == ' ' || (ch) == '\t') #endif @@ -80,7 +79,12 @@ # define gettext_noop(String) String #endif -#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC # define RE_ENABLE_I18N #endif @@ -88,11 +92,16 @@ # define BE(expr, val) __builtin_expect (expr, val) #else # define BE(expr, val) (expr) -# define inline +# ifdef _LIBC +# define inline +# endif #endif -/* Number of single byte character. */ -#define SBC_MAX 256 +/* Number of ASCII characters. */ +#define ASCII_CHARS 0x80 + +/* Number of single byte characters. */ +#define SBC_MAX (UCHAR_MAX + 1) #define COLL_ELEM_LEN_MAX 8 @@ -105,23 +114,17 @@ # define __wctype wctype # define __iswctype iswctype # define __btowc btowc -# ifndef __mempcpy -# define __mempcpy mempcpy -# endif # define __wcrtomb wcrtomb # define __regfree regfree # define attribute_hidden #endif /* not _LIBC */ -#ifdef __GNUC__ +#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) # define __attribute(arg) __attribute__ (arg) #else # define __attribute(arg) #endif -extern const char __re_error_msgid[] attribute_hidden; -extern const size_t __re_error_msgid_idx[] attribute_hidden; - typedef __re_idx_t Idx; /* Special return value for failure to match. */ @@ -147,22 +150,52 @@ typedef __re_idx_t Idx; /* A hash value, suitable for computing hash tables. */ typedef __re_size_t re_hashval_t; -/* Number of bits in an unsinged int. */ -#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT) -/* Number of unsigned int in an bit_set. */ -#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS) -typedef unsigned int bitset[BITSET_UINTS]; -typedef unsigned int *re_bitset_ptr_t; -typedef const unsigned int *re_const_bitset_ptr_t; - -#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS) -#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS)) -#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS)) -#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_set_all(set) \ - memset (set, 255, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_copy(dest,src) \ - memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS) +/* An integer used to represent a set of bits. It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX + +/* Number of bits in a bitset_word_t. For portability to hosts with + padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)'; + instead, deduce it directly from BITSET_WORD_MAX. Avoid + greater-than-32-bit integers and unconditional shifts by more than + 31 bits, as they're not portable. */ +#if BITSET_WORD_MAX == 0xffffffff +# define BITSET_WORD_BITS 32 +#elif BITSET_WORD_MAX >> 31 >> 5 == 1 +# define BITSET_WORD_BITS 36 +#elif BITSET_WORD_MAX >> 31 >> 16 == 1 +# define BITSET_WORD_BITS 48 +#elif BITSET_WORD_MAX >> 31 >> 28 == 1 +# define BITSET_WORD_BITS 60 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1 +# define BITSET_WORD_BITS 64 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1 +# define BITSET_WORD_BITS 72 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1 +# define BITSET_WORD_BITS 128 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1 +# define BITSET_WORD_BITS 256 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1 +# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */ +# if BITSET_WORD_BITS <= SBC_MAX +# error "Invalid SBC_MAX" +# endif +#elif BITSET_WORD_MAX == (0xffffffff + 2) * 0xffffffff +/* Work around a bug in 64-bit PGC (before version 6.1-2), where the + preprocessor mishandles large unsigned values as if they were signed. */ +# define BITSET_WORD_BITS 64 +#else +# error "Add case for new bitset_word_t size" +#endif + +/* Number of bitset_word_t values in a bitset_t. */ +#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS) + +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; #define PREV_WORD_CONSTRAINT 0x0001 #define PREV_NOTWORD_CONSTRAINT 0x0002 @@ -307,7 +340,7 @@ typedef struct Idx idx; /* for BACK_REF */ re_context_type ctx_type; /* for ANCHOR */ } opr; -#if __GNUC__ >= 2 +#if __GNUC__ >= 2 && !__STRICT_ANSI__ re_token_type_t type : 8; #else re_token_type_t type; @@ -368,7 +401,7 @@ struct re_string_t the beginning of the input string. */ unsigned int tip_context; /* The translation passed as a part of an argument of re_compile_pattern. */ - unsigned REG_TRANSLATE_TYPE trans; + RE_TRANSLATE_TYPE trans; /* Copy of re_dfa_t's word_char. */ re_const_bitset_ptr_t word_char; /* true if REG_ICASE. */ @@ -405,10 +438,9 @@ static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) #endif /* RE_ENABLE_I18N */ static void build_upper_buffer (re_string_t *pstr) internal_function; static void re_string_translate_buffer (re_string_t *pstr) internal_function; -static unsigned int re_string_context_at (const re_string_t *input, - Idx idx, int eflags) +static unsigned int re_string_context_at (const re_string_t *input, Idx idx, + int eflags) internal_function __attribute ((pure)); - #define re_string_peek_byte(pstr, offset) \ ((pstr)->mbs[(pstr)->cur_idx + offset]) #define re_string_fetch_byte(pstr) \ @@ -441,8 +473,11 @@ static unsigned int re_string_context_at (const re_string_t *input, # endif #endif +#ifndef MAX +# define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif + #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) -#define re_calloc(t,n) ((t *) calloc (n, sizeof (t))) #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) #define re_free(p) free (p) @@ -575,9 +610,9 @@ typedef struct /* The string object corresponding to the input string. */ re_string_t input; #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) - re_dfa_t *const dfa; + const re_dfa_t *const dfa; #else - re_dfa_t *dfa; + const re_dfa_t *dfa; #endif /* EFLAGS of the argument of regexec. */ int eflags; @@ -624,8 +659,8 @@ struct re_fail_stack_t struct re_dfa_t { re_token_t *nodes; - Idx nodes_alloc; - Idx nodes_len; + size_t nodes_alloc; + size_t nodes_len; Idx *nexts; Idx *org_indices; re_node_set *edests; @@ -647,8 +682,8 @@ struct re_dfa_t Idx nbackref; /* The number of backreference in this dfa. */ /* Bitmap expressing which backreference is used. */ - unsigned int used_bkref_map; - unsigned int completed_bkref_map; + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; unsigned int has_plural_match : 1; /* If this dfa has "multibyte node", which is a backreference or @@ -659,13 +694,15 @@ struct re_dfa_t unsigned int map_notascii : 1; unsigned int word_ops_used : 1; int mb_cur_max; - bitset word_char; + bitset_t word_char; reg_syntax_t syntax; Idx *subexp_map; #ifdef DEBUG char* re_str; #endif +#ifdef _LIBC __libc_lock_define (, lock) +#endif }; #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) @@ -673,8 +710,6 @@ struct re_dfa_t (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) #define re_node_set_empty(p) ((p)->nelem = 0) #define re_node_set_free(set) re_free ((set)->elems) - -static void free_state (re_dfastate_t *state) internal_function; typedef enum @@ -698,40 +733,76 @@ typedef struct } bracket_elem_t; -/* Inline functions for bitset operation. */ +/* Inline functions for bitset_t operation. */ + static inline void -bitset_not (bitset set) +bitset_set (bitset_t set, Idx i) { - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) - set[bitset_i] = ~set[bitset_i]; + set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; +} + +static inline void +bitset_clear (bitset_t set, Idx i) +{ + set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); +} + +static inline bool +bitset_contain (const bitset_t set, Idx i) +{ + return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; } static inline void -bitset_merge (bitset dest, const bitset src) +bitset_empty (bitset_t set) +{ + memset (set, '\0', sizeof (bitset_t)); +} + +static inline void +bitset_set_all (bitset_t set) +{ + memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); + if (SBC_MAX % BITSET_WORD_BITS != 0) + set[BITSET_WORDS - 1] = + ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; +} + +static inline void +bitset_copy (bitset_t dest, const bitset_t src) +{ + memcpy (dest, src, sizeof (bitset_t)); +} + +static inline void +bitset_not (bitset_t set) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) - dest[bitset_i] |= src[bitset_i]; + for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i) + set[bitset_i] = ~set[bitset_i]; + if (SBC_MAX % BITSET_WORD_BITS != 0) + set[BITSET_WORDS - 1] = + ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1) + & ~set[BITSET_WORDS - 1]); } static inline void -bitset_not_merge (bitset dest, const bitset src) +bitset_merge (bitset_t dest, const bitset_t src) { - int i; - for (i = 0; i < BITSET_UINTS; ++i) - dest[i] |= ~src[i]; + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] |= src[bitset_i]; } static inline void -bitset_mask (bitset dest, const bitset src) +bitset_mask (bitset_t dest, const bitset_t src) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) dest[bitset_i] &= src[bitset_i]; } -#if defined RE_ENABLE_I18N +#ifdef RE_ENABLE_I18N /* Inline functions for re_string. */ static inline int internal_function __attribute ((pure)) @@ -759,11 +830,11 @@ static int internal_function __attribute ((pure)) re_string_elem_size_at (const re_string_t *pstr, Idx idx) { -#ifdef _LIBC +# ifdef _LIBC const unsigned char *p, *extra; const int32_t *table, *indirect; int32_t tmp; -# include +# include uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) @@ -778,7 +849,7 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx) return p - pstr->mbs - idx; } else -#endif /* _LIBC */ +# endif /* _LIBC */ return 1; } #endif /* RE_ENABLE_I18N */