/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2002-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation,
- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ with this program; if not, see <http://www.gnu.org/licenses/>. */
#ifndef _REGEX_INTERNAL_H
#define _REGEX_INTERNAL_H 1
#include <assert.h>
#include <ctype.h>
-#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <langinfo.h>
#ifndef _LIBC
-# include "strcase.h"
-#endif
-
-#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
-# include <langinfo.h>
-#endif
-#if defined HAVE_LOCALE_H || defined _LIBC
-# include <locale.h>
+# include "localcharset.h"
#endif
-#if defined HAVE_WCHAR_H || defined _LIBC
-# include <wchar.h>
-#endif /* HAVE_WCHAR_H || _LIBC */
-#if defined HAVE_WCTYPE_H || defined _LIBC
-# include <wctype.h>
-#endif /* HAVE_WCTYPE_H || _LIBC */
+#include <locale.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <stdbool.h>
#include <stdint.h>
#if defined _LIBC
# include <bits/libc-lock.h>
#else
+# define __libc_lock_define(CLASS,NAME)
# define __libc_lock_init(NAME) do { } while (0)
# define __libc_lock_lock(NAME) do { } while (0)
# define __libc_lock_unlock(NAME) do { } while (0)
#endif
/* In case that the system doesn't have isblank(). */
-#if !defined _LIBC && !HAVE_DECL_ISBLANK && !defined isblank
+#if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK))
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
#endif
# define gettext_noop(String) String
#endif
-/* For loser systems without the definition. */
-#ifndef SIZE_MAX
-# define SIZE_MAX ((size_t) -1)
-#endif
-
-#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCSCOLL) || _LIBC
# define RE_ENABLE_I18N
#endif
# define __wctype wctype
# define __iswctype iswctype
# define __btowc btowc
-# ifndef __mempcpy
-# define __mempcpy mempcpy
-# endif
+# define __mbrtowc mbrtowc
# define __wcrtomb wcrtomb
# define __regfree regfree
# define attribute_hidden
#endif
typedef __re_idx_t Idx;
+#ifdef _REGEX_LARGE_OFFSETS
+# define IDX_MAX (SIZE_MAX - 2)
+#else
+# define IDX_MAX INT_MAX
+#endif
/* Special return value for failure to match. */
#define REG_MISSING ((Idx) -1)
instead, deduce it directly from BITSET_WORD_MAX. Avoid
greater-than-32-bit integers and unconditional shifts by more than
31 bits, as they're not portable. */
-#if BITSET_WORD_MAX == 0xffffffff
+#if BITSET_WORD_MAX == 0xffffffffUL
# define BITSET_WORD_BITS 32
-#elif BITSET_WORD_MAX >> 31 >> 5 == 1
+#elif BITSET_WORD_MAX >> 31 >> 4 == 1
# define BITSET_WORD_BITS 36
#elif BITSET_WORD_MAX >> 31 >> 16 == 1
# define BITSET_WORD_BITS 48
# if BITSET_WORD_BITS <= SBC_MAX
# error "Invalid SBC_MAX"
# endif
-#elif BITSET_WORD_MAX == (0xffffffff + 2) * 0xffffffff
-/* Work around a bug in 64-bit PGC (before version 6.1-2), where the
- preprocessor mishandles large unsigned values as if they were signed. */
-# define BITSET_WORD_BITS 64
#else
# error "Add case for new bitset_word_t size"
#endif
Idx idx; /* for BACK_REF */
re_context_type ctx_type; /* for ANCHOR */
} opr;
-#if __GNUC__ >= 2
+#if __GNUC__ >= 2 && !defined __STRICT_ANSI__
re_token_type_t type : 8;
#else
re_token_type_t type;
typedef struct re_dfa_t re_dfa_t;
#ifndef _LIBC
-# ifdef __i386__
-# define internal_function __attribute ((regparm (3), stdcall))
-# else
-# define internal_function
-# endif
+# define internal_function
#endif
+#ifndef NOT_IN_libc
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
Idx new_buf_len)
internal_function;
-#ifdef RE_ENABLE_I18N
+# ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr) internal_function;
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
- internal_function;
-#endif /* RE_ENABLE_I18N */
+ internal_function;
+# endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr) internal_function;
static void re_string_translate_buffer (re_string_t *pstr) internal_function;
static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
int eflags)
internal_function __attribute ((pure));
+#endif
#define re_string_peek_byte(pstr, offset) \
((pstr)->mbs[(pstr)->cur_idx + offset])
#define re_string_fetch_byte(pstr) \
# else
/* alloca is implemented with malloc, so just use malloc. */
# define __libc_use_alloca(n) 0
+# undef alloca
+# define alloca(n) malloc (n)
# endif
#endif
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
re_token_t token;
- /* `node_idx' is the index in dfa->nodes, if `type' == 0.
- Otherwise `type' indicate the type of this node. */
+ /* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
+ Otherwise 'type' indicate the type of this node. */
Idx node_idx;
};
typedef struct bin_tree_t bin_tree_t;
struct re_dfastate_t **trtable, **word_trtable;
unsigned int context : 4;
unsigned int halt : 1;
- /* If this state can accept `multi byte'.
+ /* If this state can accept "multi byte".
Note that we refer to multibyte characters, and multi character
- collating elements as `multi byte'. */
+ collating elements as "multi byte". */
unsigned int accept_mb : 1;
/* If this state has backreference node(s). */
unsigned int has_backref : 1;
re_bitset_ptr_t sb_char;
int str_tree_storage_idx;
- /* number of subexpressions `re_nsub' is in regex_t. */
+ /* number of subexpressions 're_nsub' is in regex_t. */
re_hashval_t state_hash_mask;
Idx init_node;
Idx nbackref; /* The number of backreference in this dfa. */
#ifdef DEBUG
char* re_str;
#endif
-#ifdef _LIBC
__libc_lock_define (, lock)
-#endif
};
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
return (wint_t) pstr->wcs[idx];
}
+# ifndef NOT_IN_libc
static int
internal_function __attribute ((pure))
re_string_elem_size_at (const re_string_t *pstr, Idx idx)
{
-# ifdef _LIBC
+# ifdef _LIBC
const unsigned char *p, *extra;
const int32_t *table, *indirect;
- int32_t tmp;
-# include <locale/weight.h>
+# include <locale/weight.h>
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
p = pstr->mbs + idx;
- tmp = findidx (&p);
+ findidx (&p, pstr->len - idx);
return p - pstr->mbs - idx;
}
else
-# endif /* _LIBC */
+# endif /* _LIBC */
return 1;
}
+# endif
#endif /* RE_ENABLE_I18N */
+#ifndef __GNUC_PREREQ
+# if defined __GNUC__ && defined __GNUC_MINOR__
+# define __GNUC_PREREQ(maj, min) \
+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+# else
+# define __GNUC_PREREQ(maj, min) 0
+# endif
+#endif
+
+#if __GNUC_PREREQ (3,4)
+# undef __attribute_warn_unused_result__
+# define __attribute_warn_unused_result__ \
+ __attribute__ ((__warn_unused_result__))
+#else
+# define __attribute_warn_unused_result__ /* empty */
+#endif
+
#endif /* _REGEX_INTERNAL_H */