regex: don't depend on wcscoll
[gnulib.git] / m4 / regex.m4
index a79f03b..424ae33 100644 (file)
-#serial 10
+# serial 64
+
+# Copyright (C) 1996-2001, 2003-2013 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
 
 dnl Initially derived from code in GNU grep.
 dnl Mostly written by Jim Meyering.
 
 
 dnl Initially derived from code in GNU grep.
 dnl Mostly written by Jim Meyering.
 
-dnl Usage: jm_INCLUDED_REGEX([lib/regex.c])
-dnl
-AC_DEFUN(jm_INCLUDED_REGEX,
-  [
-    dnl Even packages that don't use regex.c can use this macro.
-    dnl Of course, for them it doesn't do anything.
+AC_PREREQ([2.50])
 
 
-    # Assume we'll default to using the included regex.c.
-    ac_use_included_regex=yes
+AC_DEFUN([gl_REGEX],
+[
+  AC_ARG_WITH([included-regex],
+    [AS_HELP_STRING([--without-included-regex],
+                    [don't compile regex; this is the default on systems
+                     with recent-enough versions of the GNU C Library
+                     (use with caution on other systems).])])
 
 
-    # However, if the system regex support is good enough that it passes the
-    # the following run test, then default to *not* using the included regex.c.
+  case $with_included_regex in #(
+  yes|no) ac_use_included_regex=$with_included_regex
+        ;;
+  '')
+    # If the system regex support is good enough that it passes the
+    # following run test, then default to *not* using the included regex.c.
     # If cross compiling, assume the test would fail and use the included
     # If cross compiling, assume the test would fail and use the included
-    # regex.c.  The first failing regular expression is from `Spencer ere
-    # test #75' in grep-2.3.
+    # regex.c.
+    AC_CHECK_DECLS_ONCE([alarm])
     AC_CACHE_CHECK([for working re_compile_pattern],
     AC_CACHE_CHECK([for working re_compile_pattern],
-                  jm_cv_func_working_re_compile_pattern,
-      AC_TRY_RUN(
-[#include <stdio.h>
-#include <regex.h>
-         int
-         main ()
-         {
-           static struct re_pattern_buffer regex;
-           const char *s;
-           struct re_registers regs;
-           re_set_syntax (RE_SYNTAX_POSIX_EGREP);
-           /* These two brackets, '[[' and the one in the comment below serve
-              to quote the brackets (unbalanced) in the following line.  */
-           s = re_compile_pattern ("a[[:]:]]b\n", 9, &regex);
-           /* This bracket ']' helps quote the unbalanced expression above.  */
-           /* This should fail with _Invalid character class name_ error.  */
-           if (!s)
-             exit (1);
-
-           /* This should succeed, but doesn't for e.g. glibc-2.1.3.  */
-           s = re_compile_pattern ("{1", 2, &regex);
-
-           if (s)
-             exit (1);
-
-           /* The following example is derived from a problem report
+                   [gl_cv_func_re_compile_pattern_working],
+      [AC_RUN_IFELSE(
+        [AC_LANG_PROGRAM(
+          [[#include <regex.h>
+
+            #include <locale.h>
+            #include <limits.h>
+            #include <string.h>
+            #if HAVE_DECL_ALARM
+            # include <unistd.h>
+            # include <signal.h>
+            #endif
+          ]],
+          [[int result = 0;
+            static struct re_pattern_buffer regex;
+            unsigned char folded_chars[UCHAR_MAX + 1];
+            int i;
+            const char *s;
+            struct re_registers regs;
+
+#if HAVE_DECL_ALARM
+            /* Some builds of glibc go into an infinite loop on this test.  */
+            signal (SIGALRM, SIG_DFL);
+            alarm (2);
+#endif
+            if (setlocale (LC_ALL, "en_US.UTF-8"))
+              {
+                {
+                  /* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
+                     This test needs valgrind to catch the bug on Debian
+                     GNU/Linux 3.1 x86, but it might catch the bug better
+                     on other platforms and it shouldn't hurt to try the
+                     test here.  */
+                  static char const pat[] = "insert into";
+                  static char const data[] =
+                    "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
+                  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
+                                 | RE_ICASE);
+                  memset (&regex, 0, sizeof regex);
+                  s = re_compile_pattern (pat, sizeof pat - 1, &regex);
+                  if (s)
+                    result |= 1;
+                  else if (re_search (&regex, data, sizeof data - 1,
+                                      0, sizeof data - 1, &regs)
+                           != -1)
+                    result |= 1;
+                }
+
+                {
+                  /* This test is from glibc bug 15078.
+                     The test case is from Andreas Schwab in
+                     <http://www.sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
+                     */
+                  static char const pat[] = "[^x]x";
+                  static char const data[] =
+                    /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
+                    "\xe1\x80\x80"
+                    "\xe1\x80\xbb"
+                    "\xe1\x80\xbd"
+                    "\xe1\x80\x94"
+                    "\xe1\x80\xba"
+                    "\xe1\x80\xaf"
+                    "\xe1\x80\x95"
+                    "\xe1\x80\xba"
+                    "x";
+                  re_set_syntax (0);
+                  memset (&regex, 0, sizeof regex);
+                  s = re_compile_pattern (pat, sizeof pat - 1, &regex);
+                  if (s)
+                    result |= 1;
+                  else
+                    {
+                      i = re_search (&regex, data, sizeof data - 1,
+                                     0, sizeof data - 1, 0);
+                      if (i != 0 && i != 21)
+                        result |= 1;
+                    }
+                }
+
+                if (! setlocale (LC_ALL, "C"))
+                  return 1;
+              }
+
+            /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
+            re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
+            memset (&regex, 0, sizeof regex);
+            s = re_compile_pattern ("a[^x]b", 6, &regex);
+            if (s)
+              result |= 2;
+            /* This should fail, but succeeds for glibc-2.5.  */
+            else if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
+              result |= 2;
+
+            /* This regular expression is from Spencer ere test number 75
+               in grep-2.3.  */
+            re_set_syntax (RE_SYNTAX_POSIX_EGREP);
+            memset (&regex, 0, sizeof regex);
+            for (i = 0; i <= UCHAR_MAX; i++)
+              folded_chars[i] = i;
+            regex.translate = folded_chars;
+            s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
+            /* This should fail with _Invalid character class name_ error.  */
+            if (!s)
+              result |= 4;
+
+            /* Ensure that [b-a] is diagnosed as invalid, when
+               using RE_NO_EMPTY_RANGES. */
+            re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
+            memset (&regex, 0, sizeof regex);
+            s = re_compile_pattern ("a[b-a]", 6, &regex);
+            if (s == 0)
+              result |= 8;
+
+            /* This should succeed, but does not for glibc-2.1.3.  */
+            memset (&regex, 0, sizeof regex);
+            s = re_compile_pattern ("{1", 2, &regex);
+            if (s)
+              result |= 8;
+
+            /* The following example is derived from a problem report
                against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>.  */
                against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>.  */
-           s = re_compile_pattern ("[[anĂ¹]]*n", 7, &regex);
-           if (s)
-             exit (1);
-
-           /* This should match, but doesn't for e.g. glibc-2.2.1.  */
-           if (re_match (&regex, "an", 2, 0, &regs) != 2)
-             exit (1);
-
-           exit (0);
-         }
-       ],
-              jm_cv_func_working_re_compile_pattern=yes,
-              jm_cv_func_working_re_compile_pattern=no,
-              dnl When crosscompiling, assume it's broken.
-              jm_cv_func_working_re_compile_pattern=no))
-    if test $jm_cv_func_working_re_compile_pattern = yes; then
-      ac_use_included_regex=no
-    fi
-
-    test -n "$1" || AC_MSG_ERROR([missing argument])
-    m4_syscmd([test -f $1])
-    ifelse(m4_sysval, 0,
-      [
-       AC_ARG_WITH(included-regex,
-       [  --without-included-regex don't compile regex; this is the default on
-                          systems with version 2 of the GNU C library
-                          (use with caution on other system)],
-                   jm_with_regex=$withval,
-                   jm_with_regex=$ac_use_included_regex)
-       if test "$jm_with_regex" = yes; then
-         AC_LIBOBJ(regex)
-       fi
-      ],
-    )
-  ]
-)
+            memset (&regex, 0, sizeof regex);
+            s = re_compile_pattern ("[an\371]*n", 7, &regex);
+            if (s)
+              result |= 8;
+            /* This should match, but does not for glibc-2.2.1.  */
+            else if (re_match (&regex, "an", 2, 0, &regs) != 2)
+              result |= 8;
+
+            memset (&regex, 0, sizeof regex);
+            s = re_compile_pattern ("x", 1, &regex);
+            if (s)
+              result |= 8;
+            /* glibc-2.2.93 does not work with a negative RANGE argument.  */
+            else if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
+              result |= 8;
+
+            /* The version of regex.c in older versions of gnulib
+               ignored RE_ICASE.  Detect that problem too.  */
+            re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
+            memset (&regex, 0, sizeof regex);
+            s = re_compile_pattern ("x", 1, &regex);
+            if (s)
+              result |= 16;
+            else if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
+              result |= 16;
+
+            /* Catch a bug reported by Vin Shelton in
+               http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html
+               */
+            re_set_syntax (RE_SYNTAX_POSIX_BASIC
+                           & ~RE_CONTEXT_INVALID_DUP
+                           & ~RE_NO_EMPTY_RANGES);
+            memset (&regex, 0, sizeof regex);
+            s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
+            if (s)
+              result |= 32;
+
+            /* REG_STARTEND was added to glibc on 2004-01-15.
+               Reject older versions.  */
+            if (! REG_STARTEND)
+              result |= 64;
+
+#if 0
+            /* It would be nice to reject hosts whose regoff_t values are too
+               narrow (including glibc on hosts with 64-bit ptrdiff_t and
+               32-bit int), but we should wait until glibc implements this
+               feature.  Otherwise, support for equivalence classes and
+               multibyte collation symbols would always be broken except
+               when compiling --without-included-regex.   */
+            if (sizeof (regoff_t) < sizeof (ptrdiff_t)
+                || sizeof (regoff_t) < sizeof (ssize_t))
+              result |= 64;
+#endif
+
+            return result;
+          ]])],
+       [gl_cv_func_re_compile_pattern_working=yes],
+       [gl_cv_func_re_compile_pattern_working=no],
+       dnl When crosscompiling, assume it is not working.
+       [gl_cv_func_re_compile_pattern_working=no])])
+    case $gl_cv_func_re_compile_pattern_working in #(
+    yes) ac_use_included_regex=no;; #(
+    no) ac_use_included_regex=yes;;
+    esac
+    ;;
+  *) AC_MSG_ERROR([Invalid value for --with-included-regex: $with_included_regex])
+    ;;
+  esac
+
+  if test $ac_use_included_regex = yes; then
+    AC_DEFINE([_REGEX_INCLUDE_LIMITS_H], [1],
+      [Define if you want <regex.h> to include <limits.h>, so that it
+       consistently overrides <limits.h>'s RE_DUP_MAX.])
+    AC_DEFINE([_REGEX_LARGE_OFFSETS], [1],
+      [Define if you want regoff_t to be at least as wide POSIX requires.])
+    AC_DEFINE([re_syntax_options], [rpl_re_syntax_options],
+      [Define to rpl_re_syntax_options if the replacement should be used.])
+    AC_DEFINE([re_set_syntax], [rpl_re_set_syntax],
+      [Define to rpl_re_set_syntax if the replacement should be used.])
+    AC_DEFINE([re_compile_pattern], [rpl_re_compile_pattern],
+      [Define to rpl_re_compile_pattern if the replacement should be used.])
+    AC_DEFINE([re_compile_fastmap], [rpl_re_compile_fastmap],
+      [Define to rpl_re_compile_fastmap if the replacement should be used.])
+    AC_DEFINE([re_search], [rpl_re_search],
+      [Define to rpl_re_search if the replacement should be used.])
+    AC_DEFINE([re_search_2], [rpl_re_search_2],
+      [Define to rpl_re_search_2 if the replacement should be used.])
+    AC_DEFINE([re_match], [rpl_re_match],
+      [Define to rpl_re_match if the replacement should be used.])
+    AC_DEFINE([re_match_2], [rpl_re_match_2],
+      [Define to rpl_re_match_2 if the replacement should be used.])
+    AC_DEFINE([re_set_registers], [rpl_re_set_registers],
+      [Define to rpl_re_set_registers if the replacement should be used.])
+    AC_DEFINE([re_comp], [rpl_re_comp],
+      [Define to rpl_re_comp if the replacement should be used.])
+    AC_DEFINE([re_exec], [rpl_re_exec],
+      [Define to rpl_re_exec if the replacement should be used.])
+    AC_DEFINE([regcomp], [rpl_regcomp],
+      [Define to rpl_regcomp if the replacement should be used.])
+    AC_DEFINE([regexec], [rpl_regexec],
+      [Define to rpl_regexec if the replacement should be used.])
+    AC_DEFINE([regerror], [rpl_regerror],
+      [Define to rpl_regerror if the replacement should be used.])
+    AC_DEFINE([regfree], [rpl_regfree],
+      [Define to rpl_regfree if the replacement should be used.])
+  fi
+])
+
+# Prerequisites of lib/regex.c and lib/regex_internal.c.
+AC_DEFUN([gl_PREREQ_REGEX],
+[
+  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
+  AC_REQUIRE([AC_C_INLINE])
+  AC_REQUIRE([AC_C_RESTRICT])
+  AC_REQUIRE([AC_TYPE_MBSTATE_T])
+  AC_REQUIRE([gl_EEMALLOC])
+  AC_REQUIRE([gl_GLIBC21])
+  AC_CHECK_HEADERS([libintl.h])
+  AC_CHECK_FUNCS_ONCE([isblank iswctype])
+  AC_CHECK_DECLS([isblank], [], [], [[#include <ctype.h>]])
+])