Fix regex code so it doesn't rely on strcasecmp.

author Paul Eggert <eggert@cs.ucla.edu>

Thu, 15 Feb 2007 00:16:55 +0000 (00:16 +0000)

committer Paul Eggert <eggert@cs.ucla.edu>

Thu, 15 Feb 2007 00:16:55 +0000 (00:16 +0000)
author Paul Eggert <eggert@cs.ucla.edu>
Thu, 15 Feb 2007 00:16:55 +0000 (00:16 +0000)
committer Paul Eggert <eggert@cs.ucla.edu>
Thu, 15 Feb 2007 00:16:55 +0000 (00:16 +0000)
diff --git a/ChangeLog b/ChangeLog

index 7b1ab07..058132f 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2007-02-14  Paul Eggert  <eggert@cs.ucla.edu>
+
+       Fix regex code so it doesn't rely on strcasecmp.
+       * lib/regex_internal.h: Include <langinfo.h> only if _LIBC is defined.
+       Otherwise, include gnulib's localcharset.h.
+       * lib/regcomp.c (init_dfa): Don't use strcasecmp, as it can have
+       undesirable behavior in non-C locales.  Instead, rely on locale_charset.
+       * m4/regex.m4 (gl_PREREQ_REGEX): Don't require AM_LANGINFO_CODESET.
+       * modules/regex (FILES): Remove m4/codeset.m4.
+       (Depends-on): Add localcharset.  Remove strcase.
+
  2007-02-13  Ralf Wildenhues  <Ralf.Wildenhues@gmx.de>
  
         * m4/unlinkdir.m4 (gl_UNLINKDIR): Fix m4 quoting bug.
diff --git a/lib/regcomp.c b/lib/regcomp.c

index 0209bb1..fe4d243 100644 (file)
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -829,9 +829,6 @@ static reg_errcode_t
  init_dfa (re_dfa_t *dfa, size_t pat_len)
  {
    __re_size_t table_size;
-#ifndef _LIBC
-  char *codeset_name;
-#endif
  #ifdef RE_ENABLE_I18N
    size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
  #else
@@ -875,22 +872,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
    dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
                        != 0);
  #else
-# ifdef HAVE_LANGINFO_CODESET
-  codeset_name = nl_langinfo (CODESET);
-# else
-  codeset_name = getenv ("LC_ALL");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
-    codeset_name = getenv ("LC_CTYPE");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
-    codeset_name = getenv ("LANG");
-  if (codeset_name == NULL)
-    codeset_name = "";
-  else if (strchr (codeset_name, '.') !=  NULL)
-    codeset_name = strchr (codeset_name, '.') + 1;
-# endif
-
-  if (strcasecmp (codeset_name, "UTF-8") == 0
-      || strcasecmp (codeset_name, "UTF8") == 0)
+  if (strcmp (locale_charset (), "UTF-8") == 0)
      dfa->is_utf8 = 1;
  
    /* We check exhaustively in the loop below if this charset is a
diff --git a/lib/regex_internal.h b/lib/regex_internal.h

index a19c9a7..9bbc6ac 100644 (file)
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -27,8 +27,10 @@
  #include <stdlib.h>
  #include <string.h>
  
-#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+#ifdef _LIBC
  # include <langinfo.h>
+#else
+# include "localcharset.h"
  #endif
  #if defined HAVE_LOCALE_H || defined _LIBC
  # include <locale.h>
diff --git a/m4/regex.m4 b/m4/regex.m4

index f694bac..4b421bd 100644 (file)
--- a/m4/regex.m4
+++ b/m4/regex.m4
@@ -1,4 +1,4 @@
-#serial 44
+#serial 45
  
  # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
  # 2006, 2007 Free Software Foundation, Inc.
@@ -203,7 +203,6 @@ AC_DEFUN([gl_PREREQ_REGEX],
  [
    AC_REQUIRE([AC_GNU_SOURCE])
    AC_REQUIRE([AC_C_RESTRICT])
-  AC_REQUIRE([AM_LANGINFO_CODESET])
    AC_CHECK_FUNCS_ONCE([iswctype mbrtowc wcrtomb wcscoll])
    AC_CHECK_DECLS([isblank], [], [], [#include <ctype.h>])
  ])
diff --git a/modules/regex b/modules/regex

index 3f94248..a1a6811 100644 (file)
--- a/modules/regex
+++ b/modules/regex
@@ -8,17 +8,16 @@ lib/regex_internal.c
  lib/regex_internal.h
  lib/regexec.c
  lib/regcomp.c
-m4/codeset.m4
  m4/regex.m4
  
  Depends-on:
  alloca
  extensions
  gettext-h
+localcharset
  malloc
  stdbool
  stdint
-strcase
  ssize_t
  wchar
  wctype
author	Paul Eggert <eggert@cs.ucla.edu>
	Thu, 15 Feb 2007 00:16:55 +0000 (00:16 +0000)
committer	Paul Eggert <eggert@cs.ucla.edu>
	Thu, 15 Feb 2007 00:16:55 +0000 (00:16 +0000)
ChangeLog		patch | blob | history
lib/regcomp.c		patch | blob | history
lib/regex_internal.h		patch | blob | history
m4/regex.m4		patch | blob | history
modules/regex		patch | blob | history