X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregexec.c;h=05a8e807e59bab31a1c2057f563332e0d2514a95;hb=1276a2c5f24c0c932426aca9c899fa524d2443f2;hp=598e456772d52a94433b768f5316f41f3063cd96;hpb=0f6dd806d5fd7c45d9366cd35c793958b14dd75b;p=gnulib.git

diff --git a/lib/regexec.c b/lib/regexec.c
index 598e45677..05a8e807e 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -1,22 +1,21 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
-   Software Foundation, Inc.
+   Copyright (C) 2002-2014 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-   This program is distributed in the hope that it will be useful,
+   The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU General Public License along
-   with this program; if not, write to the Free Software Foundation,
-   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
 				     Idx n) internal_function;
@@ -52,9 +51,8 @@ static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
 				regoff_t range, Idx stop,
 				struct re_registers *regs,
 				bool ret_len) internal_function;
-static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
-				  Idx nregs, int regs_allocated)
-     internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+                              Idx nregs, int regs_allocated) internal_function;
 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
      internal_function;
 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
@@ -201,7 +199,7 @@ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
 static bool check_node_accept (const re_match_context_t *mctx,
 			       const re_token_t *node, Idx idx)
      internal_function;
-static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
      internal_function;
 
 /* Entry point for POSIX code.  */
@@ -210,11 +208,11 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx)
    string STRING.
 
    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
-   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
    least NMATCH elements, and we set them to the offsets of the
    corresponding matched substrings.
 
-   EFLAGS specifies `execution flags' which affect matching: if
+   EFLAGS specifies "execution flags" which affect matching: if
    REG_NOTBOL is set, then ^ does not match at the beginning of the
    string; if REG_NOTEOL is set, then $ does not match at the end.
 
@@ -230,9 +228,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
 {
   reg_errcode_t err;
   Idx start, length;
-#ifdef _LIBC
-  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-#endif
+  re_dfa_t *dfa = preg->buffer;
 
   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
     return REG_BADPAT;
@@ -248,14 +244,14 @@ regexec (preg, string, nmatch, pmatch, eflags)
       length = strlen (string);
     }
 
-  __libc_lock_lock (dfa->lock);
+  lock_lock (dfa->lock);
   if (preg->no_sub)
     err = re_search_internal (preg, string, length, start, length,
 			      length, 0, NULL, eflags);
   else
     err = re_search_internal (preg, string, length, start, length,
 			      length, nmatch, pmatch, eflags);
-  __libc_lock_unlock (dfa->lock);
+  lock_unlock (dfa->lock);
   return err != REG_NOERROR;
 }
 
@@ -366,7 +362,6 @@ weak_alias (__re_search_2, re_search_2)
 #endif
 
 static regoff_t
-internal_function
 re_search_2_stub (struct re_pattern_buffer *bufp,
 		  const char *string1, Idx length1,
 		  const char *string2, Idx length2,
@@ -414,7 +409,6 @@ re_search_2_stub (struct re_pattern_buffer *bufp,
    otherwise the position of the match is returned.  */
 
 static regoff_t
-internal_function
 re_search_stub (struct re_pattern_buffer *bufp,
 		const char *string, Idx length,
 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
@@ -425,9 +419,7 @@ re_search_stub (struct re_pattern_buffer *bufp,
   Idx nregs;
   regoff_t rval;
   int eflags = 0;
-#ifdef _LIBC
-  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
-#endif
+  re_dfa_t *dfa = bufp->buffer;
   Idx last_start = start + range;
 
   /* Check for out-of-range.  */
@@ -438,7 +430,7 @@ re_search_stub (struct re_pattern_buffer *bufp,
   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
     last_start = 0;
 
-  __libc_lock_lock (dfa->lock);
+  lock_lock (dfa->lock);
 
   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
@@ -478,9 +470,9 @@ re_search_stub (struct re_pattern_buffer *bufp,
 
   rval = 0;
 
-  /* I hope we needn't fill ther regs with -1's when no match was found.  */
+  /* I hope we needn't fill their regs with -1's when no match was found.  */
   if (result != REG_NOERROR)
-    rval = -1;
+    rval = result == REG_NOMATCH ? -1 : -2;
   else if (regs != NULL)
     {
       /* If caller wants register contents data back, copy them.  */
@@ -502,19 +494,18 @@ re_search_stub (struct re_pattern_buffer *bufp,
     }
   re_free (pmatch);
  out:
-  __libc_lock_unlock (dfa->lock);
+  lock_unlock (dfa->lock);
   return rval;
 }
 
-static unsigned int
-internal_function
+static unsigned
 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
 	      int regs_allocated)
 {
   int rval = REGS_REALLOCATE;
   Idx i;
   Idx need_regs = nregs + 1;
-  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+  /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
      uses.  */
 
   /* Have the register data arrays been allocated?  */
@@ -637,7 +628,7 @@ re_exec (s)
    (0 <= LAST_START && LAST_START <= LENGTH)  */
 
 static reg_errcode_t
-internal_function __attribute_warn_unused_result__
+__attribute_warn_unused_result__
 re_search_internal (const regex_t *preg,
 		    const char *string, Idx length,
 		    Idx start, Idx last_start, Idx stop,
@@ -645,7 +636,7 @@ re_search_internal (const regex_t *preg,
 		    int eflags)
 {
   reg_errcode_t err;
-  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  const re_dfa_t *dfa = preg->buffer;
   Idx left_lim, right_lim;
   int incr;
   bool fl_longest_match;
@@ -720,7 +711,8 @@ re_search_internal (const regex_t *preg,
   if (nmatch > 1 || dfa->has_mb_node)
     {
       /* Avoid overflow.  */
-      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
+      if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
+               <= mctx.input.bufs_len), 0))
 	{
 	  err = REG_ESPACE;
 	  goto free_return;
@@ -740,7 +732,7 @@ re_search_internal (const regex_t *preg,
   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 
-  /* Check incrementally whether of not the input string match.  */
+  /* Check incrementally whether the input string matches.  */
   incr = (last_start < start) ? -1 : 1;
   left_lim = (last_start < start) ? last_start : start;
   right_lim = (last_start < start) ? start : last_start;
@@ -833,10 +825,10 @@ re_search_internal (const regex_t *preg,
 		break;
 	      match_first += incr;
 	      if (match_first < left_lim || match_first > right_lim)
-	        {
-	          err = REG_NOMATCH;
-	          goto free_return;
-	        }
+		{
+		  err = REG_NOMATCH;
+		  goto free_return;
+		}
 	    }
 	  break;
 	}
@@ -922,7 +914,7 @@ re_search_internal (const regex_t *preg,
 	    goto free_return;
 	}
 
-      /* At last, add the offset to the each registers, since we slided
+      /* At last, add the offset to each register, since we slid
 	 the buffers so that we could assume that the matching starts
 	 from 0.  */
       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
@@ -953,14 +945,14 @@ re_search_internal (const regex_t *preg,
 	}
 
       if (dfa->subexp_map)
-        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
-          if (dfa->subexp_map[reg_idx] != reg_idx)
-            {
-              pmatch[reg_idx + 1].rm_so
-                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
-              pmatch[reg_idx + 1].rm_eo
-                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
-            }
+	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+	  if (dfa->subexp_map[reg_idx] != reg_idx)
+	    {
+	      pmatch[reg_idx + 1].rm_so
+		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+	      pmatch[reg_idx + 1].rm_eo
+		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+	    }
     }
 
  free_return:
@@ -972,7 +964,7 @@ re_search_internal (const regex_t *preg,
 }
 
 static reg_errcode_t
-internal_function __attribute_warn_unused_result__
+__attribute_warn_unused_result__
 prune_impossible_nodes (re_match_context_t *mctx)
 {
   const re_dfa_t *const dfa = mctx->dfa;
@@ -988,7 +980,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
   halt_node = mctx->last_node;
 
   /* Avoid overflow.  */
-  if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
+  if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
     return REG_ESPACE;
 
   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
@@ -1068,7 +1060,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
    since initial states may have constraints like "\<", "^", etc..  */
 
 static inline re_dfastate_t *
-__attribute ((always_inline)) internal_function
+__attribute__ ((always_inline)) internal_function
 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
 			    Idx idx)
 {
@@ -1106,7 +1098,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
    FL_LONGEST_MATCH means we want the POSIX longest matching.
    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
    next place where we may want to try matching.
-   Note that the matcher assume that the maching starts from the current
+   Note that the matcher assumes that the matching starts from the current
    index of the buffer.  */
 
 static Idx
@@ -1149,7 +1141,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
 	    {
 	      err = transit_state_bkref (mctx, &cur_state->nodes);
 	      if (BE (err != REG_NOERROR, 0))
-	        return err;
+		return err;
 	    }
 	}
     }
@@ -1175,17 +1167,18 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
       re_dfastate_t *old_state = cur_state;
       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
 
-      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
-          || (BE (next_char_idx >= mctx->input.valid_len, 0)
-              && mctx->input.valid_len < mctx->input.len))
-        {
-          err = extend_buffers (mctx);
-          if (BE (err != REG_NOERROR, 0))
+      if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
+	   && mctx->input.bufs_len < mctx->input.len)
+	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
+	      && mctx->input.valid_len < mctx->input.len))
+	{
+	  err = extend_buffers (mctx, next_char_idx + 1);
+	  if (BE (err != REG_NOERROR, 0))
 	    {
 	      assert (err == REG_ESPACE);
 	      return REG_ERROR;
 	    }
-        }
+	}
 
       cur_state = transit_state (&err, mctx, cur_state);
       if (mctx->state_log != NULL)
@@ -1309,17 +1302,17 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
           if (dest_node == REG_MISSING)
 	    dest_node = candidate;
 
-          else
+	  else
 	    {
 	      /* In order to avoid infinite loop like "(a*)*", return the second
-	         epsilon-transition if the first was already considered.  */
+		 epsilon-transition if the first was already considered.  */
 	      if (re_node_set_contains (eps_via_nodes, dest_node))
-	        return candidate;
+		return candidate;
 
 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
 	      else if (fs != NULL
 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
-				           eps_via_nodes))
+					   eps_via_nodes))
 		return REG_ERROR;
 
 	      /* We know we are going to exit.  */
@@ -1436,7 +1429,7 @@ internal_function __attribute_warn_unused_result__
 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
 	  regmatch_t *pmatch, bool fl_backtrack)
 {
-  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  const re_dfa_t *dfa = preg->buffer;
   Idx idx, cur_node;
   re_node_set eps_via_nodes;
   struct re_fail_stack_t *fs;
@@ -1608,21 +1601,21 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
    and sift the nodes in each states according to the following rules.
    Updated state_log will be wrote to STATE_LOG.
 
-   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+   Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
-	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
-	the LAST_NODE, we throw away the node `a'.
-     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
-	string `s' and transit to `b':
+	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
+	the LAST_NODE, we throw away the node 'a'.
+     2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
+	string 's' and transit to 'b':
 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
-	   away the node `a'.
+	   away the node 'a'.
 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
-	    thrown away, we throw away the node `a'.
+	    thrown away, we throw away the node 'a'.
      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
-	   node `a'.
+	   node 'a'.
 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
-	    we throw away the node `a'.  */
+	    we throw away the node 'a'.  */
 
 #define STATE_NODE_CONTAINS(state,node) \
   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
@@ -1667,7 +1660,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
       if (mctx->state_log[str_idx])
 	{
 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
-          if (BE (err != REG_NOERROR, 0))
+	  if (BE (err != REG_NOERROR, 0))
 	    goto free_return;
 	}
 
@@ -1695,11 +1688,11 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
   Idx i;
 
   /* Then build the next sifted state.
-     We build the next sifted state on `cur_dest', and update
-     `sifted_states[str_idx]' with `cur_dest'.
+     We build the next sifted state on 'cur_dest', and update
+     'sifted_states[str_idx]' with 'cur_dest'.
      Note:
-     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
-     `cur_src' points the node_set of the old `state_log[str_idx]'
+     'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
+     'cur_src' points the node_set of the old 'state_log[str_idx]'
      (with the epsilon nodes pre-filtered out).  */
   for (i = 0; i < cur_src->nelem; i++)
     {
@@ -1712,7 +1705,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
       assert (!IS_EPSILON_NODE (type));
 #endif
 #ifdef RE_ENABLE_I18N
-      /* If the node may accept `multi byte'.  */
+      /* If the node may accept "multi byte".  */
       if (dfa->nodes[prev_node].accept_mb)
 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
 					 str_idx, sctx->last_str_idx);
@@ -1753,12 +1746,13 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
 {
   Idx top = mctx->state_log_top;
 
-  if (next_state_log_idx >= mctx->input.bufs_len
+  if ((next_state_log_idx >= mctx->input.bufs_len
+       && mctx->input.bufs_len < mctx->input.len)
       || (next_state_log_idx >= mctx->input.valid_len
 	  && mctx->input.valid_len < mctx->input.len))
     {
       reg_errcode_t err;
-      err = extend_buffers (mctx);
+      err = extend_buffers (mctx, next_state_log_idx + 1);
       if (BE (err != REG_NOERROR, 0))
 	return err;
     }
@@ -1982,7 +1976,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
 	    {
 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
 	      do
-	        {
+		{
 		  Idx dst;
 		  int cpos;
 
@@ -2004,9 +1998,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
 		  if (dst == from_node)
 		    {
 		      if (boundaries & 1)
-		        return -1;
+			return -1;
 		      else /* if (boundaries & 2) */
-		        return 0;
+			return 0;
 		    }
 
 		  cpos =
@@ -2020,7 +2014,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
 		  if (subexp_idx < BITSET_WORD_BITS)
 		    ent->eps_reachable_subexps_map
 		      &= ~((bitset_word_t) 1 << subexp_idx);
-	        }
+		}
 	      while (ent++->more);
 	    }
 	  break;
@@ -2245,7 +2239,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
 
 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
-          entry = mctx->bkref_ents + enabled_idx;
+	  entry = mctx->bkref_ents + enabled_idx;
 	}
       while (enabled_idx++, entry++->more);
     }
@@ -2268,17 +2262,17 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
 {
   const re_dfa_t *const dfa = mctx->dfa;
   int naccepted;
-  /* Check the node can accept `multi byte'.  */
+  /* Check the node can accept "multi byte".  */
   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
 			    dfa->nexts[node_idx]))
-    /* The node can't accept the `multi byte', or the
+    /* The node can't accept the "multi byte", or the
        destination was already thrown away, then the node
-       could't accept the current input `multi byte'.   */
+       could't accept the current input "multi byte".   */
     naccepted = 0;
   /* Otherwise, it is sure that the node could accept
-     `naccepted' bytes input.  */
+     'naccepted' bytes input.  */
   return naccepted;
 }
 #endif /* RE_ENABLE_I18N */
@@ -2326,7 +2320,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx,
 
       trtable = state->word_trtable;
       if (BE (trtable != NULL, 1))
-        {
+	{
 	  unsigned int context;
 	  context
 	    = re_string_context_at (&mctx->input,
@@ -2372,21 +2366,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
       unsigned int context;
       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
-         the destination of a multibyte char/collating element/
-         back reference.  Then the next state is the union set of
-         these destinations and the results of the transition table.  */
+	 the destination of a multibyte char/collating element/
+	 back reference.  Then the next state is the union set of
+	 these destinations and the results of the transition table.  */
       pstate = mctx->state_log[cur_idx];
       log_nodes = pstate->entrance_nodes;
       if (next_state != NULL)
-        {
-          table_nodes = next_state->entrance_nodes;
-          *err = re_node_set_init_union (&next_nodes, table_nodes,
+	{
+	  table_nodes = next_state->entrance_nodes;
+	  *err = re_node_set_init_union (&next_nodes, table_nodes,
 					     log_nodes);
-          if (BE (*err != REG_NOERROR, 0))
+	  if (BE (*err != REG_NOERROR, 0))
 	    return NULL;
-        }
+	}
       else
-        next_nodes = *log_nodes;
+	next_nodes = *log_nodes;
       /* Note: We already add the nodes of the initial state,
 	 then we don't need to add them here.  */
 
@@ -2394,12 +2388,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
 				      re_string_cur_idx (&mctx->input) - 1,
 				      mctx->eflags);
       next_state = mctx->state_log[cur_idx]
-        = re_acquire_state_context (err, dfa, &next_nodes, context);
+	= re_acquire_state_context (err, dfa, &next_nodes, context);
       /* We don't need to check errors here, since the return value of
-         this function is next_state and ERR is already set.  */
+	 this function is next_state and ERR is already set.  */
 
       if (table_nodes != NULL)
-        re_node_set_free (&next_nodes);
+	re_node_set_free (&next_nodes);
     }
 
   if (BE (dfa->nbackref, 0) && next_state != NULL)
@@ -2440,9 +2434,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
 
       do
 	{
-          if (++cur_str_idx > max)
-            return NULL;
-          re_string_skip_bytes (&mctx->input, 1);
+	  if (++cur_str_idx > max)
+	    return NULL;
+	  re_string_skip_bytes (&mctx->input, 1);
 	}
       while (mctx->state_log[cur_str_idx] == NULL);
 
@@ -2457,7 +2451,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
 /* From the node set CUR_NODES, pick up the nodes whose types are
    OP_OPEN_SUBEXP and which have corresponding back references in the regular
    expression. And register them to use them later for evaluating the
-   correspoding back references.  */
+   corresponding back references.  */
 
 static reg_errcode_t
 internal_function
@@ -2550,7 +2544,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
       re_dfastate_t *dest_state;
 
       if (!dfa->nodes[cur_node_idx].accept_mb)
-        continue;
+	continue;
 
       if (dfa->nodes[cur_node_idx].constraint)
 	{
@@ -2568,7 +2562,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
       if (naccepted == 0)
 	continue;
 
-      /* The node can accepts `naccepted' bytes.  */
+      /* The node can accepts 'naccepted' bytes.  */
       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
 			       : mctx->max_mb_elem_len);
@@ -2620,7 +2614,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
       const re_token_t *node = dfa->nodes + node_idx;
       re_node_set *new_dest_nodes;
 
-      /* Check whether `node' is a backreference or not.  */
+      /* Check whether 'node' is a backreference or not.  */
       if (node->type != OP_BACK_REF)
 	continue;
 
@@ -2632,14 +2626,14 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
 	    continue;
 	}
 
-      /* `node' is a backreference.
+      /* 'node' is a backreference.
 	 Check the substring which the substring matched.  */
       bkc_idx = mctx->nbkref_ents;
       err = get_subexp (mctx, node_idx, cur_str_idx);
       if (BE (err != REG_NOERROR, 0))
 	goto free_return;
 
-      /* And add the epsilon closures (which is `new_dest_nodes') of
+      /* And add the epsilon closures (which is 'new_dest_nodes') of
 	 the backreference to appropriate state_log.  */
 #ifdef DEBUG
       assert (dfa->nexts[node_idx] != REG_MISSING);
@@ -2663,7 +2657,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
 	  dest_state = mctx->state_log[dest_str_idx];
 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
 			: mctx->state_log[cur_str_idx]->nodes.nelem);
-	  /* Add `new_dest_node' to state_log.  */
+	  /* Add 'new_dest_node' to state_log.  */
 	  if (dest_state == NULL)
 	    {
 	      mctx->state_log[dest_str_idx]
@@ -2731,7 +2725,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
       const struct re_backref_cache_entry *entry
 	= mctx->bkref_ents + cache_idx;
       do
-        if (entry->node == bkref_node)
+	if (entry->node == bkref_node)
 	  return REG_NOERROR; /* We already checked it.  */
       while (entry++->more);
     }
@@ -2815,7 +2809,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
 		  if (bkref_str_off >= mctx->input.len)
 		    break;
 
-		  err = extend_buffers (mctx);
+		  err = extend_buffers (mctx, bkref_str_off + 1);
 		  if (BE (err != REG_NOERROR, 0))
 		    return err;
 
@@ -2937,9 +2931,12 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
     {
       re_dfastate_t **new_array;
       Idx old_alloc = path->alloc;
-      Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
-      if (BE (new_alloc < old_alloc, 0)
-	  || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
+      Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
+      Idx new_alloc;
+      if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
+	return REG_ESPACE;
+      new_alloc = old_alloc + incr_alloc;
+      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
 	return REG_ESPACE;
       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
       if (BE (new_array == NULL, 0))
@@ -3102,7 +3099,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
       assert (!IS_EPSILON_NODE (type));
 #endif
 #ifdef RE_ENABLE_I18N
-      /* If the node may accept `multi byte'.  */
+      /* If the node may accept "multi byte".  */
       if (dfa->nodes[cur_node].accept_mb)
 	{
 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
@@ -3359,7 +3356,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   bitset_word_t elem, mask;
   bool dests_node_malloced = false;
   bool dest_states_malloced = false;
-  Idx ndests; /* Number of the destination states from `state'.  */
+  Idx ndests; /* Number of the destination states from 'state'.  */
   re_dfastate_t **trtable;
   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
   re_node_set follows, *dests_node;
@@ -3373,8 +3370,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   } *dests_alloc;
 
   /* We build DFA states which corresponds to the destination nodes
-     from `state'.  `dests_node[i]' represents the nodes which i-th
-     destination state contains, and `dests_ch[i]' represents the
+     from 'state'.  'dests_node[i]' represents the nodes which i-th
+     destination state contains, and 'dests_ch[i]' represents the
      characters which i-th destination state accepts.  */
   if (__libc_use_alloca (sizeof (struct dests_alloc)))
     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
@@ -3388,20 +3385,23 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   dests_node = dests_alloc->dests_node;
   dests_ch = dests_alloc->dests_ch;
 
-  /* Initialize transiton table.  */
+  /* Initialize transition table.  */
   state->word_trtable = state->trtable = NULL;
 
-  /* At first, group all nodes belonging to `state' into several
+  /* At first, group all nodes belonging to 'state' into several
      destinations.  */
   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
     {
       if (dests_node_malloced)
 	free (dests_alloc);
+      /* Return false in case of an error, true otherwise.  */
       if (ndests == 0)
 	{
 	  state->trtable = (re_dfastate_t **)
 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
+          if (BE (state->trtable == NULL, 0))
+            return false;
 	  return true;
 	}
       return false;
@@ -3591,13 +3591,13 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
   reg_errcode_t err;
   bool ok;
   Idx i, j, k;
-  Idx ndests; /* Number of the destinations from `state'.  */
+  Idx ndests; /* Number of the destinations from 'state'.  */
   bitset_t accepts; /* Characters a node can accept.  */
   const re_node_set *cur_nodes = &state->nodes;
   bitset_empty (accepts);
   ndests = 0;
 
-  /* For all the nodes belonging to `state',  */
+  /* For all the nodes belonging to 'state',  */
   for (i = 0; i < cur_nodes->nelem; ++i)
     {
       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
@@ -3626,7 +3626,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 	}
 #ifdef RE_ENABLE_I18N
       else if (type == OP_UTF8_PERIOD)
-        {
+	{
 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
 	  else
@@ -3635,12 +3635,12 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 	    bitset_clear (accepts, '\n');
 	  if (dfa->syntax & RE_DOT_NOT_NULL)
 	    bitset_clear (accepts, '\0');
-        }
+	}
 #endif
       else
 	continue;
 
-      /* Check the `accepts' and sift the characters which are not
+      /* Check the 'accepts' and sift the characters which are not
 	 match it the context.  */
       if (constraint)
 	{
@@ -3699,7 +3699,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 	    }
 	}
 
-      /* Then divide `accepts' into DFA states, or create a new
+      /* Then divide 'accepts' into DFA states, or create a new
 	 state.  Above, we make sure that accepts is not empty.  */
       for (j = 0; j < ndests; ++j)
 	{
@@ -3712,7 +3712,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
 	    continue;
 
-	  /* Enumerate the intersection set of this state and `accepts'.  */
+	  /* Enumerate the intersection set of this state and 'accepts'.  */
 	  has_intersec = 0;
 	  for (k = 0; k < BITSET_WORDS; ++k)
 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
@@ -3720,7 +3720,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 	  if (!has_intersec)
 	    continue;
 
-	  /* Then check if this state is a subset of `accepts'.  */
+	  /* Then check if this state is a subset of 'accepts'.  */
 	  not_subset = not_consumed = 0;
 	  for (k = 0; k < BITSET_WORDS; ++k)
 	    {
@@ -3728,8 +3728,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
 	    }
 
-	  /* If this state isn't a subset of `accepts', create a
-	     new group state, which has the `remains'. */
+	  /* If this state isn't a subset of 'accepts', create a
+	     new group state, which has the 'remains'. */
 	  if (not_subset)
 	    {
 	      bitset_copy (dests_ch[ndests], remains);
@@ -3768,7 +3768,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
 }
 
 #ifdef RE_ENABLE_I18N
-/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
+/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
    Return the number of the bytes the node accepts.
    STR_IDX is the current index of the input string.
 
@@ -3840,7 +3840,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
   if (node->type == OP_PERIOD)
     {
       if (char_len <= 1)
-        return 0;
+	return 0;
       /* FIXME: I don't think this if is needed, as both '\n'
 	 and '\0' are char_len == 1.  */
       /* '.' accepts any one character except the following two cases.  */
@@ -3895,7 +3895,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 	  const int32_t *table, *indirect;
 	  const unsigned char *weights, *extra;
 	  const char *collseqwc;
-	  int32_t idx;
 	  /* This #include defines a local function!  */
 #  include <locale/weight.h>
 
@@ -3933,6 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 		in_collseq = find_collation_sequence_value (pin, elem_len);
 	    }
 	  /* match with range expression?  */
+	  /* FIXME: Implement rational ranges here, too.  */
 	  for (i = 0; i < cset->nranges; ++i)
 	    if (cset->range_starts[i] <= in_collseq
 		&& in_collseq <= cset->range_ends[i])
@@ -3953,7 +3953,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 	      indirect = (const int32_t *)
 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
-	      int32_t idx = findidx (&cp);
+	      int32_t idx = findidx (&cp, elem_len);
 	      if (idx > 0)
 		for (i = 0; i < cset->nequiv_classes; ++i)
 		  {
@@ -3984,18 +3984,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 # endif /* _LIBC */
 	{
 	  /* match with range expression?  */
-#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
-	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
-#else
-	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
-	  cmp_buf[2] = wc;
-#endif
 	  for (i = 0; i < cset->nranges; ++i)
 	    {
-	      cmp_buf[0] = cset->range_starts[i];
-	      cmp_buf[4] = cset->range_ends[i];
-	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
-		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
 		{
 		  match_len = char_len;
 		  goto check_node_accept_bytes_match;
@@ -4065,7 +4056,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
 	  /* Skip the collation sequence value.  */
 	  idx += sizeof (uint32_t);
 	  /* Skip the wide char sequence of the collating element.  */
-	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
 	  /* If we found the entry, return the sequence value.  */
 	  if (found)
 	    return *(uint32_t *) (extra + idx);
@@ -4133,17 +4124,20 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
 
 static reg_errcode_t
 internal_function __attribute_warn_unused_result__
-extend_buffers (re_match_context_t *mctx)
+extend_buffers (re_match_context_t *mctx, int min_len)
 {
   reg_errcode_t ret;
   re_string_t *pstr = &mctx->input;
 
   /* Avoid overflow.  */
-  if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
+  if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
+          <= pstr->bufs_len, 0))
     return REG_ESPACE;
 
-  /* Double the lengthes of the buffers.  */
-  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+  /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
+  ret = re_string_realloc_buffers (pstr,
+				   MAX (min_len,
+					MIN (pstr->len, pstr->bufs_len * 2)));
   if (BE (ret != REG_NOERROR, 0))
     return ret;
 
@@ -4206,7 +4200,7 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
       size_t max_object_size =
 	MAX (sizeof (struct re_backref_cache_entry),
 	     sizeof (re_sub_match_top_t *));
-      if (BE (SIZE_MAX / max_object_size < n, 0))
+      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
 	return REG_ESPACE;
 
       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);