X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=a997402a15d16e72099b2b488e4a3ec6c8e5d5d5;hb=a19e601b42a86b59b4af13aa9f960397f5306c38;hp=5a98af0678ef5da6d7332bd4a649a66ada9d9584;hpb=f5edf8add11f0d2e94f553721602cb3926bf33b2;p=gnulib.git

diff --git a/regex.c b/regex.c
index 5a98af067..a997402a1 100644
--- a/regex.c
+++ b/regex.c
@@ -1120,23 +1120,25 @@ static const char *re_error_msgid[] =
    REGEX_ALLOCATE_STACK.  */
 
 
-/* Number of failure points for which to initially allocate space
+/* Approximate number of failure points for which to initially allocate space
    when matching.  If this number is exceeded, we allocate more
    space, so it is not a hard limit.  */
 #ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+#define INIT_FAILURE_ALLOC 20
 #endif
 
 /* Roughly the maximum number of failure points on the stack.  Would be
-   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+   exactly that if we always used TYPICAL_FAILURE_SIZE items each time we failed.
    This is a variable only so users of regex can assign to it; we never
    change it ourselves.	 */
 #if defined (MATCH_MAY_ALLOCATE)
-/* 4400 was enough to cause a crash on Alpha OSF/1,
-   whose default stack limit is 2mb.  */
-int re_max_failures = 20000;
+/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
+   whose default stack limit is 2mb.  In order for a larger
+   value to work reliably, you have to try to make it accord
+   with the process stack limit.  */
+int re_max_failures = 40000;
 #else
-int re_max_failures = 2000;
+int re_max_failures = 4000;
 #endif
 
 union fail_stack_elt
@@ -1166,7 +1168,8 @@ typedef struct
 #define INIT_FAIL_STACK()						\
   do {									\
     fail_stack.stack = (fail_stack_elt_t *)				\
-      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
+      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
+			    * sizeof (fail_stack_elt_t));		\
 									\
     if (fail_stack.stack == NULL)					\
       return -2;							\
@@ -1186,24 +1189,40 @@ typedef struct
 #endif
 
 
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+/* Double the size of FAIL_STACK, up to a limit
+   which allows approximately `re_max_failures' items.
 
    Return 1 if succeeds, and 0 if either ran out of memory
    allocating space for it or it was already too large.
 
    REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
 
-#define DOUBLE_FAIL_STACK(fail_stack)					\
-  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
+/* Factor to increase the failure stack size by
+   when we increase it.
+   This used to be 2, but 2 was too wasteful
+   because the old discarded stacks added up to as much space
+   as the ultimate, maximum-size stack.  */
+#define FAIL_STACK_GROWTH_FACTOR 4
+
+#define GROW_FAIL_STACK(fail_stack)					\
+  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
+    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
    ? 0									\
-   : ((fail_stack).stack = (fail_stack_elt_t *)				\
+   : ((fail_stack).stack						\
+      = (fail_stack_elt_t *)						\
 	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
 	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
-	  ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
+	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
+	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
+		* FAIL_STACK_GROWTH_FACTOR))),				\
 									\
       (fail_stack).stack == NULL					\
       ? 0								\
-      : ((fail_stack).size <<= 1,					\
+      : ((fail_stack).size						\
+	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
+		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
+		  * FAIL_STACK_GROWTH_FACTOR))				\
+	    / sizeof (fail_stack_elt_t)),				\
 	 1)))
 
 
@@ -1212,7 +1231,7 @@ typedef struct
    space to do so.  */
 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
   ((FAIL_STACK_FULL ()							\
-    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
+    && !GROW_FAIL_STACK (FAIL_STACK))					\
    ? 0									\
    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
       1))
@@ -1255,7 +1274,7 @@ typedef struct
    if we ever fail back to it.
 
    Requires variables fail_stack, regstart, regend, reg_info, and
-   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
+   num_regs be declared.  GROW_FAIL_STACK requires `destination' be
    declared.
 
    Does `return FAILURE_CODE' if runs out of memory.  */
@@ -1279,7 +1298,7 @@ typedef struct
     /* Ensure we have enough space allocated for what we will push.  */	\
     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
       {									\
-	if (!DOUBLE_FAIL_STACK (fail_stack))				\
+	if (!GROW_FAIL_STACK (fail_stack))				\
 	  return failure_code;						\
 									\
 	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
@@ -1346,13 +1365,14 @@ typedef struct
 #define NUM_NONREG_ITEMS 4
 #endif
 
-/* We push at most this many items on the stack.  */
-/* We used to use (num_regs - 1), which is the number of registers
-   this regexp will save; but that was changed to 5
-   to avoid stack overflow for a regexp with lots of parens.  */
-#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+/* Estimate the size of data pushed by a typical failure stack entry.
+   An estimate is all we need, because all we use this for
+   is to choose a limit for how big to make the failure stack.  */
+
+#define TYPICAL_FAILURE_SIZE 20
 
-/* We actually push this many items.  */
+/* This is how many items we actually use for a failure point.
+   It depends on the regexp.  */
 #define NUM_FAILURE_ITEMS				\
   (((0							\
      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
@@ -2939,12 +2959,9 @@ regex_compile (pattern, size, syntax, bufp)
   {
     int num_regs = bufp->re_nsub + 1;
 
-    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
-       is strictly greater than re_max_failures, the largest possible stack
-       is 2 * re_max_failures failure points.  */
-    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
       {
-	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
 
 #ifdef emacs
 	if (! fail_stack.stack)
@@ -3887,7 +3904,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
 	      int len = 0;
 
 	      /* Find the head of multibyte form.  */
-	      while (!CHAR_HEAD_P (p))
+	      while (!CHAR_HEAD_P (*p))
 		p--, len++;
 
 	      /* Adjust it. */
@@ -5302,15 +5319,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 	      int pos1 = PTR_TO_OFFSET (d - 1);
+	      int charpos;
 
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE (pos1 ? pos1 : 1);
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 ? pos1 : 1);
+	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      s1 = SYNTAX (c1);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
 	      s2 = SYNTAX (c2);
 
@@ -5337,15 +5356,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 	      int pos1 = PTR_TO_OFFSET (d - 1);
+	      int charpos;
 
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE (pos1);
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      s1 = SYNTAX (c1);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
 	      s2 = SYNTAX (c2);
 
@@ -5372,10 +5393,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 	      int pos1 = PTR_TO_OFFSET (d);
+	      int charpos;
 
 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE (pos1);
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      s2 = SYNTAX (c2);
 	
@@ -5388,7 +5411,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		{
 		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
 #ifdef emacs
-		  UPDATE_SYNTAX_TABLE_BACKWARD (pos1 - 1);
+		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
 #endif
 		  s1 = SYNTAX (c1);
 
@@ -5413,8 +5436,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d);
+	      int charpos;
 
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
 	      s1 = SYNTAX (c1);
 
 	      /* Case 2: S1 is not Sword.  */
@@ -5425,6 +5454,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      if (!AT_STRINGS_END (d))
 		{
 		  GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
 		  s2 = SYNTAX (c2);
 
 		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
@@ -5438,19 +5470,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 #ifdef emacs
 	case before_dot:
 	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
+	  if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
 	    goto fail;
 	  break;
 
 	case at_dot:
 	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) != PT)
+	  if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
 	    goto fail;
 	  break;
 
 	case after_dot:
 	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
+	  if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
 	    goto fail;
 	  break;
 
@@ -5466,7 +5498,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  PREFETCH ();
 #ifdef emacs
 	  {
-	    int pos1 = PTR_TO_OFFSET (d);
+	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
 	    UPDATE_SYNTAX_TABLE (pos1);
 	  }
 #endif
@@ -5500,7 +5532,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  PREFETCH ();
 #ifdef emacs
 	  {
-	    int pos1 = PTR_TO_OFFSET (d);
+	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
 	    UPDATE_SYNTAX_TABLE (pos1);
 	  }
 #endif