lib/hash.c

   1 /* hash - hashing table processing.
   2
   3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software
   4    Foundation, Inc.
   5
   6    Written by Jim Meyering, 1992.
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 2, or (at your option)
  11    any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software Foundation,
  20    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 /* A generic hash table package.  */
  23
  24 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
  25    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
  26
  27 #if HAVE_CONFIG_H
  28 # include <config.h>
  29 #endif
  30
  31 #include <limits.h>
  32 #include <stdbool.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35
  36 #if USE_OBSTACK
  37 # include "obstack.h"
  38 # ifndef obstack_chunk_alloc
  39 #  define obstack_chunk_alloc malloc
  40 # endif
  41 # ifndef obstack_chunk_free
  42 #  define obstack_chunk_free free
  43 # endif
  44 #endif
  45
  46 #include "hash.h"
  47
  48 struct hash_table
  49   {
  50     /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
  51        for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
  52        are not empty, there are N_ENTRIES active entries in the table.  */
  53     struct hash_entry *bucket;
  54     struct hash_entry *bucket_limit;
  55     unsigned n_buckets;
  56     unsigned n_buckets_used;
  57     unsigned n_entries;
  58
  59     /* Tuning arguments, kept in a physicaly separate structure.  */
  60     const Hash_tuning *tuning;
  61
  62     /* Three functions are given to `hash_initialize', see the documentation
  63        block for this function.  In a word, HASHER randomizes a user entry
  64        into a number up from 0 up to some maximum minus 1; COMPARATOR returns
  65        true if two user entries compare equally; and DATA_FREER is the cleanup
  66        function for a user entry.  */
  67     Hash_hasher hasher;
  68     Hash_comparator comparator;
  69     Hash_data_freer data_freer;
  70
  71     /* A linked list of freed struct hash_entry structs.  */
  72     struct hash_entry *free_entry_list;
  73
  74 #if USE_OBSTACK
  75     /* Whenever obstacks are used, it is possible to allocate all overflowed
  76        entries into a single stack, so they all can be freed in a single
  77        operation.  It is not clear if the speedup is worth the trouble.  */
  78     struct obstack entry_stack;
  79 #endif
  80   };
  81
  82 /* A hash table contains many internal entries, each holding a pointer to
  83    some user provided data (also called a user entry).  An entry indistinctly
  84    refers to both the internal entry and its associated user entry.  A user
  85    entry contents may be hashed by a randomization function (the hashing
  86    function, or just `hasher' for short) into a number (or `slot') between 0
  87    and the current table size.  At each slot position in the hash table,
  88    starts a linked chain of entries for which the user data all hash to this
  89    slot.  A bucket is the collection of all entries hashing to the same slot.
  90
  91    A good `hasher' function will distribute entries rather evenly in buckets.
  92    In the ideal case, the length of each bucket is roughly the number of
  93    entries divided by the table size.  Finding the slot for a data is usually
  94    done in constant time by the `hasher', and the later finding of a precise
  95    entry is linear in time with the size of the bucket.  Consequently, a
  96    larger hash table size (that is, a larger number of buckets) is prone to
  97    yielding shorter chains, *given* the `hasher' function behaves properly.
  98
  99    Long buckets slow down the lookup algorithm.  One might use big hash table
 100    sizes in hope to reduce the average length of buckets, but this might
 101    become inordinate, as unused slots in the hash table take some space.  The
 102    best bet is to make sure you are using a good `hasher' function (beware
 103    that those are not that easy to write! :-), and to use a table size
 104    larger than the actual number of entries.  */
 105
 106 /* If an insertion makes the ratio of nonempty buckets to table size larger
 107    than the growth threshold (a number between 0.0 and 1.0), then increase
 108    the table size by multiplying by the growth factor (a number greater than
 109    1.0).  The growth threshold defaults to 0.8, and the growth factor
 110    defaults to 1.414, meaning that the table will have doubled its size
 111    every second time 80% of the buckets get used.  */
 112 #define DEFAULT_GROWTH_THRESHOLD 0.8
 113 #define DEFAULT_GROWTH_FACTOR 1.414
 114
 115 /* If a deletion empties a bucket and causes the ratio of used buckets to
 116    table size to become smaller than the shrink threshold (a number between
 117    0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
 118    number greater than the shrink threshold but smaller than 1.0).  The shrink
 119    threshold and factor default to 0.0 and 1.0, meaning that the table never
 120    shrinks.  */
 121 #define DEFAULT_SHRINK_THRESHOLD 0.0
 122 #define DEFAULT_SHRINK_FACTOR 1.0
 123
 124 /* Use this to initialize or reset a TUNING structure to
 125    some sensible values. */
 126 static const Hash_tuning default_tuning =
 127   {
 128     DEFAULT_SHRINK_THRESHOLD,
 129     DEFAULT_SHRINK_FACTOR,
 130     DEFAULT_GROWTH_THRESHOLD,
 131     DEFAULT_GROWTH_FACTOR,
 132     false
 133   };
 134
 135 /* Information and lookup.  */
 136
 137 /* The following few functions provide information about the overall hash
 138    table organization: the number of entries, number of buckets and maximum
 139    length of buckets.  */
 140
 141 /* Return the number of buckets in the hash table.  The table size, the total
 142    number of buckets (used plus unused), or the maximum number of slots, are
 143    the same quantity.  */
 144
 145 unsigned
 146 hash_get_n_buckets (const Hash_table *table)
 147 {
 148   return table->n_buckets;
 149 }
 150
 151 /* Return the number of slots in use (non-empty buckets).  */
 152
 153 unsigned
 154 hash_get_n_buckets_used (const Hash_table *table)
 155 {
 156   return table->n_buckets_used;
 157 }
 158
 159 /* Return the number of active entries.  */
 160
 161 unsigned
 162 hash_get_n_entries (const Hash_table *table)
 163 {
 164   return table->n_entries;
 165 }
 166
 167 /* Return the length of the longest chain (bucket).  */
 168
 169 unsigned
 170 hash_get_max_bucket_length (const Hash_table *table)
 171 {
 172   struct hash_entry *bucket;
 173   unsigned max_bucket_length = 0;
 174
 175   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 176     {
 177       if (bucket->data)
 178         {
 179           struct hash_entry *cursor = bucket;
 180           unsigned bucket_length = 1;
 181
 182           while (cursor = cursor->next, cursor)
 183             bucket_length++;
 184
 185           if (bucket_length > max_bucket_length)
 186             max_bucket_length = bucket_length;
 187         }
 188     }
 189
 190   return max_bucket_length;
 191 }
 192
 193 /* Do a mild validation of a hash table, by traversing it and checking two
 194    statistics.  */
 195
 196 bool
 197 hash_table_ok (const Hash_table *table)
 198 {
 199   struct hash_entry *bucket;
 200   unsigned n_buckets_used = 0;
 201   unsigned n_entries = 0;
 202
 203   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 204     {
 205       if (bucket->data)
 206         {
 207           struct hash_entry *cursor = bucket;
 208
 209           /* Count bucket head.  */
 210           n_buckets_used++;
 211           n_entries++;
 212
 213           /* Count bucket overflow.  */
 214           while (cursor = cursor->next, cursor)
 215             n_entries++;
 216         }
 217     }
 218
 219   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
 220     return true;
 221
 222   return false;
 223 }
 224
 225 void
 226 hash_print_statistics (const Hash_table *table, FILE *stream)
 227 {
 228   unsigned n_entries = hash_get_n_entries (table);
 229   unsigned n_buckets = hash_get_n_buckets (table);
 230   unsigned n_buckets_used = hash_get_n_buckets_used (table);
 231   unsigned max_bucket_length = hash_get_max_bucket_length (table);
 232
 233   fprintf (stream, "# entries:         %u\n", n_entries);
 234   fprintf (stream, "# buckets:         %u\n", n_buckets);
 235   fprintf (stream, "# buckets used:    %u (%.2f%%)\n", n_buckets_used,
 236            (100.0 * n_buckets_used) / n_buckets);
 237   fprintf (stream, "max bucket length: %u\n", max_bucket_length);
 238 }
 239
 240 /* If ENTRY matches an entry already in the hash table, return the
 241    entry from the table.  Otherwise, return NULL.  */
 242
 243 void *
 244 hash_lookup (const Hash_table *table, const void *entry)
 245 {
 246   struct hash_entry *bucket
 247     = table->bucket + table->hasher (entry, table->n_buckets);
 248   struct hash_entry *cursor;
 249
 250   if (! (bucket < table->bucket_limit))
 251     abort ();
 252
 253   if (bucket->data == NULL)
 254     return NULL;
 255
 256   for (cursor = bucket; cursor; cursor = cursor->next)
 257     if (table->comparator (entry, cursor->data))
 258       return cursor->data;
 259
 260   return NULL;
 261 }
 262
 263 /* Walking.  */
 264
 265 /* The functions in this page traverse the hash table and process the
 266    contained entries.  For the traversal to work properly, the hash table
 267    should not be resized nor modified while any particular entry is being
 268    processed.  In particular, entries should not be added or removed.  */
 269
 270 /* Return the first data in the table, or NULL if the table is empty.  */
 271
 272 void *
 273 hash_get_first (const Hash_table *table)
 274 {
 275   struct hash_entry *bucket;
 276
 277   if (table->n_entries == 0)
 278     return NULL;
 279
 280   for (bucket = table->bucket; ; bucket++)
 281     if (! (bucket < table->bucket_limit))
 282       abort ();
 283     else if (bucket->data)
 284       return bucket->data;
 285 }
 286
 287 /* Return the user data for the entry following ENTRY, where ENTRY has been
 288    returned by a previous call to either `hash_get_first' or `hash_get_next'.
 289    Return NULL if there are no more entries.  */
 290
 291 void *
 292 hash_get_next (const Hash_table *table, const void *entry)
 293 {
 294   struct hash_entry *bucket
 295     = table->bucket + table->hasher (entry, table->n_buckets);
 296   struct hash_entry *cursor;
 297
 298   if (! (bucket < table->bucket_limit))
 299     abort ();
 300
 301   /* Find next entry in the same bucket.  */
 302   for (cursor = bucket; cursor; cursor = cursor->next)
 303     if (cursor->data == entry && cursor->next)
 304       return cursor->next->data;
 305
 306   /* Find first entry in any subsequent bucket.  */
 307   while (++bucket < table->bucket_limit)
 308     if (bucket->data)
 309       return bucket->data;
 310
 311   /* None found.  */
 312   return NULL;
 313 }
 314
 315 /* Fill BUFFER with pointers to active user entries in the hash table, then
 316    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
 317    pointers.  */
 318
 319 unsigned
 320 hash_get_entries (const Hash_table *table, void **buffer,
 321                   unsigned buffer_size)
 322 {
 323   unsigned counter = 0;
 324   struct hash_entry *bucket;
 325   struct hash_entry *cursor;
 326
 327   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 328     {
 329       if (bucket->data)
 330         {
 331           for (cursor = bucket; cursor; cursor = cursor->next)
 332             {
 333               if (counter >= buffer_size)
 334                 return counter;
 335               buffer[counter++] = cursor->data;
 336             }
 337         }
 338     }
 339
 340   return counter;
 341 }
 342
 343 /* Call a PROCESSOR function for each entry of a hash table, and return the
 344    number of entries for which the processor function returned success.  A
 345    pointer to some PROCESSOR_DATA which will be made available to each call to
 346    the processor function.  The PROCESSOR accepts two arguments: the first is
 347    the user entry being walked into, the second is the value of PROCESSOR_DATA
 348    as received.  The walking continue for as long as the PROCESSOR function
 349    returns nonzero.  When it returns zero, the walking is interrupted.  */
 350
 351 unsigned
 352 hash_do_for_each (const Hash_table *table, Hash_processor processor,
 353                   void *processor_data)
 354 {
 355   unsigned counter = 0;
 356   struct hash_entry *bucket;
 357   struct hash_entry *cursor;
 358
 359   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 360     {
 361       if (bucket->data)
 362         {
 363           for (cursor = bucket; cursor; cursor = cursor->next)
 364             {
 365               if (!(*processor) (cursor->data, processor_data))
 366                 return counter;
 367               counter++;
 368             }
 369         }
 370     }
 371
 372   return counter;
 373 }
 374
 375 /* Allocation and clean-up.  */
 376
 377 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
 378    This is a convenience routine for constructing other hashing functions.  */
 379
 380 #if USE_DIFF_HASH
 381
 382 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
 383    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
 384    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
 385    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
 386    may not be good for your application."  */
 387
 388 unsigned
 389 hash_string (const char *string, unsigned n_buckets)
 390 {
 391 # define ROTATE_LEFT(Value, Shift) \
 392   ((Value) << (Shift) | (Value) >> ((sizeof (unsigned) * CHAR_BIT) - (Shift)))
 393 # define HASH_ONE_CHAR(Value, Byte) \
 394   ((Byte) + ROTATE_LEFT (Value, 7))
 395
 396   unsigned value = 0;
 397
 398   for (; *string; string++)
 399     value = HASH_ONE_CHAR (value, *(const unsigned char *) string);
 400   return value % n_buckets;
 401
 402 # undef ROTATE_LEFT
 403 # undef HASH_ONE_CHAR
 404 }
 405
 406 #else /* not USE_DIFF_HASH */
 407
 408 /* This one comes from `recode', and performs a bit better than the above as
 409    per a few experiments.  It is inspired from a hashing routine found in the
 410    very old Cyber `snoop', itself written in typical Greg Mansfield style.
 411    (By the way, what happened to this excellent man?  Is he still alive?)  */
 412
 413 unsigned
 414 hash_string (const char *string, unsigned n_buckets)
 415 {
 416   unsigned value = 0;
 417
 418   while (*string)
 419     value = ((value * 31 + (int) *(const unsigned char *) string++)
 420              % n_buckets);
 421   return value;
 422 }
 423
 424 #endif /* not USE_DIFF_HASH */
 425
 426 /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
 427    number at least equal to 11.  */
 428
 429 static bool
 430 is_prime (unsigned long candidate)
 431 {
 432   unsigned long divisor = 3;
 433   unsigned long square = divisor * divisor;
 434
 435   while (square < candidate && (candidate % divisor))
 436     {
 437       divisor++;
 438       square += 4 * divisor;
 439       divisor++;
 440     }
 441
 442   return (candidate % divisor ? true : false);
 443 }
 444
 445 /* Round a given CANDIDATE number up to the nearest prime, and return that
 446    prime.  Primes lower than 10 are merely skipped.  */
 447
 448 static unsigned long
 449 next_prime (unsigned long candidate)
 450 {
 451   /* Skip small primes.  */
 452   if (candidate < 10)
 453     candidate = 10;
 454
 455   /* Make it definitely odd.  */
 456   candidate |= 1;
 457
 458   while (!is_prime (candidate))
 459     candidate += 2;
 460
 461   return candidate;
 462 }
 463
 464 void
 465 hash_reset_tuning (Hash_tuning *tuning)
 466 {
 467   *tuning = default_tuning;
 468 }
 469
 470 /* For the given hash TABLE, check the user supplied tuning structure for
 471    reasonable values, and return true if there is no gross error with it.
 472    Otherwise, definitively reset the TUNING field to some acceptable default
 473    in the hash table (that is, the user loses the right of further modifying
 474    tuning arguments), and return false.  */
 475
 476 static bool
 477 check_tuning (Hash_table *table)
 478 {
 479   const Hash_tuning *tuning = table->tuning;
 480
 481   if (tuning->growth_threshold > 0.0
 482       && tuning->growth_threshold < 1.0
 483       && tuning->growth_factor > 1.0
 484       && tuning->shrink_threshold >= 0.0
 485       && tuning->shrink_threshold < 1.0
 486       && tuning->shrink_factor > tuning->shrink_threshold
 487       && tuning->shrink_factor <= 1.0
 488       && tuning->shrink_threshold < tuning->growth_threshold)
 489     return true;
 490
 491   table->tuning = &default_tuning;
 492   return false;
 493 }
 494
 495 /* Allocate and return a new hash table, or NULL upon failure.  The initial
 496    number of buckets is automatically selected so as to _guarantee_ that you
 497    may insert at least CANDIDATE different user entries before any growth of
 498    the hash table size occurs.  So, if have a reasonably tight a-priori upper
 499    bound on the number of entries you intend to insert in the hash table, you
 500    may save some table memory and insertion time, by specifying it here.  If
 501    the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
 502    argument has its meaning changed to the wanted number of buckets.
 503
 504    TUNING points to a structure of user-supplied values, in case some fine
 505    tuning is wanted over the default behavior of the hasher.  If TUNING is
 506    NULL, the default tuning parameters are used instead.
 507
 508    The user-supplied HASHER function should be provided.  It accepts two
 509    arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
 510    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
 511    This slot number is then returned.
 512
 513    The user-supplied COMPARATOR function should be provided.  It accepts two
 514    arguments pointing to user data, it then returns true for a pair of entries
 515    that compare equal, or false otherwise.  This function is internally called
 516    on entries which are already known to hash to the same bucket index.
 517
 518    The user-supplied DATA_FREER function, when not NULL, may be later called
 519    with the user data as an argument, just before the entry containing the
 520    data gets freed.  This happens from within `hash_free' or `hash_clear'.
 521    You should specify this function only if you want these functions to free
 522    all of your `data' data.  This is typically the case when your data is
 523    simply an auxiliary struct that you have malloc'd to aggregate several
 524    values.  */
 525
 526 Hash_table *
 527 hash_initialize (unsigned candidate, const Hash_tuning *tuning,
 528                  Hash_hasher hasher, Hash_comparator comparator,
 529                  Hash_data_freer data_freer)
 530 {
 531   Hash_table *table;
 532   struct hash_entry *bucket;
 533
 534   if (hasher == NULL || comparator == NULL)
 535     return NULL;
 536
 537   table = (Hash_table *) malloc (sizeof (Hash_table));
 538   if (table == NULL)
 539     return NULL;
 540
 541   if (!tuning)
 542     tuning = &default_tuning;
 543   table->tuning = tuning;
 544   if (!check_tuning (table))
 545     {
 546       /* Fail if the tuning options are invalid.  This is the only occasion
 547          when the user gets some feedback about it.  Once the table is created,
 548          if the user provides invalid tuning options, we silently revert to
 549          using the defaults, and ignore further request to change the tuning
 550          options.  */
 551       free (table);
 552       return NULL;
 553     }
 554
 555   table->n_buckets
 556     = next_prime (tuning->is_n_buckets ? candidate
 557                   : (unsigned) (candidate / tuning->growth_threshold));
 558
 559   table->bucket = (struct hash_entry *)
 560     malloc (table->n_buckets * sizeof (struct hash_entry));
 561   if (table->bucket == NULL)
 562     {
 563       free (table);
 564       return NULL;
 565     }
 566   table->bucket_limit = table->bucket + table->n_buckets;
 567
 568   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 569     {
 570       bucket->data = NULL;
 571       bucket->next = NULL;
 572     }
 573   table->n_buckets_used = 0;
 574   table->n_entries = 0;
 575
 576   table->hasher = hasher;
 577   table->comparator = comparator;
 578   table->data_freer = data_freer;
 579
 580   table->free_entry_list = NULL;
 581 #if USE_OBSTACK
 582   obstack_init (&table->entry_stack);
 583 #endif
 584   return table;
 585 }
 586
 587 /* Make all buckets empty, placing any chained entries on the free list.
 588    Apply the user-specified function data_freer (if any) to the datas of any
 589    affected entries.  */
 590
 591 void
 592 hash_clear (Hash_table *table)
 593 {
 594   struct hash_entry *bucket;
 595
 596   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 597     {
 598       if (bucket->data)
 599         {
 600           struct hash_entry *cursor;
 601           struct hash_entry *next;
 602
 603           /* Free the bucket overflow.  */
 604           for (cursor = bucket->next; cursor; cursor = next)
 605             {
 606               if (table->data_freer)
 607                 (*table->data_freer) (cursor->data);
 608               cursor->data = NULL;
 609
 610               next = cursor->next;
 611               /* Relinking is done one entry at a time, as it is to be expected
 612                  that overflows are either rare or short.  */
 613               cursor->next = table->free_entry_list;
 614               table->free_entry_list = cursor;
 615             }
 616
 617           /* Free the bucket head.  */
 618           if (table->data_freer)
 619             (*table->data_freer) (bucket->data);
 620           bucket->data = NULL;
 621           bucket->next = NULL;
 622         }
 623     }
 624
 625   table->n_buckets_used = 0;
 626   table->n_entries = 0;
 627 }
 628
 629 /* Reclaim all storage associated with a hash table.  If a data_freer
 630    function has been supplied by the user when the hash table was created,
 631    this function applies it to the data of each entry before freeing that
 632    entry.  */
 633
 634 void
 635 hash_free (Hash_table *table)
 636 {
 637   struct hash_entry *bucket;
 638   struct hash_entry *cursor;
 639   struct hash_entry *next;
 640
 641   /* Call the user data_freer function.  */
 642   if (table->data_freer && table->n_entries)
 643     {
 644       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 645         {
 646           if (bucket->data)
 647             {
 648               for (cursor = bucket; cursor; cursor = cursor->next)
 649                 {
 650                   (*table->data_freer) (cursor->data);
 651                 }
 652             }
 653         }
 654     }
 655
 656 #if USE_OBSTACK
 657
 658   obstack_free (&table->entry_stack, NULL);
 659
 660 #else
 661
 662   /* Free all bucket overflowed entries.  */
 663   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 664     {
 665       for (cursor = bucket->next; cursor; cursor = next)
 666         {
 667           next = cursor->next;
 668           free (cursor);
 669         }
 670     }
 671
 672   /* Also reclaim the internal list of previously freed entries.  */
 673   for (cursor = table->free_entry_list; cursor; cursor = next)
 674     {
 675       next = cursor->next;
 676       free (cursor);
 677     }
 678
 679 #endif
 680
 681   /* Free the remainder of the hash table structure.  */
 682   free (table->bucket);
 683   free (table);
 684 }
 685
 686 /* Insertion and deletion.  */
 687
 688 /* Get a new hash entry for a bucket overflow, possibly by reclying a
 689    previously freed one.  If this is not possible, allocate a new one.  */
 690
 691 static struct hash_entry *
 692 allocate_entry (Hash_table *table)
 693 {
 694   struct hash_entry *new;
 695
 696   if (table->free_entry_list)
 697     {
 698       new = table->free_entry_list;
 699       table->free_entry_list = new->next;
 700     }
 701   else
 702     {
 703 #if USE_OBSTACK
 704       new = (struct hash_entry *)
 705         obstack_alloc (&table->entry_stack, sizeof (struct hash_entry));
 706 #else
 707       new = (struct hash_entry *) malloc (sizeof (struct hash_entry));
 708 #endif
 709     }
 710
 711   return new;
 712 }
 713
 714 /* Free a hash entry which was part of some bucket overflow,
 715    saving it for later recycling.  */
 716
 717 static void
 718 free_entry (Hash_table *table, struct hash_entry *entry)
 719 {
 720   entry->data = NULL;
 721   entry->next = table->free_entry_list;
 722   table->free_entry_list = entry;
 723 }
 724
 725 /* This private function is used to help with insertion and deletion.  When
 726    ENTRY matches an entry in the table, return a pointer to the corresponding
 727    user data and set *BUCKET_HEAD to the head of the selected bucket.
 728    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
 729    the table, unlink the matching entry.  */
 730
 731 static void *
 732 hash_find_entry (Hash_table *table, const void *entry,
 733                  struct hash_entry **bucket_head, bool delete)
 734 {
 735   struct hash_entry *bucket
 736     = table->bucket + table->hasher (entry, table->n_buckets);
 737   struct hash_entry *cursor;
 738
 739   if (! (bucket < table->bucket_limit))
 740     abort ();
 741
 742   *bucket_head = bucket;
 743
 744   /* Test for empty bucket.  */
 745   if (bucket->data == NULL)
 746     return NULL;
 747
 748   /* See if the entry is the first in the bucket.  */
 749   if ((*table->comparator) (entry, bucket->data))
 750     {
 751       void *data = bucket->data;
 752
 753       if (delete)
 754         {
 755           if (bucket->next)
 756             {
 757               struct hash_entry *next = bucket->next;
 758
 759               /* Bump the first overflow entry into the bucket head, then save
 760                  the previous first overflow entry for later recycling.  */
 761               *bucket = *next;
 762               free_entry (table, next);
 763             }
 764           else
 765             {
 766               bucket->data = NULL;
 767             }
 768         }
 769
 770       return data;
 771     }
 772
 773   /* Scan the bucket overflow.  */
 774   for (cursor = bucket; cursor->next; cursor = cursor->next)
 775     {
 776       if ((*table->comparator) (entry, cursor->next->data))
 777         {
 778           void *data = cursor->next->data;
 779
 780           if (delete)
 781             {
 782               struct hash_entry *next = cursor->next;
 783
 784               /* Unlink the entry to delete, then save the freed entry for later
 785                  recycling.  */
 786               cursor->next = next->next;
 787               free_entry (table, next);
 788             }
 789
 790           return data;
 791         }
 792     }
 793
 794   /* No entry found.  */
 795   return NULL;
 796 }
 797
 798 /* For an already existing hash table, change the number of buckets through
 799    specifying CANDIDATE.  The contents of the hash table are preserved.  The
 800    new number of buckets is automatically selected so as to _guarantee_ that
 801    the table may receive at least CANDIDATE different user entries, including
 802    those already in the table, before any other growth of the hash table size
 803    occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
 804    exact number of buckets desired.  */
 805
 806 bool
 807 hash_rehash (Hash_table *table, unsigned candidate)
 808 {
 809   Hash_table *new_table;
 810   struct hash_entry *bucket;
 811   struct hash_entry *cursor;
 812   struct hash_entry *next;
 813
 814   new_table = hash_initialize (candidate, table->tuning, table->hasher,
 815                                table->comparator, table->data_freer);
 816   if (new_table == NULL)
 817     return false;
 818
 819   /* Merely reuse the extra old space into the new table.  */
 820 #if USE_OBSTACK
 821   obstack_free (&new_table->entry_stack, NULL);
 822   new_table->entry_stack = table->entry_stack;
 823 #endif
 824   new_table->free_entry_list = table->free_entry_list;
 825
 826   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 827     if (bucket->data)
 828       for (cursor = bucket; cursor; cursor = next)
 829         {
 830           void *data = cursor->data;
 831           struct hash_entry *new_bucket
 832             = (new_table->bucket
 833                + new_table->hasher (data, new_table->n_buckets));
 834
 835           if (! (new_bucket < new_table->bucket_limit))
 836             abort ();
 837
 838           next = cursor->next;
 839
 840           if (new_bucket->data)
 841             {
 842               if (cursor == bucket)
 843                 {
 844                   /* Allocate or recycle an entry, when moving from a bucket
 845                      header into a bucket overflow.  */
 846                   struct hash_entry *new_entry = allocate_entry (new_table);
 847
 848                   if (new_entry == NULL)
 849                     return false;
 850
 851                   new_entry->data = data;
 852                   new_entry->next = new_bucket->next;
 853                   new_bucket->next = new_entry;
 854                 }
 855               else
 856                 {
 857                   /* Merely relink an existing entry, when moving from a
 858                      bucket overflow into a bucket overflow.  */
 859                   cursor->next = new_bucket->next;
 860                   new_bucket->next = cursor;
 861                 }
 862             }
 863           else
 864             {
 865               /* Free an existing entry, when moving from a bucket
 866                  overflow into a bucket header.  Also take care of the
 867                  simple case of moving from a bucket header into a bucket
 868                  header.  */
 869               new_bucket->data = data;
 870               new_table->n_buckets_used++;
 871               if (cursor != bucket)
 872                 free_entry (new_table, cursor);
 873             }
 874         }
 875
 876   free (table->bucket);
 877   table->bucket = new_table->bucket;
 878   table->bucket_limit = new_table->bucket_limit;
 879   table->n_buckets = new_table->n_buckets;
 880   table->n_buckets_used = new_table->n_buckets_used;
 881   table->free_entry_list = new_table->free_entry_list;
 882   /* table->n_entries already holds its value.  */
 883 #if USE_OBSTACK
 884   table->entry_stack = new_table->entry_stack;
 885 #endif
 886   free (new_table);
 887
 888   return true;
 889 }
 890
 891 /* If ENTRY matches an entry already in the hash table, return the pointer
 892    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
 893    Return NULL if the storage required for insertion cannot be allocated.  */
 894
 895 void *
 896 hash_insert (Hash_table *table, const void *entry)
 897 {
 898   void *data;
 899   struct hash_entry *bucket;
 900
 901   /* The caller cannot insert a NULL entry.  */
 902   if (! entry)
 903     abort ();
 904
 905   /* If there's a matching entry already in the table, return that.  */
 906   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
 907     return data;
 908
 909   /* ENTRY is not matched, it should be inserted.  */
 910
 911   if (bucket->data)
 912     {
 913       struct hash_entry *new_entry = allocate_entry (table);
 914
 915       if (new_entry == NULL)
 916         return NULL;
 917
 918       /* Add ENTRY in the overflow of the bucket.  */
 919
 920       new_entry->data = (void *) entry;
 921       new_entry->next = bucket->next;
 922       bucket->next = new_entry;
 923       table->n_entries++;
 924       return (void *) entry;
 925     }
 926
 927   /* Add ENTRY right in the bucket head.  */
 928
 929   bucket->data = (void *) entry;
 930   table->n_entries++;
 931   table->n_buckets_used++;
 932
 933   /* If the growth threshold of the buckets in use has been reached, increase
 934      the table size and rehash.  There's no point in checking the number of
 935      entries:  if the hashing function is ill-conditioned, rehashing is not
 936      likely to improve it.  */
 937
 938   if (table->n_buckets_used
 939       > table->tuning->growth_threshold * table->n_buckets)
 940     {
 941       /* Check more fully, before starting real work.  If tuning arguments
 942          became invalid, the second check will rely on proper defaults.  */
 943       check_tuning (table);
 944       if (table->n_buckets_used
 945           > table->tuning->growth_threshold * table->n_buckets)
 946         {
 947           const Hash_tuning *tuning = table->tuning;
 948           unsigned candidate
 949             = (unsigned) (tuning->is_n_buckets
 950                           ? (table->n_buckets * tuning->growth_factor)
 951                           : (table->n_buckets * tuning->growth_factor
 952                              * tuning->growth_threshold));
 953
 954           /* If the rehash fails, arrange to return NULL.  */
 955           if (!hash_rehash (table, candidate))
 956             entry = NULL;
 957         }
 958     }
 959
 960   return (void *) entry;
 961 }
 962
 963 /* If ENTRY is already in the table, remove it and return the just-deleted
 964    data (the user may want to deallocate its storage).  If ENTRY is not in the
 965    table, don't modify the table and return NULL.  */
 966
 967 void *
 968 hash_delete (Hash_table *table, const void *entry)
 969 {
 970   void *data;
 971   struct hash_entry *bucket;
 972
 973   data = hash_find_entry (table, entry, &bucket, true);
 974   if (!data)
 975     return NULL;
 976
 977   table->n_entries--;
 978   if (!bucket->data)
 979     {
 980       table->n_buckets_used--;
 981
 982       /* If the shrink threshold of the buckets in use has been reached,
 983          rehash into a smaller table.  */
 984
 985       if (table->n_buckets_used
 986           < table->tuning->shrink_threshold * table->n_buckets)
 987         {
 988           /* Check more fully, before starting real work.  If tuning arguments
 989              became invalid, the second check will rely on proper defaults.  */
 990           check_tuning (table);
 991           if (table->n_buckets_used
 992               < table->tuning->shrink_threshold * table->n_buckets)
 993             {
 994               const Hash_tuning *tuning = table->tuning;
 995               unsigned candidate
 996                 = (unsigned) (tuning->is_n_buckets
 997                               ? table->n_buckets * tuning->shrink_factor
 998                               : (table->n_buckets * tuning->shrink_factor
 999                                  * tuning->growth_threshold));
1000
1001               hash_rehash (table, candidate);
1002             }
1003         }
1004     }
1005
1006   return data;
1007 }
1008
1009 /* Testing.  */
1010
1011 #if TESTING
1012
1013 void
1014 hash_print (const Hash_table *table)
1015 {
1016   struct hash_entry *bucket;
1017
1018   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1019     {
1020       struct hash_entry *cursor;
1021
1022       if (bucket)
1023         printf ("%d:\n", bucket - table->bucket);
1024
1025       for (cursor = bucket; cursor; cursor = cursor->next)
1026         {
1027           char *s = (char *) cursor->data;
1028           /* FIXME */
1029           if (s)
1030             printf ("  %s\n", s);
1031         }
1032     }
1033 }
1034
1035 #endif /* TESTING */