lib/hash.c

   1 /* hash - hashing table processing.
   2
   3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2007,
   4    2009 Free Software Foundation, Inc.
   5
   6    Written by Jim Meyering, 1992.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* A generic hash table package.  */
  22
  23 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
  24    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
  25
  26 #include <config.h>
  27
  28 #include "hash.h"
  29 #include "xalloc.h"
  30
  31 #include <limits.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34
  35 #if USE_OBSTACK
  36 # include "obstack.h"
  37 # ifndef obstack_chunk_alloc
  38 #  define obstack_chunk_alloc malloc
  39 # endif
  40 # ifndef obstack_chunk_free
  41 #  define obstack_chunk_free free
  42 # endif
  43 #endif
  44
  45 #ifndef SIZE_MAX
  46 # define SIZE_MAX ((size_t) -1)
  47 #endif
  48
  49 struct hash_entry
  50   {
  51     void *data;
  52     struct hash_entry *next;
  53   };
  54
  55 struct hash_table
  56   {
  57     /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
  58        for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
  59        are not empty, there are N_ENTRIES active entries in the table.  */
  60     struct hash_entry *bucket;
  61     struct hash_entry const *bucket_limit;
  62     size_t n_buckets;
  63     size_t n_buckets_used;
  64     size_t n_entries;
  65
  66     /* Tuning arguments, kept in a physically separate structure.  */
  67     const Hash_tuning *tuning;
  68
  69     /* Three functions are given to `hash_initialize', see the documentation
  70        block for this function.  In a word, HASHER randomizes a user entry
  71        into a number up from 0 up to some maximum minus 1; COMPARATOR returns
  72        true if two user entries compare equally; and DATA_FREER is the cleanup
  73        function for a user entry.  */
  74     Hash_hasher hasher;
  75     Hash_comparator comparator;
  76     Hash_data_freer data_freer;
  77
  78     /* A linked list of freed struct hash_entry structs.  */
  79     struct hash_entry *free_entry_list;
  80
  81 #if USE_OBSTACK
  82     /* Whenever obstacks are used, it is possible to allocate all overflowed
  83        entries into a single stack, so they all can be freed in a single
  84        operation.  It is not clear if the speedup is worth the trouble.  */
  85     struct obstack entry_stack;
  86 #endif
  87   };
  88
  89 /* A hash table contains many internal entries, each holding a pointer to
  90    some user-provided data (also called a user entry).  An entry indistinctly
  91    refers to both the internal entry and its associated user entry.  A user
  92    entry contents may be hashed by a randomization function (the hashing
  93    function, or just `hasher' for short) into a number (or `slot') between 0
  94    and the current table size.  At each slot position in the hash table,
  95    starts a linked chain of entries for which the user data all hash to this
  96    slot.  A bucket is the collection of all entries hashing to the same slot.
  97
  98    A good `hasher' function will distribute entries rather evenly in buckets.
  99    In the ideal case, the length of each bucket is roughly the number of
 100    entries divided by the table size.  Finding the slot for a data is usually
 101    done in constant time by the `hasher', and the later finding of a precise
 102    entry is linear in time with the size of the bucket.  Consequently, a
 103    larger hash table size (that is, a larger number of buckets) is prone to
 104    yielding shorter chains, *given* the `hasher' function behaves properly.
 105
 106    Long buckets slow down the lookup algorithm.  One might use big hash table
 107    sizes in hope to reduce the average length of buckets, but this might
 108    become inordinate, as unused slots in the hash table take some space.  The
 109    best bet is to make sure you are using a good `hasher' function (beware
 110    that those are not that easy to write! :-), and to use a table size
 111    larger than the actual number of entries.  */
 112
 113 /* If an insertion makes the ratio of nonempty buckets to table size larger
 114    than the growth threshold (a number between 0.0 and 1.0), then increase
 115    the table size by multiplying by the growth factor (a number greater than
 116    1.0).  The growth threshold defaults to 0.8, and the growth factor
 117    defaults to 1.414, meaning that the table will have doubled its size
 118    every second time 80% of the buckets get used.  */
 119 #define DEFAULT_GROWTH_THRESHOLD 0.8
 120 #define DEFAULT_GROWTH_FACTOR 1.414
 121
 122 /* If a deletion empties a bucket and causes the ratio of used buckets to
 123    table size to become smaller than the shrink threshold (a number between
 124    0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
 125    number greater than the shrink threshold but smaller than 1.0).  The shrink
 126    threshold and factor default to 0.0 and 1.0, meaning that the table never
 127    shrinks.  */
 128 #define DEFAULT_SHRINK_THRESHOLD 0.0
 129 #define DEFAULT_SHRINK_FACTOR 1.0
 130
 131 /* Use this to initialize or reset a TUNING structure to
 132    some sensible values. */
 133 static const Hash_tuning default_tuning =
 134   {
 135     DEFAULT_SHRINK_THRESHOLD,
 136     DEFAULT_SHRINK_FACTOR,
 137     DEFAULT_GROWTH_THRESHOLD,
 138     DEFAULT_GROWTH_FACTOR,
 139     false
 140   };
 141
 142 /* Information and lookup.  */
 143
 144 /* The following few functions provide information about the overall hash
 145    table organization: the number of entries, number of buckets and maximum
 146    length of buckets.  */
 147
 148 /* Return the number of buckets in the hash table.  The table size, the total
 149    number of buckets (used plus unused), or the maximum number of slots, are
 150    the same quantity.  */
 151
 152 size_t
 153 hash_get_n_buckets (const Hash_table *table)
 154 {
 155   return table->n_buckets;
 156 }
 157
 158 /* Return the number of slots in use (non-empty buckets).  */
 159
 160 size_t
 161 hash_get_n_buckets_used (const Hash_table *table)
 162 {
 163   return table->n_buckets_used;
 164 }
 165
 166 /* Return the number of active entries.  */
 167
 168 size_t
 169 hash_get_n_entries (const Hash_table *table)
 170 {
 171   return table->n_entries;
 172 }
 173
 174 /* Return the length of the longest chain (bucket).  */
 175
 176 size_t
 177 hash_get_max_bucket_length (const Hash_table *table)
 178 {
 179   struct hash_entry const *bucket;
 180   size_t max_bucket_length = 0;
 181
 182   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 183     {
 184       if (bucket->data)
 185         {
 186           struct hash_entry const *cursor = bucket;
 187           size_t bucket_length = 1;
 188
 189           while (cursor = cursor->next, cursor)
 190             bucket_length++;
 191
 192           if (bucket_length > max_bucket_length)
 193             max_bucket_length = bucket_length;
 194         }
 195     }
 196
 197   return max_bucket_length;
 198 }
 199
 200 /* Do a mild validation of a hash table, by traversing it and checking two
 201    statistics.  */
 202
 203 bool
 204 hash_table_ok (const Hash_table *table)
 205 {
 206   struct hash_entry const *bucket;
 207   size_t n_buckets_used = 0;
 208   size_t n_entries = 0;
 209
 210   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 211     {
 212       if (bucket->data)
 213         {
 214           struct hash_entry const *cursor = bucket;
 215
 216           /* Count bucket head.  */
 217           n_buckets_used++;
 218           n_entries++;
 219
 220           /* Count bucket overflow.  */
 221           while (cursor = cursor->next, cursor)
 222             n_entries++;
 223         }
 224     }
 225
 226   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
 227     return true;
 228
 229   return false;
 230 }
 231
 232 void
 233 hash_print_statistics (const Hash_table *table, FILE *stream)
 234 {
 235   size_t n_entries = hash_get_n_entries (table);
 236   size_t n_buckets = hash_get_n_buckets (table);
 237   size_t n_buckets_used = hash_get_n_buckets_used (table);
 238   size_t max_bucket_length = hash_get_max_bucket_length (table);
 239
 240   fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
 241   fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
 242   fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
 243            (unsigned long int) n_buckets_used,
 244            (100.0 * n_buckets_used) / n_buckets);
 245   fprintf (stream, "max bucket length: %lu\n",
 246            (unsigned long int) max_bucket_length);
 247 }
 248
 249 /* If ENTRY matches an entry already in the hash table, return the
 250    entry from the table.  Otherwise, return NULL.  */
 251
 252 void *
 253 hash_lookup (const Hash_table *table, const void *entry)
 254 {
 255   struct hash_entry const *bucket
 256     = table->bucket + table->hasher (entry, table->n_buckets);
 257   struct hash_entry const *cursor;
 258
 259   if (! (bucket < table->bucket_limit))
 260     abort ();
 261
 262   if (bucket->data == NULL)
 263     return NULL;
 264
 265   for (cursor = bucket; cursor; cursor = cursor->next)
 266     if (table->comparator (entry, cursor->data))
 267       return cursor->data;
 268
 269   return NULL;
 270 }
 271
 272 /* Walking.  */
 273
 274 /* The functions in this page traverse the hash table and process the
 275    contained entries.  For the traversal to work properly, the hash table
 276    should not be resized nor modified while any particular entry is being
 277    processed.  In particular, entries should not be added, and an entry
 278    may be removed only if there is no shrink threshold and the entry being
 279    removed has already been passed to hash_get_next.  */
 280
 281 /* Return the first data in the table, or NULL if the table is empty.  */
 282
 283 void *
 284 hash_get_first (const Hash_table *table)
 285 {
 286   struct hash_entry const *bucket;
 287
 288   if (table->n_entries == 0)
 289     return NULL;
 290
 291   for (bucket = table->bucket; ; bucket++)
 292     if (! (bucket < table->bucket_limit))
 293       abort ();
 294     else if (bucket->data)
 295       return bucket->data;
 296 }
 297
 298 /* Return the user data for the entry following ENTRY, where ENTRY has been
 299    returned by a previous call to either `hash_get_first' or `hash_get_next'.
 300    Return NULL if there are no more entries.  */
 301
 302 void *
 303 hash_get_next (const Hash_table *table, const void *entry)
 304 {
 305   struct hash_entry const *bucket
 306     = table->bucket + table->hasher (entry, table->n_buckets);
 307   struct hash_entry const *cursor;
 308
 309   if (! (bucket < table->bucket_limit))
 310     abort ();
 311
 312   /* Find next entry in the same bucket.  */
 313   for (cursor = bucket; cursor; cursor = cursor->next)
 314     if (cursor->data == entry && cursor->next)
 315       return cursor->next->data;
 316
 317   /* Find first entry in any subsequent bucket.  */
 318   while (++bucket < table->bucket_limit)
 319     if (bucket->data)
 320       return bucket->data;
 321
 322   /* None found.  */
 323   return NULL;
 324 }
 325
 326 /* Fill BUFFER with pointers to active user entries in the hash table, then
 327    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
 328    pointers.  */
 329
 330 size_t
 331 hash_get_entries (const Hash_table *table, void **buffer,
 332                   size_t buffer_size)
 333 {
 334   size_t counter = 0;
 335   struct hash_entry const *bucket;
 336   struct hash_entry const *cursor;
 337
 338   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 339     {
 340       if (bucket->data)
 341         {
 342           for (cursor = bucket; cursor; cursor = cursor->next)
 343             {
 344               if (counter >= buffer_size)
 345                 return counter;
 346               buffer[counter++] = cursor->data;
 347             }
 348         }
 349     }
 350
 351   return counter;
 352 }
 353
 354 /* Call a PROCESSOR function for each entry of a hash table, and return the
 355    number of entries for which the processor function returned success.  A
 356    pointer to some PROCESSOR_DATA which will be made available to each call to
 357    the processor function.  The PROCESSOR accepts two arguments: the first is
 358    the user entry being walked into, the second is the value of PROCESSOR_DATA
 359    as received.  The walking continue for as long as the PROCESSOR function
 360    returns nonzero.  When it returns zero, the walking is interrupted.  */
 361
 362 size_t
 363 hash_do_for_each (const Hash_table *table, Hash_processor processor,
 364                   void *processor_data)
 365 {
 366   size_t counter = 0;
 367   struct hash_entry const *bucket;
 368   struct hash_entry const *cursor;
 369
 370   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 371     {
 372       if (bucket->data)
 373         {
 374           for (cursor = bucket; cursor; cursor = cursor->next)
 375             {
 376               if (!(*processor) (cursor->data, processor_data))
 377                 return counter;
 378               counter++;
 379             }
 380         }
 381     }
 382
 383   return counter;
 384 }
 385
 386 /* Allocation and clean-up.  */
 387
 388 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
 389    This is a convenience routine for constructing other hashing functions.  */
 390
 391 #if USE_DIFF_HASH
 392
 393 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
 394    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
 395    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
 396    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
 397    may not be good for your application."  */
 398
 399 size_t
 400 hash_string (const char *string, size_t n_buckets)
 401 {
 402 # define ROTATE_LEFT(Value, Shift) \
 403   ((Value) << (Shift) | (Value) >> ((sizeof (size_t) * CHAR_BIT) - (Shift)))
 404 # define HASH_ONE_CHAR(Value, Byte) \
 405   ((Byte) + ROTATE_LEFT (Value, 7))
 406
 407   size_t value = 0;
 408   unsigned char ch;
 409
 410   for (; (ch = *string); string++)
 411     value = HASH_ONE_CHAR (value, ch);
 412   return value % n_buckets;
 413
 414 # undef ROTATE_LEFT
 415 # undef HASH_ONE_CHAR
 416 }
 417
 418 #else /* not USE_DIFF_HASH */
 419
 420 /* This one comes from `recode', and performs a bit better than the above as
 421    per a few experiments.  It is inspired from a hashing routine found in the
 422    very old Cyber `snoop', itself written in typical Greg Mansfield style.
 423    (By the way, what happened to this excellent man?  Is he still alive?)  */
 424
 425 size_t
 426 hash_string (const char *string, size_t n_buckets)
 427 {
 428   size_t value = 0;
 429   unsigned char ch;
 430
 431   for (; (ch = *string); string++)
 432     value = (value * 31 + ch) % n_buckets;
 433   return value;
 434 }
 435
 436 #endif /* not USE_DIFF_HASH */
 437
 438 /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
 439    number at least equal to 11.  */
 440
 441 static bool
 442 is_prime (size_t candidate)
 443 {
 444   size_t divisor = 3;
 445   size_t square = divisor * divisor;
 446
 447   while (square < candidate && (candidate % divisor))
 448     {
 449       divisor++;
 450       square += 4 * divisor;
 451       divisor++;
 452     }
 453
 454   return (candidate % divisor ? true : false);
 455 }
 456
 457 /* Round a given CANDIDATE number up to the nearest prime, and return that
 458    prime.  Primes lower than 10 are merely skipped.  */
 459
 460 static size_t
 461 next_prime (size_t candidate)
 462 {
 463   /* Skip small primes.  */
 464   if (candidate < 10)
 465     candidate = 10;
 466
 467   /* Make it definitely odd.  */
 468   candidate |= 1;
 469
 470   while (!is_prime (candidate))
 471     candidate += 2;
 472
 473   return candidate;
 474 }
 475
 476 void
 477 hash_reset_tuning (Hash_tuning *tuning)
 478 {
 479   *tuning = default_tuning;
 480 }
 481
 482 /* For the given hash TABLE, check the user supplied tuning structure for
 483    reasonable values, and return true if there is no gross error with it.
 484    Otherwise, definitively reset the TUNING field to some acceptable default
 485    in the hash table (that is, the user loses the right of further modifying
 486    tuning arguments), and return false.  */
 487
 488 static bool
 489 check_tuning (Hash_table *table)
 490 {
 491   const Hash_tuning *tuning = table->tuning;
 492   if (tuning == &default_tuning)
 493     return true;
 494
 495   /* Be a bit stricter than mathematics would require, so that
 496      rounding errors in size calculations do not cause allocations to
 497      fail to grow or shrink as they should.  The smallest allocation
 498      is 11 (due to next_prime's algorithm), so an epsilon of 0.1
 499      should be good enough.  */
 500   float epsilon = 0.1f;
 501
 502   if (epsilon < tuning->growth_threshold
 503       && tuning->growth_threshold < 1 - epsilon
 504       && 1 + epsilon < tuning->growth_factor
 505       && 0 <= tuning->shrink_threshold
 506       && tuning->shrink_threshold + epsilon < tuning->shrink_factor
 507       && tuning->shrink_factor <= 1
 508       && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
 509     return true;
 510
 511   table->tuning = &default_tuning;
 512   return false;
 513 }
 514
 515 /* Allocate and return a new hash table, or NULL upon failure.  The initial
 516    number of buckets is automatically selected so as to _guarantee_ that you
 517    may insert at least CANDIDATE different user entries before any growth of
 518    the hash table size occurs.  So, if have a reasonably tight a-priori upper
 519    bound on the number of entries you intend to insert in the hash table, you
 520    may save some table memory and insertion time, by specifying it here.  If
 521    the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
 522    argument has its meaning changed to the wanted number of buckets.
 523
 524    TUNING points to a structure of user-supplied values, in case some fine
 525    tuning is wanted over the default behavior of the hasher.  If TUNING is
 526    NULL, the default tuning parameters are used instead.  If TUNING is
 527    provided but the values requested are out of bounds or might cause
 528    rounding errors, return NULL.
 529
 530    The user-supplied HASHER function should be provided.  It accepts two
 531    arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
 532    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
 533    This slot number is then returned.
 534
 535    The user-supplied COMPARATOR function should be provided.  It accepts two
 536    arguments pointing to user data, it then returns true for a pair of entries
 537    that compare equal, or false otherwise.  This function is internally called
 538    on entries which are already known to hash to the same bucket index.
 539
 540    The user-supplied DATA_FREER function, when not NULL, may be later called
 541    with the user data as an argument, just before the entry containing the
 542    data gets freed.  This happens from within `hash_free' or `hash_clear'.
 543    You should specify this function only if you want these functions to free
 544    all of your `data' data.  This is typically the case when your data is
 545    simply an auxiliary struct that you have malloc'd to aggregate several
 546    values.  */
 547
 548 Hash_table *
 549 hash_initialize (size_t candidate, const Hash_tuning *tuning,
 550                  Hash_hasher hasher, Hash_comparator comparator,
 551                  Hash_data_freer data_freer)
 552 {
 553   Hash_table *table;
 554
 555   if (hasher == NULL || comparator == NULL)
 556     return NULL;
 557
 558   table = malloc (sizeof *table);
 559   if (table == NULL)
 560     return NULL;
 561
 562   if (!tuning)
 563     tuning = &default_tuning;
 564   table->tuning = tuning;
 565   if (!check_tuning (table))
 566     {
 567       /* Fail if the tuning options are invalid.  This is the only occasion
 568          when the user gets some feedback about it.  Once the table is created,
 569          if the user provides invalid tuning options, we silently revert to
 570          using the defaults, and ignore further request to change the tuning
 571          options.  */
 572       goto fail;
 573     }
 574
 575   if (!tuning->is_n_buckets)
 576     {
 577       float new_candidate = candidate / tuning->growth_threshold;
 578       if (SIZE_MAX <= new_candidate)
 579         goto fail;
 580       candidate = new_candidate;
 581     }
 582
 583   if (xalloc_oversized (candidate, sizeof *table->bucket))
 584     goto fail;
 585   table->n_buckets = next_prime (candidate);
 586   if (xalloc_oversized (table->n_buckets, sizeof *table->bucket))
 587     goto fail;
 588
 589   table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
 590   if (table->bucket == NULL)
 591     goto fail;
 592   table->bucket_limit = table->bucket + table->n_buckets;
 593   table->n_buckets_used = 0;
 594   table->n_entries = 0;
 595
 596   table->hasher = hasher;
 597   table->comparator = comparator;
 598   table->data_freer = data_freer;
 599
 600   table->free_entry_list = NULL;
 601 #if USE_OBSTACK
 602   obstack_init (&table->entry_stack);
 603 #endif
 604   return table;
 605
 606  fail:
 607   free (table);
 608   return NULL;
 609 }
 610
 611 /* Make all buckets empty, placing any chained entries on the free list.
 612    Apply the user-specified function data_freer (if any) to the datas of any
 613    affected entries.  */
 614
 615 void
 616 hash_clear (Hash_table *table)
 617 {
 618   struct hash_entry *bucket;
 619
 620   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 621     {
 622       if (bucket->data)
 623         {
 624           struct hash_entry *cursor;
 625           struct hash_entry *next;
 626
 627           /* Free the bucket overflow.  */
 628           for (cursor = bucket->next; cursor; cursor = next)
 629             {
 630               if (table->data_freer)
 631                 (*table->data_freer) (cursor->data);
 632               cursor->data = NULL;
 633
 634               next = cursor->next;
 635               /* Relinking is done one entry at a time, as it is to be expected
 636                  that overflows are either rare or short.  */
 637               cursor->next = table->free_entry_list;
 638               table->free_entry_list = cursor;
 639             }
 640
 641           /* Free the bucket head.  */
 642           if (table->data_freer)
 643             (*table->data_freer) (bucket->data);
 644           bucket->data = NULL;
 645           bucket->next = NULL;
 646         }
 647     }
 648
 649   table->n_buckets_used = 0;
 650   table->n_entries = 0;
 651 }
 652
 653 /* Reclaim all storage associated with a hash table.  If a data_freer
 654    function has been supplied by the user when the hash table was created,
 655    this function applies it to the data of each entry before freeing that
 656    entry.  */
 657
 658 void
 659 hash_free (Hash_table *table)
 660 {
 661   struct hash_entry *bucket;
 662   struct hash_entry *cursor;
 663   struct hash_entry *next;
 664
 665   /* Call the user data_freer function.  */
 666   if (table->data_freer && table->n_entries)
 667     {
 668       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 669         {
 670           if (bucket->data)
 671             {
 672               for (cursor = bucket; cursor; cursor = cursor->next)
 673                 {
 674                   (*table->data_freer) (cursor->data);
 675                 }
 676             }
 677         }
 678     }
 679
 680 #if USE_OBSTACK
 681
 682   obstack_free (&table->entry_stack, NULL);
 683
 684 #else
 685
 686   /* Free all bucket overflowed entries.  */
 687   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 688     {
 689       for (cursor = bucket->next; cursor; cursor = next)
 690         {
 691           next = cursor->next;
 692           free (cursor);
 693         }
 694     }
 695
 696   /* Also reclaim the internal list of previously freed entries.  */
 697   for (cursor = table->free_entry_list; cursor; cursor = next)
 698     {
 699       next = cursor->next;
 700       free (cursor);
 701     }
 702
 703 #endif
 704
 705   /* Free the remainder of the hash table structure.  */
 706   free (table->bucket);
 707   free (table);
 708 }
 709
 710 /* Insertion and deletion.  */
 711
 712 /* Get a new hash entry for a bucket overflow, possibly by recycling a
 713    previously freed one.  If this is not possible, allocate a new one.  */
 714
 715 static struct hash_entry *
 716 allocate_entry (Hash_table *table)
 717 {
 718   struct hash_entry *new;
 719
 720   if (table->free_entry_list)
 721     {
 722       new = table->free_entry_list;
 723       table->free_entry_list = new->next;
 724     }
 725   else
 726     {
 727 #if USE_OBSTACK
 728       new = obstack_alloc (&table->entry_stack, sizeof *new);
 729 #else
 730       new = malloc (sizeof *new);
 731 #endif
 732     }
 733
 734   return new;
 735 }
 736
 737 /* Free a hash entry which was part of some bucket overflow,
 738    saving it for later recycling.  */
 739
 740 static void
 741 free_entry (Hash_table *table, struct hash_entry *entry)
 742 {
 743   entry->data = NULL;
 744   entry->next = table->free_entry_list;
 745   table->free_entry_list = entry;
 746 }
 747
 748 /* This private function is used to help with insertion and deletion.  When
 749    ENTRY matches an entry in the table, return a pointer to the corresponding
 750    user data and set *BUCKET_HEAD to the head of the selected bucket.
 751    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
 752    the table, unlink the matching entry.  */
 753
 754 static void *
 755 hash_find_entry (Hash_table *table, const void *entry,
 756                  struct hash_entry **bucket_head, bool delete)
 757 {
 758   struct hash_entry *bucket
 759     = table->bucket + table->hasher (entry, table->n_buckets);
 760   struct hash_entry *cursor;
 761
 762   if (! (bucket < table->bucket_limit))
 763     abort ();
 764
 765   *bucket_head = bucket;
 766
 767   /* Test for empty bucket.  */
 768   if (bucket->data == NULL)
 769     return NULL;
 770
 771   /* See if the entry is the first in the bucket.  */
 772   if ((*table->comparator) (entry, bucket->data))
 773     {
 774       void *data = bucket->data;
 775
 776       if (delete)
 777         {
 778           if (bucket->next)
 779             {
 780               struct hash_entry *next = bucket->next;
 781
 782               /* Bump the first overflow entry into the bucket head, then save
 783                  the previous first overflow entry for later recycling.  */
 784               *bucket = *next;
 785               free_entry (table, next);
 786             }
 787           else
 788             {
 789               bucket->data = NULL;
 790             }
 791         }
 792
 793       return data;
 794     }
 795
 796   /* Scan the bucket overflow.  */
 797   for (cursor = bucket; cursor->next; cursor = cursor->next)
 798     {
 799       if ((*table->comparator) (entry, cursor->next->data))
 800         {
 801           void *data = cursor->next->data;
 802
 803           if (delete)
 804             {
 805               struct hash_entry *next = cursor->next;
 806
 807               /* Unlink the entry to delete, then save the freed entry for later
 808                  recycling.  */
 809               cursor->next = next->next;
 810               free_entry (table, next);
 811             }
 812
 813           return data;
 814         }
 815     }
 816
 817   /* No entry found.  */
 818   return NULL;
 819 }
 820
 821 /* For an already existing hash table, change the number of buckets through
 822    specifying CANDIDATE.  The contents of the hash table are preserved.  The
 823    new number of buckets is automatically selected so as to _guarantee_ that
 824    the table may receive at least CANDIDATE different user entries, including
 825    those already in the table, before any other growth of the hash table size
 826    occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
 827    exact number of buckets desired.  Return true iff the rehash succeeded.  */
 828
 829 bool
 830 hash_rehash (Hash_table *table, size_t candidate)
 831 {
 832   Hash_table *new_table;
 833   struct hash_entry *bucket;
 834   struct hash_entry *cursor;
 835   struct hash_entry *next;
 836
 837   new_table = hash_initialize (candidate, table->tuning, table->hasher,
 838                                table->comparator, table->data_freer);
 839   if (new_table == NULL)
 840     return false;
 841
 842   /* Merely reuse the extra old space into the new table.  */
 843 #if USE_OBSTACK
 844   obstack_free (&new_table->entry_stack, NULL);
 845   new_table->entry_stack = table->entry_stack;
 846 #endif
 847   new_table->free_entry_list = table->free_entry_list;
 848
 849   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 850     if (bucket->data)
 851       for (cursor = bucket; cursor; cursor = next)
 852         {
 853           void *data = cursor->data;
 854           struct hash_entry *new_bucket
 855             = (new_table->bucket
 856                + new_table->hasher (data, new_table->n_buckets));
 857
 858           if (! (new_bucket < new_table->bucket_limit))
 859             abort ();
 860
 861           next = cursor->next;
 862
 863           if (new_bucket->data)
 864             {
 865               if (cursor == bucket)
 866                 {
 867                   /* Allocate or recycle an entry, when moving from a bucket
 868                      header into a bucket overflow.  */
 869                   struct hash_entry *new_entry = allocate_entry (new_table);
 870
 871                   if (new_entry == NULL)
 872                     return false;
 873
 874                   new_entry->data = data;
 875                   new_entry->next = new_bucket->next;
 876                   new_bucket->next = new_entry;
 877                 }
 878               else
 879                 {
 880                   /* Merely relink an existing entry, when moving from a
 881                      bucket overflow into a bucket overflow.  */
 882                   cursor->next = new_bucket->next;
 883                   new_bucket->next = cursor;
 884                 }
 885             }
 886           else
 887             {
 888               /* Free an existing entry, when moving from a bucket
 889                  overflow into a bucket header.  Also take care of the
 890                  simple case of moving from a bucket header into a bucket
 891                  header.  */
 892               new_bucket->data = data;
 893               new_table->n_buckets_used++;
 894               if (cursor != bucket)
 895                 free_entry (new_table, cursor);
 896             }
 897         }
 898
 899   free (table->bucket);
 900   table->bucket = new_table->bucket;
 901   table->bucket_limit = new_table->bucket_limit;
 902   table->n_buckets = new_table->n_buckets;
 903   table->n_buckets_used = new_table->n_buckets_used;
 904   table->free_entry_list = new_table->free_entry_list;
 905   /* table->n_entries already holds its value.  */
 906 #if USE_OBSTACK
 907   table->entry_stack = new_table->entry_stack;
 908 #endif
 909   free (new_table);
 910
 911   return true;
 912 }
 913
 914 /* If ENTRY matches an entry already in the hash table, return the pointer
 915    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
 916    Return NULL if the storage required for insertion cannot be allocated.
 917    This implementation does not support duplicate entries or insertion of
 918    NULL.  */
 919
 920 void *
 921 hash_insert (Hash_table *table, const void *entry)
 922 {
 923   void *data;
 924   struct hash_entry *bucket;
 925
 926   /* The caller cannot insert a NULL entry.  */
 927   if (! entry)
 928     abort ();
 929
 930   /* If there's a matching entry already in the table, return that.  */
 931   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
 932     return data;
 933
 934   /* If the growth threshold of the buckets in use has been reached, increase
 935      the table size and rehash.  There's no point in checking the number of
 936      entries:  if the hashing function is ill-conditioned, rehashing is not
 937      likely to improve it.  */
 938
 939   if (table->n_buckets_used
 940       > table->tuning->growth_threshold * table->n_buckets)
 941     {
 942       /* Check more fully, before starting real work.  If tuning arguments
 943          became invalid, the second check will rely on proper defaults.  */
 944       check_tuning (table);
 945       if (table->n_buckets_used
 946           > table->tuning->growth_threshold * table->n_buckets)
 947         {
 948           const Hash_tuning *tuning = table->tuning;
 949           float candidate =
 950             (tuning->is_n_buckets
 951              ? (table->n_buckets * tuning->growth_factor)
 952              : (table->n_buckets * tuning->growth_factor
 953                 * tuning->growth_threshold));
 954
 955           if (SIZE_MAX <= candidate)
 956             return NULL;
 957
 958           /* If the rehash fails, arrange to return NULL.  */
 959           if (!hash_rehash (table, candidate))
 960             return NULL;
 961
 962           /* Update the bucket we are interested in.  */
 963           if (hash_find_entry (table, entry, &bucket, false) != NULL)
 964             abort ();
 965         }
 966     }
 967
 968   /* ENTRY is not matched, it should be inserted.  */
 969
 970   if (bucket->data)
 971     {
 972       struct hash_entry *new_entry = allocate_entry (table);
 973
 974       if (new_entry == NULL)
 975         return NULL;
 976
 977       /* Add ENTRY in the overflow of the bucket.  */
 978
 979       new_entry->data = (void *) entry;
 980       new_entry->next = bucket->next;
 981       bucket->next = new_entry;
 982       table->n_entries++;
 983       return (void *) entry;
 984     }
 985
 986   /* Add ENTRY right in the bucket head.  */
 987
 988   bucket->data = (void *) entry;
 989   table->n_entries++;
 990   table->n_buckets_used++;
 991
 992   return (void *) entry;
 993 }
 994
 995 /* If ENTRY is already in the table, remove it and return the just-deleted
 996    data (the user may want to deallocate its storage).  If ENTRY is not in the
 997    table, don't modify the table and return NULL.  */
 998
 999 void *
1000 hash_delete (Hash_table *table, const void *entry)
1001 {
1002   void *data;
1003   struct hash_entry *bucket;
1004
1005   data = hash_find_entry (table, entry, &bucket, true);
1006   if (!data)
1007     return NULL;
1008
1009   table->n_entries--;
1010   if (!bucket->data)
1011     {
1012       table->n_buckets_used--;
1013
1014       /* If the shrink threshold of the buckets in use has been reached,
1015          rehash into a smaller table.  */
1016
1017       if (table->n_buckets_used
1018           < table->tuning->shrink_threshold * table->n_buckets)
1019         {
1020           /* Check more fully, before starting real work.  If tuning arguments
1021              became invalid, the second check will rely on proper defaults.  */
1022           check_tuning (table);
1023           if (table->n_buckets_used
1024               < table->tuning->shrink_threshold * table->n_buckets)
1025             {
1026               const Hash_tuning *tuning = table->tuning;
1027               size_t candidate =
1028                 (tuning->is_n_buckets
1029                  ? table->n_buckets * tuning->shrink_factor
1030                  : (table->n_buckets * tuning->shrink_factor
1031                     * tuning->growth_threshold));
1032
1033               hash_rehash (table, candidate);
1034             }
1035         }
1036     }
1037
1038   return data;
1039 }
1040
1041 /* Testing.  */
1042
1043 #if TESTING
1044
1045 void
1046 hash_print (const Hash_table *table)
1047 {
1048   struct hash_entry const *bucket;
1049
1050   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1051     {
1052       struct hash_entry *cursor;
1053
1054       if (bucket)
1055         printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1056
1057       for (cursor = bucket; cursor; cursor = cursor->next)
1058         {
1059           char const *s = cursor->data;
1060           /* FIXME */
1061           if (s)
1062             printf ("  %s\n", s);
1063         }
1064     }
1065 }
1066
1067 #endif /* TESTING */