lib/hash.c

   1 /* hash - hashing table processing.
   2
   3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2007,
   4    2009 Free Software Foundation, Inc.
   5
   6    Written by Jim Meyering, 1992.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* A generic hash table package.  */
  22
  23 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
  24    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
  25
  26 #include <config.h>
  27
  28 #include "hash.h"
  29
  30 #include "bitrotate.h"
  31 #include "xalloc.h"
  32
  33 #include <stdint.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36
  37 #if USE_OBSTACK
  38 # include "obstack.h"
  39 # ifndef obstack_chunk_alloc
  40 #  define obstack_chunk_alloc malloc
  41 # endif
  42 # ifndef obstack_chunk_free
  43 #  define obstack_chunk_free free
  44 # endif
  45 #endif
  46
  47 struct hash_entry
  48   {
  49     void *data;
  50     struct hash_entry *next;
  51   };
  52
  53 struct hash_table
  54   {
  55     /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
  56        for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
  57        are not empty, there are N_ENTRIES active entries in the table.  */
  58     struct hash_entry *bucket;
  59     struct hash_entry const *bucket_limit;
  60     size_t n_buckets;
  61     size_t n_buckets_used;
  62     size_t n_entries;
  63
  64     /* Tuning arguments, kept in a physically separate structure.  */
  65     const Hash_tuning *tuning;
  66
  67     /* Three functions are given to `hash_initialize', see the documentation
  68        block for this function.  In a word, HASHER randomizes a user entry
  69        into a number up from 0 up to some maximum minus 1; COMPARATOR returns
  70        true if two user entries compare equally; and DATA_FREER is the cleanup
  71        function for a user entry.  */
  72     Hash_hasher hasher;
  73     Hash_comparator comparator;
  74     Hash_data_freer data_freer;
  75
  76     /* A linked list of freed struct hash_entry structs.  */
  77     struct hash_entry *free_entry_list;
  78
  79 #if USE_OBSTACK
  80     /* Whenever obstacks are used, it is possible to allocate all overflowed
  81        entries into a single stack, so they all can be freed in a single
  82        operation.  It is not clear if the speedup is worth the trouble.  */
  83     struct obstack entry_stack;
  84 #endif
  85   };
  86
  87 /* A hash table contains many internal entries, each holding a pointer to
  88    some user-provided data (also called a user entry).  An entry indistinctly
  89    refers to both the internal entry and its associated user entry.  A user
  90    entry contents may be hashed by a randomization function (the hashing
  91    function, or just `hasher' for short) into a number (or `slot') between 0
  92    and the current table size.  At each slot position in the hash table,
  93    starts a linked chain of entries for which the user data all hash to this
  94    slot.  A bucket is the collection of all entries hashing to the same slot.
  95
  96    A good `hasher' function will distribute entries rather evenly in buckets.
  97    In the ideal case, the length of each bucket is roughly the number of
  98    entries divided by the table size.  Finding the slot for a data is usually
  99    done in constant time by the `hasher', and the later finding of a precise
 100    entry is linear in time with the size of the bucket.  Consequently, a
 101    larger hash table size (that is, a larger number of buckets) is prone to
 102    yielding shorter chains, *given* the `hasher' function behaves properly.
 103
 104    Long buckets slow down the lookup algorithm.  One might use big hash table
 105    sizes in hope to reduce the average length of buckets, but this might
 106    become inordinate, as unused slots in the hash table take some space.  The
 107    best bet is to make sure you are using a good `hasher' function (beware
 108    that those are not that easy to write! :-), and to use a table size
 109    larger than the actual number of entries.  */
 110
 111 /* If an insertion makes the ratio of nonempty buckets to table size larger
 112    than the growth threshold (a number between 0.0 and 1.0), then increase
 113    the table size by multiplying by the growth factor (a number greater than
 114    1.0).  The growth threshold defaults to 0.8, and the growth factor
 115    defaults to 1.414, meaning that the table will have doubled its size
 116    every second time 80% of the buckets get used.  */
 117 #define DEFAULT_GROWTH_THRESHOLD 0.8
 118 #define DEFAULT_GROWTH_FACTOR 1.414
 119
 120 /* If a deletion empties a bucket and causes the ratio of used buckets to
 121    table size to become smaller than the shrink threshold (a number between
 122    0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
 123    number greater than the shrink threshold but smaller than 1.0).  The shrink
 124    threshold and factor default to 0.0 and 1.0, meaning that the table never
 125    shrinks.  */
 126 #define DEFAULT_SHRINK_THRESHOLD 0.0
 127 #define DEFAULT_SHRINK_FACTOR 1.0
 128
 129 /* Use this to initialize or reset a TUNING structure to
 130    some sensible values. */
 131 static const Hash_tuning default_tuning =
 132   {
 133     DEFAULT_SHRINK_THRESHOLD,
 134     DEFAULT_SHRINK_FACTOR,
 135     DEFAULT_GROWTH_THRESHOLD,
 136     DEFAULT_GROWTH_FACTOR,
 137     false
 138   };
 139
 140 /* Information and lookup.  */
 141
 142 /* The following few functions provide information about the overall hash
 143    table organization: the number of entries, number of buckets and maximum
 144    length of buckets.  */
 145
 146 /* Return the number of buckets in the hash table.  The table size, the total
 147    number of buckets (used plus unused), or the maximum number of slots, are
 148    the same quantity.  */
 149
 150 size_t
 151 hash_get_n_buckets (const Hash_table *table)
 152 {
 153   return table->n_buckets;
 154 }
 155
 156 /* Return the number of slots in use (non-empty buckets).  */
 157
 158 size_t
 159 hash_get_n_buckets_used (const Hash_table *table)
 160 {
 161   return table->n_buckets_used;
 162 }
 163
 164 /* Return the number of active entries.  */
 165
 166 size_t
 167 hash_get_n_entries (const Hash_table *table)
 168 {
 169   return table->n_entries;
 170 }
 171
 172 /* Return the length of the longest chain (bucket).  */
 173
 174 size_t
 175 hash_get_max_bucket_length (const Hash_table *table)
 176 {
 177   struct hash_entry const *bucket;
 178   size_t max_bucket_length = 0;
 179
 180   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 181     {
 182       if (bucket->data)
 183         {
 184           struct hash_entry const *cursor = bucket;
 185           size_t bucket_length = 1;
 186
 187           while (cursor = cursor->next, cursor)
 188             bucket_length++;
 189
 190           if (bucket_length > max_bucket_length)
 191             max_bucket_length = bucket_length;
 192         }
 193     }
 194
 195   return max_bucket_length;
 196 }
 197
 198 /* Do a mild validation of a hash table, by traversing it and checking two
 199    statistics.  */
 200
 201 bool
 202 hash_table_ok (const Hash_table *table)
 203 {
 204   struct hash_entry const *bucket;
 205   size_t n_buckets_used = 0;
 206   size_t n_entries = 0;
 207
 208   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 209     {
 210       if (bucket->data)
 211         {
 212           struct hash_entry const *cursor = bucket;
 213
 214           /* Count bucket head.  */
 215           n_buckets_used++;
 216           n_entries++;
 217
 218           /* Count bucket overflow.  */
 219           while (cursor = cursor->next, cursor)
 220             n_entries++;
 221         }
 222     }
 223
 224   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
 225     return true;
 226
 227   return false;
 228 }
 229
 230 void
 231 hash_print_statistics (const Hash_table *table, FILE *stream)
 232 {
 233   size_t n_entries = hash_get_n_entries (table);
 234   size_t n_buckets = hash_get_n_buckets (table);
 235   size_t n_buckets_used = hash_get_n_buckets_used (table);
 236   size_t max_bucket_length = hash_get_max_bucket_length (table);
 237
 238   fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
 239   fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
 240   fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
 241            (unsigned long int) n_buckets_used,
 242            (100.0 * n_buckets_used) / n_buckets);
 243   fprintf (stream, "max bucket length: %lu\n",
 244            (unsigned long int) max_bucket_length);
 245 }
 246
 247 /* If ENTRY matches an entry already in the hash table, return the
 248    entry from the table.  Otherwise, return NULL.  */
 249
 250 void *
 251 hash_lookup (const Hash_table *table, const void *entry)
 252 {
 253   struct hash_entry const *bucket
 254     = table->bucket + table->hasher (entry, table->n_buckets);
 255   struct hash_entry const *cursor;
 256
 257   if (! (bucket < table->bucket_limit))
 258     abort ();
 259
 260   if (bucket->data == NULL)
 261     return NULL;
 262
 263   for (cursor = bucket; cursor; cursor = cursor->next)
 264     if (entry == cursor->data || table->comparator (entry, cursor->data))
 265       return cursor->data;
 266
 267   return NULL;
 268 }
 269
 270 /* Walking.  */
 271
 272 /* The functions in this page traverse the hash table and process the
 273    contained entries.  For the traversal to work properly, the hash table
 274    should not be resized nor modified while any particular entry is being
 275    processed.  In particular, entries should not be added, and an entry
 276    may be removed only if there is no shrink threshold and the entry being
 277    removed has already been passed to hash_get_next.  */
 278
 279 /* Return the first data in the table, or NULL if the table is empty.  */
 280
 281 void *
 282 hash_get_first (const Hash_table *table)
 283 {
 284   struct hash_entry const *bucket;
 285
 286   if (table->n_entries == 0)
 287     return NULL;
 288
 289   for (bucket = table->bucket; ; bucket++)
 290     if (! (bucket < table->bucket_limit))
 291       abort ();
 292     else if (bucket->data)
 293       return bucket->data;
 294 }
 295
 296 /* Return the user data for the entry following ENTRY, where ENTRY has been
 297    returned by a previous call to either `hash_get_first' or `hash_get_next'.
 298    Return NULL if there are no more entries.  */
 299
 300 void *
 301 hash_get_next (const Hash_table *table, const void *entry)
 302 {
 303   struct hash_entry const *bucket
 304     = table->bucket + table->hasher (entry, table->n_buckets);
 305   struct hash_entry const *cursor;
 306
 307   if (! (bucket < table->bucket_limit))
 308     abort ();
 309
 310   /* Find next entry in the same bucket.  */
 311   for (cursor = bucket; cursor; cursor = cursor->next)
 312     if (cursor->data == entry && cursor->next)
 313       return cursor->next->data;
 314
 315   /* Find first entry in any subsequent bucket.  */
 316   while (++bucket < table->bucket_limit)
 317     if (bucket->data)
 318       return bucket->data;
 319
 320   /* None found.  */
 321   return NULL;
 322 }
 323
 324 /* Fill BUFFER with pointers to active user entries in the hash table, then
 325    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
 326    pointers.  */
 327
 328 size_t
 329 hash_get_entries (const Hash_table *table, void **buffer,
 330                   size_t buffer_size)
 331 {
 332   size_t counter = 0;
 333   struct hash_entry const *bucket;
 334   struct hash_entry const *cursor;
 335
 336   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 337     {
 338       if (bucket->data)
 339         {
 340           for (cursor = bucket; cursor; cursor = cursor->next)
 341             {
 342               if (counter >= buffer_size)
 343                 return counter;
 344               buffer[counter++] = cursor->data;
 345             }
 346         }
 347     }
 348
 349   return counter;
 350 }
 351
 352 /* Call a PROCESSOR function for each entry of a hash table, and return the
 353    number of entries for which the processor function returned success.  A
 354    pointer to some PROCESSOR_DATA which will be made available to each call to
 355    the processor function.  The PROCESSOR accepts two arguments: the first is
 356    the user entry being walked into, the second is the value of PROCESSOR_DATA
 357    as received.  The walking continue for as long as the PROCESSOR function
 358    returns nonzero.  When it returns zero, the walking is interrupted.  */
 359
 360 size_t
 361 hash_do_for_each (const Hash_table *table, Hash_processor processor,
 362                   void *processor_data)
 363 {
 364   size_t counter = 0;
 365   struct hash_entry const *bucket;
 366   struct hash_entry const *cursor;
 367
 368   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 369     {
 370       if (bucket->data)
 371         {
 372           for (cursor = bucket; cursor; cursor = cursor->next)
 373             {
 374               if (! processor (cursor->data, processor_data))
 375                 return counter;
 376               counter++;
 377             }
 378         }
 379     }
 380
 381   return counter;
 382 }
 383
 384 /* Allocation and clean-up.  */
 385
 386 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
 387    This is a convenience routine for constructing other hashing functions.  */
 388
 389 #if USE_DIFF_HASH
 390
 391 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
 392    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
 393    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
 394    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
 395    may not be good for your application."  */
 396
 397 size_t
 398 hash_string (const char *string, size_t n_buckets)
 399 {
 400 # define HASH_ONE_CHAR(Value, Byte) \
 401   ((Byte) + rotl_sz (Value, 7))
 402
 403   size_t value = 0;
 404   unsigned char ch;
 405
 406   for (; (ch = *string); string++)
 407     value = HASH_ONE_CHAR (value, ch);
 408   return value % n_buckets;
 409
 410 # undef HASH_ONE_CHAR
 411 }
 412
 413 #else /* not USE_DIFF_HASH */
 414
 415 /* This one comes from `recode', and performs a bit better than the above as
 416    per a few experiments.  It is inspired from a hashing routine found in the
 417    very old Cyber `snoop', itself written in typical Greg Mansfield style.
 418    (By the way, what happened to this excellent man?  Is he still alive?)  */
 419
 420 size_t
 421 hash_string (const char *string, size_t n_buckets)
 422 {
 423   size_t value = 0;
 424   unsigned char ch;
 425
 426   for (; (ch = *string); string++)
 427     value = (value * 31 + ch) % n_buckets;
 428   return value;
 429 }
 430
 431 #endif /* not USE_DIFF_HASH */
 432
 433 /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
 434    number at least equal to 11.  */
 435
 436 static bool
 437 is_prime (size_t candidate)
 438 {
 439   size_t divisor = 3;
 440   size_t square = divisor * divisor;
 441
 442   while (square < candidate && (candidate % divisor))
 443     {
 444       divisor++;
 445       square += 4 * divisor;
 446       divisor++;
 447     }
 448
 449   return (candidate % divisor ? true : false);
 450 }
 451
 452 /* Round a given CANDIDATE number up to the nearest prime, and return that
 453    prime.  Primes lower than 10 are merely skipped.  */
 454
 455 static size_t
 456 next_prime (size_t candidate)
 457 {
 458   /* Skip small primes.  */
 459   if (candidate < 10)
 460     candidate = 10;
 461
 462   /* Make it definitely odd.  */
 463   candidate |= 1;
 464
 465   while (SIZE_MAX != candidate && !is_prime (candidate))
 466     candidate += 2;
 467
 468   return candidate;
 469 }
 470
 471 void
 472 hash_reset_tuning (Hash_tuning *tuning)
 473 {
 474   *tuning = default_tuning;
 475 }
 476
 477 /* If the user passes a NULL hasher, we hash the raw pointer.  */
 478 static size_t
 479 raw_hasher (const void *data, size_t n)
 480 {
 481   /* When hashing unique pointers, it is often the case that they were
 482      generated by malloc and thus have the property that the low-order
 483      bits are 0.  As this tends to give poorer performance with small
 484      tables, we rotate the pointer value before performing division,
 485      in an attempt to improve hash quality.  */
 486   size_t val = rotr_sz ((size_t) data, 3);
 487   return val % n;
 488 }
 489
 490 /* If the user passes a NULL comparator, we use pointer comparison.  */
 491 static bool
 492 raw_comparator (const void *a, const void *b)
 493 {
 494   return a == b;
 495 }
 496
 497
 498 /* For the given hash TABLE, check the user supplied tuning structure for
 499    reasonable values, and return true if there is no gross error with it.
 500    Otherwise, definitively reset the TUNING field to some acceptable default
 501    in the hash table (that is, the user loses the right of further modifying
 502    tuning arguments), and return false.  */
 503
 504 static bool
 505 check_tuning (Hash_table *table)
 506 {
 507   const Hash_tuning *tuning = table->tuning;
 508   float epsilon;
 509   if (tuning == &default_tuning)
 510     return true;
 511
 512   /* Be a bit stricter than mathematics would require, so that
 513      rounding errors in size calculations do not cause allocations to
 514      fail to grow or shrink as they should.  The smallest allocation
 515      is 11 (due to next_prime's algorithm), so an epsilon of 0.1
 516      should be good enough.  */
 517   epsilon = 0.1f;
 518
 519   if (epsilon < tuning->growth_threshold
 520       && tuning->growth_threshold < 1 - epsilon
 521       && 1 + epsilon < tuning->growth_factor
 522       && 0 <= tuning->shrink_threshold
 523       && tuning->shrink_threshold + epsilon < tuning->shrink_factor
 524       && tuning->shrink_factor <= 1
 525       && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
 526     return true;
 527
 528   table->tuning = &default_tuning;
 529   return false;
 530 }
 531
 532 /* Compute the size of the bucket array for the given CANDIDATE and
 533    TUNING, or return 0 if there is no possible way to allocate that
 534    many entries.  */
 535
 536 static size_t
 537 compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
 538 {
 539   if (!tuning->is_n_buckets)
 540     {
 541       float new_candidate = candidate / tuning->growth_threshold;
 542       if (SIZE_MAX <= new_candidate)
 543         return 0;
 544       candidate = new_candidate;
 545     }
 546   candidate = next_prime (candidate);
 547   if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
 548     return 0;
 549   return candidate;
 550 }
 551
 552 /* Allocate and return a new hash table, or NULL upon failure.  The initial
 553    number of buckets is automatically selected so as to _guarantee_ that you
 554    may insert at least CANDIDATE different user entries before any growth of
 555    the hash table size occurs.  So, if have a reasonably tight a-priori upper
 556    bound on the number of entries you intend to insert in the hash table, you
 557    may save some table memory and insertion time, by specifying it here.  If
 558    the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
 559    argument has its meaning changed to the wanted number of buckets.
 560
 561    TUNING points to a structure of user-supplied values, in case some fine
 562    tuning is wanted over the default behavior of the hasher.  If TUNING is
 563    NULL, the default tuning parameters are used instead.  If TUNING is
 564    provided but the values requested are out of bounds or might cause
 565    rounding errors, return NULL.
 566
 567    The user-supplied HASHER function, when not NULL, accepts two
 568    arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
 569    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
 570    This slot number is then returned.
 571
 572    The user-supplied COMPARATOR function, when not NULL, accepts two
 573    arguments pointing to user data, it then returns true for a pair of entries
 574    that compare equal, or false otherwise.  This function is internally called
 575    on entries which are already known to hash to the same bucket index,
 576    but which are distinct pointers.
 577
 578    The user-supplied DATA_FREER function, when not NULL, may be later called
 579    with the user data as an argument, just before the entry containing the
 580    data gets freed.  This happens from within `hash_free' or `hash_clear'.
 581    You should specify this function only if you want these functions to free
 582    all of your `data' data.  This is typically the case when your data is
 583    simply an auxiliary struct that you have malloc'd to aggregate several
 584    values.  */
 585
 586 Hash_table *
 587 hash_initialize (size_t candidate, const Hash_tuning *tuning,
 588                  Hash_hasher hasher, Hash_comparator comparator,
 589                  Hash_data_freer data_freer)
 590 {
 591   Hash_table *table;
 592
 593   if (hasher == NULL)
 594     hasher = raw_hasher;
 595   if (comparator == NULL)
 596     comparator = raw_comparator;
 597
 598   table = malloc (sizeof *table);
 599   if (table == NULL)
 600     return NULL;
 601
 602   if (!tuning)
 603     tuning = &default_tuning;
 604   table->tuning = tuning;
 605   if (!check_tuning (table))
 606     {
 607       /* Fail if the tuning options are invalid.  This is the only occasion
 608          when the user gets some feedback about it.  Once the table is created,
 609          if the user provides invalid tuning options, we silently revert to
 610          using the defaults, and ignore further request to change the tuning
 611          options.  */
 612       goto fail;
 613     }
 614
 615   table->n_buckets = compute_bucket_size (candidate, tuning);
 616   if (!table->n_buckets)
 617     goto fail;
 618
 619   table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
 620   if (table->bucket == NULL)
 621     goto fail;
 622   table->bucket_limit = table->bucket + table->n_buckets;
 623   table->n_buckets_used = 0;
 624   table->n_entries = 0;
 625
 626   table->hasher = hasher;
 627   table->comparator = comparator;
 628   table->data_freer = data_freer;
 629
 630   table->free_entry_list = NULL;
 631 #if USE_OBSTACK
 632   obstack_init (&table->entry_stack);
 633 #endif
 634   return table;
 635
 636  fail:
 637   free (table);
 638   return NULL;
 639 }
 640
 641 /* Make all buckets empty, placing any chained entries on the free list.
 642    Apply the user-specified function data_freer (if any) to the datas of any
 643    affected entries.  */
 644
 645 void
 646 hash_clear (Hash_table *table)
 647 {
 648   struct hash_entry *bucket;
 649
 650   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 651     {
 652       if (bucket->data)
 653         {
 654           struct hash_entry *cursor;
 655           struct hash_entry *next;
 656
 657           /* Free the bucket overflow.  */
 658           for (cursor = bucket->next; cursor; cursor = next)
 659             {
 660               if (table->data_freer)
 661                 table->data_freer (cursor->data);
 662               cursor->data = NULL;
 663
 664               next = cursor->next;
 665               /* Relinking is done one entry at a time, as it is to be expected
 666                  that overflows are either rare or short.  */
 667               cursor->next = table->free_entry_list;
 668               table->free_entry_list = cursor;
 669             }
 670
 671           /* Free the bucket head.  */
 672           if (table->data_freer)
 673             table->data_freer (bucket->data);
 674           bucket->data = NULL;
 675           bucket->next = NULL;
 676         }
 677     }
 678
 679   table->n_buckets_used = 0;
 680   table->n_entries = 0;
 681 }
 682
 683 /* Reclaim all storage associated with a hash table.  If a data_freer
 684    function has been supplied by the user when the hash table was created,
 685    this function applies it to the data of each entry before freeing that
 686    entry.  */
 687
 688 void
 689 hash_free (Hash_table *table)
 690 {
 691   struct hash_entry *bucket;
 692   struct hash_entry *cursor;
 693   struct hash_entry *next;
 694
 695   /* Call the user data_freer function.  */
 696   if (table->data_freer && table->n_entries)
 697     {
 698       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 699         {
 700           if (bucket->data)
 701             {
 702               for (cursor = bucket; cursor; cursor = cursor->next)
 703                 table->data_freer (cursor->data);
 704             }
 705         }
 706     }
 707
 708 #if USE_OBSTACK
 709
 710   obstack_free (&table->entry_stack, NULL);
 711
 712 #else
 713
 714   /* Free all bucket overflowed entries.  */
 715   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 716     {
 717       for (cursor = bucket->next; cursor; cursor = next)
 718         {
 719           next = cursor->next;
 720           free (cursor);
 721         }
 722     }
 723
 724   /* Also reclaim the internal list of previously freed entries.  */
 725   for (cursor = table->free_entry_list; cursor; cursor = next)
 726     {
 727       next = cursor->next;
 728       free (cursor);
 729     }
 730
 731 #endif
 732
 733   /* Free the remainder of the hash table structure.  */
 734   free (table->bucket);
 735   free (table);
 736 }
 737
 738 /* Insertion and deletion.  */
 739
 740 /* Get a new hash entry for a bucket overflow, possibly by recycling a
 741    previously freed one.  If this is not possible, allocate a new one.  */
 742
 743 static struct hash_entry *
 744 allocate_entry (Hash_table *table)
 745 {
 746   struct hash_entry *new;
 747
 748   if (table->free_entry_list)
 749     {
 750       new = table->free_entry_list;
 751       table->free_entry_list = new->next;
 752     }
 753   else
 754     {
 755 #if USE_OBSTACK
 756       new = obstack_alloc (&table->entry_stack, sizeof *new);
 757 #else
 758       new = malloc (sizeof *new);
 759 #endif
 760     }
 761
 762   return new;
 763 }
 764
 765 /* Free a hash entry which was part of some bucket overflow,
 766    saving it for later recycling.  */
 767
 768 static void
 769 free_entry (Hash_table *table, struct hash_entry *entry)
 770 {
 771   entry->data = NULL;
 772   entry->next = table->free_entry_list;
 773   table->free_entry_list = entry;
 774 }
 775
 776 /* This private function is used to help with insertion and deletion.  When
 777    ENTRY matches an entry in the table, return a pointer to the corresponding
 778    user data and set *BUCKET_HEAD to the head of the selected bucket.
 779    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
 780    the table, unlink the matching entry.  */
 781
 782 static void *
 783 hash_find_entry (Hash_table *table, const void *entry,
 784                  struct hash_entry **bucket_head, bool delete)
 785 {
 786   struct hash_entry *bucket
 787     = table->bucket + table->hasher (entry, table->n_buckets);
 788   struct hash_entry *cursor;
 789
 790   if (! (bucket < table->bucket_limit))
 791     abort ();
 792
 793   *bucket_head = bucket;
 794
 795   /* Test for empty bucket.  */
 796   if (bucket->data == NULL)
 797     return NULL;
 798
 799   /* See if the entry is the first in the bucket.  */
 800   if (entry == bucket->data || table->comparator (entry, bucket->data))
 801     {
 802       void *data = bucket->data;
 803
 804       if (delete)
 805         {
 806           if (bucket->next)
 807             {
 808               struct hash_entry *next = bucket->next;
 809
 810               /* Bump the first overflow entry into the bucket head, then save
 811                  the previous first overflow entry for later recycling.  */
 812               *bucket = *next;
 813               free_entry (table, next);
 814             }
 815           else
 816             {
 817               bucket->data = NULL;
 818             }
 819         }
 820
 821       return data;
 822     }
 823
 824   /* Scan the bucket overflow.  */
 825   for (cursor = bucket; cursor->next; cursor = cursor->next)
 826     {
 827       if (entry == cursor->next->data
 828           || table->comparator (entry, cursor->next->data))
 829         {
 830           void *data = cursor->next->data;
 831
 832           if (delete)
 833             {
 834               struct hash_entry *next = cursor->next;
 835
 836               /* Unlink the entry to delete, then save the freed entry for later
 837                  recycling.  */
 838               cursor->next = next->next;
 839               free_entry (table, next);
 840             }
 841
 842           return data;
 843         }
 844     }
 845
 846   /* No entry found.  */
 847   return NULL;
 848 }
 849
 850 /* Internal helper, to move entries from SRC to DST.  Both tables must
 851    share the same free entry list.  If SAFE, only move overflow
 852    entries, saving bucket heads for later, so that no allocations will
 853    occur.  Return false if the free entry list is exhausted and an
 854    allocation fails.  */
 855
 856 static bool
 857 transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
 858 {
 859   struct hash_entry *bucket;
 860   struct hash_entry *cursor;
 861   struct hash_entry *next;
 862   for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
 863     if (bucket->data)
 864       {
 865         void *data;
 866         struct hash_entry *new_bucket;
 867
 868         /* Within each bucket, transfer overflow entries first and
 869            then the bucket head, to minimize memory pressure.  After
 870            all, the only time we might allocate is when moving the
 871            bucket head, but moving overflow entries first may create
 872            free entries that can be recycled by the time we finally
 873            get to the bucket head.  */
 874         for (cursor = bucket->next; cursor; cursor = next)
 875           {
 876             data = cursor->data;
 877             new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));
 878
 879             if (! (new_bucket < dst->bucket_limit))
 880               abort ();
 881
 882             next = cursor->next;
 883
 884             if (new_bucket->data)
 885               {
 886                 /* Merely relink an existing entry, when moving from a
 887                    bucket overflow into a bucket overflow.  */
 888                 cursor->next = new_bucket->next;
 889                 new_bucket->next = cursor;
 890               }
 891             else
 892               {
 893                 /* Free an existing entry, when moving from a bucket
 894                    overflow into a bucket header.  */
 895                 new_bucket->data = data;
 896                 dst->n_buckets_used++;
 897                 free_entry (dst, cursor);
 898               }
 899           }
 900         /* Now move the bucket head.  Be sure that if we fail due to
 901            allocation failure that the src table is in a consistent
 902            state.  */
 903         data = bucket->data;
 904         bucket->next = NULL;
 905         if (safe)
 906           continue;
 907         new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));
 908
 909         if (! (new_bucket < dst->bucket_limit))
 910           abort ();
 911
 912         if (new_bucket->data)
 913           {
 914             /* Allocate or recycle an entry, when moving from a bucket
 915                header into a bucket overflow.  */
 916             struct hash_entry *new_entry = allocate_entry (dst);
 917
 918             if (new_entry == NULL)
 919               return false;
 920
 921             new_entry->data = data;
 922             new_entry->next = new_bucket->next;
 923             new_bucket->next = new_entry;
 924           }
 925         else
 926           {
 927             /* Move from one bucket header to another.  */
 928             new_bucket->data = data;
 929             dst->n_buckets_used++;
 930           }
 931         bucket->data = NULL;
 932         src->n_buckets_used--;
 933       }
 934   return true;
 935 }
 936
 937 /* For an already existing hash table, change the number of buckets through
 938    specifying CANDIDATE.  The contents of the hash table are preserved.  The
 939    new number of buckets is automatically selected so as to _guarantee_ that
 940    the table may receive at least CANDIDATE different user entries, including
 941    those already in the table, before any other growth of the hash table size
 942    occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
 943    exact number of buckets desired.  Return true iff the rehash succeeded.  */
 944
 945 bool
 946 hash_rehash (Hash_table *table, size_t candidate)
 947 {
 948   Hash_table storage;
 949   Hash_table *new_table;
 950   size_t new_size = compute_bucket_size (candidate, table->tuning);
 951
 952   if (!new_size)
 953     return false;
 954   if (new_size == table->n_buckets)
 955     return true;
 956   new_table = &storage;
 957   new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
 958   if (new_table->bucket == NULL)
 959     return false;
 960   new_table->n_buckets = new_size;
 961   new_table->bucket_limit = new_table->bucket + new_size;
 962   new_table->n_buckets_used = 0;
 963   new_table->n_entries = 0;
 964   new_table->tuning = table->tuning;
 965   new_table->hasher = table->hasher;
 966   new_table->comparator = table->comparator;
 967   new_table->data_freer = table->data_freer;
 968
 969   /* In order for the transfer to successfully complete, we need
 970      additional overflow entries when distinct buckets in the old
 971      table collide into a common bucket in the new table.  The worst
 972      case possible is a hasher that gives a good spread with the old
 973      size, but returns a constant with the new size; if we were to
 974      guarantee table->n_buckets_used-1 free entries in advance, then
 975      the transfer would be guaranteed to not allocate memory.
 976      However, for large tables, a guarantee of no further allocation
 977      introduces a lot of extra memory pressure, all for an unlikely
 978      corner case (most rehashes reduce, rather than increase, the
 979      number of overflow entries needed).  So, we instead ensure that
 980      the transfer process can be reversed if we hit a memory
 981      allocation failure mid-transfer.  */
 982
 983   /* Merely reuse the extra old space into the new table.  */
 984 #if USE_OBSTACK
 985   new_table->entry_stack = table->entry_stack;
 986 #endif
 987   new_table->free_entry_list = table->free_entry_list;
 988
 989   if (transfer_entries (new_table, table, false))
 990     {
 991       /* Entries transferred successfully; tie up the loose ends.  */
 992       free (table->bucket);
 993       table->bucket = new_table->bucket;
 994       table->bucket_limit = new_table->bucket_limit;
 995       table->n_buckets = new_table->n_buckets;
 996       table->n_buckets_used = new_table->n_buckets_used;
 997       table->free_entry_list = new_table->free_entry_list;
 998       /* table->n_entries and table->entry_stack already hold their value.  */
 999       return true;
1000     }
1001
1002   /* We've allocated new_table->bucket (and possibly some entries),
1003      exhausted the free list, and moved some but not all entries into
1004      new_table.  We must undo the partial move before returning
1005      failure.  The only way to get into this situation is if new_table
1006      uses fewer buckets than the old table, so we will reclaim some
1007      free entries as overflows in the new table are put back into
1008      distinct buckets in the old table.
1009
1010      There are some pathological cases where a single pass through the
1011      table requires more intermediate overflow entries than using two
1012      passes.  Two passes give worse cache performance and takes
1013      longer, but at this point, we're already out of memory, so slow
1014      and safe is better than failure.  */
1015   table->free_entry_list = new_table->free_entry_list;
1016   if (! (transfer_entries (table, new_table, true)
1017          && transfer_entries (table, new_table, false)))
1018     abort ();
1019   /* table->n_entries already holds its value.  */
1020   free (new_table->bucket);
1021   return false;
1022 }
1023
1024 /* If ENTRY matches an entry already in the hash table, return the pointer
1025    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
1026    Return NULL if the storage required for insertion cannot be allocated.
1027    This implementation does not support duplicate entries or insertion of
1028    NULL.  */
1029
1030 void *
1031 hash_insert (Hash_table *table, const void *entry)
1032 {
1033   void *data;
1034   struct hash_entry *bucket;
1035
1036   /* The caller cannot insert a NULL entry.  */
1037   if (! entry)
1038     abort ();
1039
1040   /* If there's a matching entry already in the table, return that.  */
1041   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
1042     return data;
1043
1044   /* If the growth threshold of the buckets in use has been reached, increase
1045      the table size and rehash.  There's no point in checking the number of
1046      entries:  if the hashing function is ill-conditioned, rehashing is not
1047      likely to improve it.  */
1048
1049   if (table->n_buckets_used
1050       > table->tuning->growth_threshold * table->n_buckets)
1051     {
1052       /* Check more fully, before starting real work.  If tuning arguments
1053          became invalid, the second check will rely on proper defaults.  */
1054       check_tuning (table);
1055       if (table->n_buckets_used
1056           > table->tuning->growth_threshold * table->n_buckets)
1057         {
1058           const Hash_tuning *tuning = table->tuning;
1059           float candidate =
1060             (tuning->is_n_buckets
1061              ? (table->n_buckets * tuning->growth_factor)
1062              : (table->n_buckets * tuning->growth_factor
1063                 * tuning->growth_threshold));
1064
1065           if (SIZE_MAX <= candidate)
1066             return NULL;
1067
1068           /* If the rehash fails, arrange to return NULL.  */
1069           if (!hash_rehash (table, candidate))
1070             return NULL;
1071
1072           /* Update the bucket we are interested in.  */
1073           if (hash_find_entry (table, entry, &bucket, false) != NULL)
1074             abort ();
1075         }
1076     }
1077
1078   /* ENTRY is not matched, it should be inserted.  */
1079
1080   if (bucket->data)
1081     {
1082       struct hash_entry *new_entry = allocate_entry (table);
1083
1084       if (new_entry == NULL)
1085         return NULL;
1086
1087       /* Add ENTRY in the overflow of the bucket.  */
1088
1089       new_entry->data = (void *) entry;
1090       new_entry->next = bucket->next;
1091       bucket->next = new_entry;
1092       table->n_entries++;
1093       return (void *) entry;
1094     }
1095
1096   /* Add ENTRY right in the bucket head.  */
1097
1098   bucket->data = (void *) entry;
1099   table->n_entries++;
1100   table->n_buckets_used++;
1101
1102   return (void *) entry;
1103 }
1104
1105 /* If ENTRY is already in the table, remove it and return the just-deleted
1106    data (the user may want to deallocate its storage).  If ENTRY is not in the
1107    table, don't modify the table and return NULL.  */
1108
1109 void *
1110 hash_delete (Hash_table *table, const void *entry)
1111 {
1112   void *data;
1113   struct hash_entry *bucket;
1114
1115   data = hash_find_entry (table, entry, &bucket, true);
1116   if (!data)
1117     return NULL;
1118
1119   table->n_entries--;
1120   if (!bucket->data)
1121     {
1122       table->n_buckets_used--;
1123
1124       /* If the shrink threshold of the buckets in use has been reached,
1125          rehash into a smaller table.  */
1126
1127       if (table->n_buckets_used
1128           < table->tuning->shrink_threshold * table->n_buckets)
1129         {
1130           /* Check more fully, before starting real work.  If tuning arguments
1131              became invalid, the second check will rely on proper defaults.  */
1132           check_tuning (table);
1133           if (table->n_buckets_used
1134               < table->tuning->shrink_threshold * table->n_buckets)
1135             {
1136               const Hash_tuning *tuning = table->tuning;
1137               size_t candidate =
1138                 (tuning->is_n_buckets
1139                  ? table->n_buckets * tuning->shrink_factor
1140                  : (table->n_buckets * tuning->shrink_factor
1141                     * tuning->growth_threshold));
1142
1143               if (!hash_rehash (table, candidate))
1144                 {
1145                   /* Failure to allocate memory in an attempt to
1146                      shrink the table is not fatal.  But since memory
1147                      is low, we can at least be kind and free any
1148                      spare entries, rather than keeping them tied up
1149                      in the free entry list.  */
1150 #if ! USE_OBSTACK
1151                   struct hash_entry *cursor = table->free_entry_list;
1152                   struct hash_entry *next;
1153                   while (cursor)
1154                     {
1155                       next = cursor->next;
1156                       free (cursor);
1157                       cursor = next;
1158                     }
1159                   table->free_entry_list = NULL;
1160 #endif
1161                 }
1162             }
1163         }
1164     }
1165
1166   return data;
1167 }
1168
1169 /* Testing.  */
1170
1171 #if TESTING
1172
1173 void
1174 hash_print (const Hash_table *table)
1175 {
1176   struct hash_entry *bucket = (struct hash_entry *) table->bucket;
1177
1178   for ( ; bucket < table->bucket_limit; bucket++)
1179     {
1180       struct hash_entry *cursor;
1181
1182       if (bucket)
1183         printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1184
1185       for (cursor = bucket; cursor; cursor = cursor->next)
1186         {
1187           char const *s = cursor->data;
1188           /* FIXME */
1189           if (s)
1190             printf ("  %s\n", s);
1191         }
1192     }
1193 }
1194
1195 #endif /* TESTING */