3 * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net>
5 * Free Software License:
7 * All rights are reserved by the author, with the following exceptions:
8 * Permission is granted to freely reproduce and distribute this software,
9 * possibly in exchange for a fee, provided that this copyright notice appears
10 * intact. Permission is also granted to adapt this software to produce
11 * derivative works, as long as the modified versions carry this copyright
12 * notice and additional notices stating that the work has been modified.
13 * This source code may be translated into executable form and incorporated
14 * into proprietary software; there is no requirement for such software to
15 * contain a copyright notice related to this source.
17 * $Id: hash.c,v 1.3 2009-11-19 09:18:28 franklahm Exp $
25 #define HASH_IMPLEMENTATION
29 static const char rcsid[] = "$Id: hash.c,v 1.3 2009-11-19 09:18:28 franklahm Exp $";
33 #define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */
34 #define INIT_MASK ((INIT_SIZE) - 1)
36 #define next hash_next
38 #define data hash_data
39 #define hkey hash_hkey
41 #define table hash_table
42 #define nchains hash_nchains
43 #define nodecount hash_nodecount
44 #define maxcount hash_maxcount
45 #define highmark hash_highmark
46 #define lowmark hash_lowmark
47 #define compare hash_compare
48 #define function hash_function
49 #define allocnode hash_allocnode
50 #define freenode hash_freenode
51 #define context hash_context
52 #define mask hash_mask
53 #define dynamic hash_dynamic
55 #define table hash_table
56 #define chain hash_chain
58 static hnode_t *hnode_alloc(void *context);
59 static void hnode_free(hnode_t *node, void *context);
60 static hash_val_t hash_fun_default(const void *key);
61 static int hash_comp_default(const void *key1, const void *key2);
66 * Compute the number of bits in the hash_val_t type. We know that hash_val_t
67 * is an unsigned integral type. Thus the highest value it can hold is a
68 * Mersenne number (power of two, less one). We initialize a hash_val_t
69 * object with this value and then shift bits out one by one while counting.
71 * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power
72 * of two. This means that its binary representation consists of all one
73 * bits, and hence ``val'' is initialized to all one bits.
74 * 2. While bits remain in val, we increment the bit count and shift it to the
75 * right, replacing the topmost bit by zero.
78 static void compute_bits(void)
80 hash_val_t val = HASH_VAL_T_MAX; /* 1 */
88 hash_val_t_bit = bits;
92 * Verify whether the given argument is a power of two.
95 static int is_power_of_two(hash_val_t arg)
99 while ((arg & 1) == 0)
105 * Compute the chain-index bit mask for a given power-of-two table size
108 static hash_val_t compute_mask(hashcount_t size)
110 assert (is_power_of_two(size));
117 * Initialize the table of pointers to null.
120 static void clear_table(hash_t *hash)
124 for (i = 0; i < hash->nchains; i++)
125 hash->table[i] = NULL;
129 * Double the size of a dynamic table. This works as follows. Each chain splits
130 * into two sister chains. The mask widens by one bit, exposing an
131 * additional bit of each hashed key. For each node in the original chain, the
132 * value of this newly exposed bit will decide which of the two new chains will
133 * receive the node: if the bit is 1, the chain with the higher index will have
134 * the node, otherwise the lower chain will receive the node. In this manner,
135 * the hash table will continue to function exactly as before without having to
136 * rehash any of the keys.
139 * 2. The new number of chains is twice the old number of chains.
140 * 3. The new mask is one bit wider than the previous, revealing a
141 * new bit in all hashed keys.
142 * 4. Allocate a new table of chain pointers that is twice as large as the
144 * 5. If the reallocation was successful, we perform the rest of the growth
145 * algorithm, otherwise we do nothing.
146 * 6. The exposed_bit variable holds a mask with which each hashed key can be
147 * AND-ed to test the value of its newly exposed bit.
148 * 7. Now loop over each chain in the table and sort its nodes into two
149 * chains based on the value of each node's newly exposed hash bit.
150 * 8. The low chain replaces the current chain. The high chain goes
151 * into the corresponding sister chain in the upper half of the table.
152 * 9. We have finished dealing with the chains and nodes. We now update
153 * the various bookkeeping fields of the hash structure.
156 static void grow_table(hash_t *hash)
160 assert (2 * hash->nchains > hash->nchains); /* 1 */
162 newtable = realloc(hash->table,
163 sizeof *newtable * hash->nchains * 2); /* 4 */
165 if (newtable) { /* 5 */
166 hash_val_t mask = (hash->mask << 1) | 1; /* 3 */
167 hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */
170 assert (mask != hash->mask);
172 for (chain = 0; chain < hash->nchains; chain++) { /* 7 */
173 hnode_t *low_chain = NULL, *high_chain = NULL, *hptr, *next;
175 for (hptr = newtable[chain]; hptr != NULL; hptr = next) {
178 if (hptr->hkey & exposed_bit) {
179 hptr->next = high_chain;
182 hptr->next = low_chain;
187 newtable[chain] = low_chain; /* 8 */
188 newtable[chain + hash->nchains] = high_chain;
191 hash->table = newtable; /* 9 */
197 assert (hash_verify(hash));
201 * Cut a table size in half. This is done by folding together sister chains
202 * and populating the lower half of the table with these chains. The chains are
203 * simply spliced together. Once this is done, the whole table is reallocated
204 * to a smaller object.
206 * 1. It is illegal to have a hash table with one slot. This would mean that
207 * hash->shift is equal to hash_val_t_bit, an illegal shift value.
208 * Also, other things could go wrong, such as hash->lowmark becoming zero.
209 * 2. Looping over each pair of sister chains, the low_chain is set to
210 * point to the head node of the chain in the lower half of the table,
211 * and high_chain points to the head node of the sister in the upper half.
212 * 3. The intent here is to compute a pointer to the last node of the
213 * lower chain into the low_tail variable. If this chain is empty,
214 * low_tail ends up with a null value.
215 * 4. If the lower chain is not empty, we simply tack the upper chain onto it.
216 * If the upper chain is a null pointer, nothing happens.
217 * 5. Otherwise, if the lower chain is empty but the upper one is not,
218 * then the high chain is simply transferred to the lower half of
219 * the table.
220 * 6. Otherwise if both chains are empty, there is nothing to do.
221 * 7. All the chain pointers are in the lower half of the table now, so
222 * we reallocate it to a smaller object. This, of course, invalidates
223 * all pointer-to-pointers which reference into the table from the
224 * first node of each chain.
225 * 8. Though it's unlikely, the reallocation may fail. In this case we
226 * pretend that the table _was_ reallocated to a smaller object.
227 * 9. Finally, update the various table parameters to reflect the new size.
230 static void shrink_table(hash_t *hash)
232 hash_val_t chain, nchains;
233 hnode_t **newtable, *low_tail, *low_chain, *high_chain;
235 assert (hash->nchains >= 2); /* 1 */
236 nchains = hash->nchains / 2;
238 for (chain = 0; chain < nchains; chain++) {
239 low_chain = hash->table[chain]; /* 2 */
240 high_chain = hash->table[chain + nchains];
241 for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next)
243 if (low_chain != NULL) /* 4 */
244 low_tail->next = high_chain;
245 else if (high_chain != NULL) /* 5 */
246 hash->table[chain] = high_chain;
248 assert (hash->table[chain] == NULL); /* 6 */
250 newtable = realloc(hash->table,
251 sizeof *newtable * nchains); /* 7 */
252 if (newtable) /* 8 */
253 hash->table = newtable;
254 hash->mask >>= 1; /* 9 */
255 hash->nchains = nchains;
258 assert (hash_verify(hash));
263 * Create a dynamic hash table. Both the hash table structure and the table
264 * itself are dynamically allocated. Furthermore, the table is extendible in
265 * that it will automatically grow as its load factor increases beyond a
268 * 1. If the number of bits in the hash_val_t type has not been computed yet,
269 * we do so here, because this is likely to be the first function that the
271 * 2. Allocate a hash table control structure.
272 * 3. If a hash table control structure is successfully allocated, we
273 * proceed to initialize it. Otherwise we return a null pointer.
274 * 4. We try to allocate the table of hash chains.
275 * 5. If we were able to allocate the hash chain table, we can finish
276 * initializing the hash structure and the table. Otherwise, we must
277 * backtrack by freeing the hash structure.
278 * 6. INIT_SIZE should be a power of two. The high and low marks are always set
279 * to be twice the table size and half the table size respectively. When the
280 * number of nodes in the table grows beyond the high size (beyond load
281 * factor 2), it will double in size to cut the load factor down to about
282 * 1. If the table shrinks down to or beneath load factor 0.5,
283 * it will shrink, bringing the load up to about 1. However, the table
284 * will never shrink beneath INIT_SIZE even if it's emptied.
285 * 7. This indicates that the table is dynamically allocated and dynamically
286 * resized on the fly. A table that has this value set to zero is
287 * assumed to be statically allocated and will not be resized.
288 * 8. The table of chains must be properly reset to all null pointers.
291 hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun,
296 if (hash_val_t_bit == 0) /* 1 */
299 hash = malloc(sizeof *hash); /* 2 */
302 hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */
303 if (hash->table) { /* 5 */
304 hash->nchains = INIT_SIZE; /* 6 */
305 hash->highmark = INIT_SIZE * 2;
306 hash->lowmark = INIT_SIZE / 2;
308 hash->maxcount = maxcount;
309 hash->compare = compfun ? compfun : hash_comp_default;
310 hash->function = hashfun ? hashfun : hash_fun_default;
311 hash->allocnode = hnode_alloc;
312 hash->freenode = hnode_free;
313 hash->context = NULL;
314 hash->mask = INIT_MASK;
315 hash->dynamic = 1; /* 7 */
316 clear_table(hash); /* 8 */
317 assert (hash_verify(hash));
327 * Select a different set of node allocator routines.
330 void hash_set_allocator(hash_t *hash, hnode_alloc_t al,
331 hnode_free_t fr, void *context)
333 assert (hash_count(hash) == 0);
334 assert ((al == 0 && fr == 0) || (al != 0 && fr != 0));
336 hash->allocnode = al ? al : hnode_alloc;
337 hash->freenode = fr ? fr : hnode_free;
338 hash->context = context;
342 * Free every node in the hash using the hash->freenode() function pointer, and
343 * cause the hash to become empty.
346 void hash_free_nodes(hash_t *hash)
350 hash_scan_begin(&hs, hash);
351 while ((node = hash_scan_next(&hs))) {
352 hash_scan_delete(hash, node);
353 hash->freenode(node, hash->context);
360 * Obsolescent function for removing all nodes from a table,
361 * freeing them and then freeing the table all in one step.
364 void hash_free(hash_t *hash)
366 #ifdef KAZLIB_OBSOLESCENT_DEBUG
367 assert ("call to obsolescent function hash_free()" && 0);
369 hash_free_nodes(hash);
374 * Free a dynamic hash table structure.
377 void hash_destroy(hash_t *hash)
379 assert (hash_val_t_bit != 0);
380 assert (hash_isempty(hash));
386 * Initialize a user supplied hash structure. The user also supplies a table of
387 * chains which is assigned to the hash structure. The table is static---it
388 * will not grow or shrink.
389 * 1. See note 1. in hash_create().
390 * 2. The user supplied array of pointers hopefully has nchains elements.
391 * 3. See note 7. in hash_create().
392 * 4. We must dynamically compute the mask from the given power of two table
394 * 5. The user supplied table can't be assumed to contain null pointers,
395 * so we reset it here.
398 hash_t *hash_init(hash_t *hash, hashcount_t maxcount,
399 hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table,
402 if (hash_val_t_bit == 0) /* 1 */
405 assert (is_power_of_two(nchains));
407 hash->table = table; /* 2 */
408 hash->nchains = nchains;
410 hash->maxcount = maxcount;
411 hash->compare = compfun ? compfun : hash_comp_default;
412 hash->function = hashfun ? hashfun : hash_fun_default;
413 hash->dynamic = 0; /* 3 */
414 hash->mask = compute_mask(nchains); /* 4 */
415 clear_table(hash); /* 5 */
417 assert (hash_verify(hash));
423 * Reset the hash scanner so that the next element retrieved by
424 * hash_scan_next() shall be the first element on the first non-empty chain.
426 * 1. Locate the first non empty chain.
427 * 2. If a non-empty chain is found, remember which one it is and set the next
428 * pointer to refer to its first element.
429 * 3. Otherwise if a chain is not found, set the next pointer to NULL
430 * so that hash_scan_next() shall indicate failure.
433 void hash_scan_begin(hscan_t *scan, hash_t *hash)
435 hash_val_t nchains = hash->nchains;
442 for (chain = 0; chain < nchains && hash->table[chain] == NULL; chain++)
445 if (chain < nchains) { /* 2 */
447 scan->next = hash->table[chain];
454 * Retrieve the next node from the hash table, and update the pointer
455 * for the next invocation of hash_scan_next().
457 * 1. Remember the next pointer in a temporary value so that it can be
459 * 2. This assertion essentially checks whether the module has been properly
460 * initialized. The first point of interaction with the module should be
461 * either hash_create() or hash_init(), both of which set hash_val_t_bit to
463 * 3. If the next pointer we are returning is not NULL, then the user is
464 * allowed to call hash_scan_next() again. We prepare the new next pointer
465 * for that call right now. That way the user is allowed to delete the node
466 * we are about to return, since we will no longer be needing it to locate
468 * 4. If there is a next node in the chain (next->next), then that becomes the
469 * new next node, otherwise ...
470 * 5. We have exhausted the current chain, and must locate the next subsequent
471 * non-empty chain in the table.
472 * 6. If a non-empty chain is found, the first element of that chain becomes
473 * the new next node. Otherwise there is no new next node and we set the
474 * pointer to NULL so that the next time hash_scan_next() is called, a null
475 * pointer shall be immediately returned.
479 hnode_t *hash_scan_next(hscan_t *scan)
481 hnode_t *next = scan->next; /* 1 */
482 hash_t *hash = scan->table;
483 hash_val_t chain = scan->chain + 1;
484 hash_val_t nchains = hash->nchains;
486 assert (hash_val_t_bit != 0); /* 2 */
489 if (next->next) { /* 4 */
490 scan->next = next->next;
492 while (chain < nchains && hash->table[chain] == NULL) /* 5 */
494 if (chain < nchains) { /* 6 */
496 scan->next = hash->table[chain];
506 * Insert a node into the hash table.
508 * 1. It's illegal to insert more than the maximum number of nodes. The client
509 * should verify that the hash table is not full before attempting an
511 * 2. The same key may not be inserted into a table twice.
512 * 3. If the table is dynamic and the load factor is already at >= 2,
514 * 4. We take the bottom N bits of the hash value to derive the chain index,
515 * where N is the base 2 logarithm of the size of the hash table.
518 void hash_insert(hash_t *hash, hnode_t *node, const void *key)
520 hash_val_t hkey, chain;
522 assert (hash_val_t_bit != 0);
523 assert (node->next == NULL);
524 assert (hash->nodecount < hash->maxcount); /* 1 */
525 assert (hash_lookup(hash, key) == NULL); /* 2 */
527 if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */
530 hkey = hash->function(key);
531 chain = hkey & hash->mask; /* 4 */
535 node->next = hash->table[chain];
536 hash->table[chain] = node;
539 assert (hash_verify(hash));
543 * Find a node in the hash table and return a pointer to it.
545 * 1. We hash the key and keep the entire hash value. As an optimization, when
546 * we descend down the chain, we can compare hash values first and only if
547 * hash values match do we perform a full key comparison.
548 * 2. To locate the chain from among 2^N chains, we look at the lower N bits of
549 * the hash value by anding them with the current mask.
550 * 3. Looping through the chain, we compare the stored hash value inside each
551 * node against our computed hash. If they match, then we do a full
552 * comparison between the unhashed keys. If these match, we have located the
556 hnode_t *hash_lookup(hash_t *hash, const void *key)
558 hash_val_t hkey, chain;
561 hkey = hash->function(key); /* 1 */
562 chain = hkey & hash->mask; /* 2 */
564 for (nptr = hash->table[chain]; nptr; nptr = nptr->next) { /* 3 */
565 if (nptr->hkey == hkey && hash->compare(nptr->key, key) == 0)
573 * Delete the given node from the hash table. Since the chains
574 * are singly linked, we must locate the start of the node's chain
577 * 1. The node must belong to this hash table, and its key must not have
578 * been tampered with.
579 * 2. If this deletion will take the node count below the low mark, we
580 * shrink the table now.
581 * 3. Determine which chain the node belongs to, and fetch the pointer
582 * to the first node in this chain.
583 * 4. If the node being deleted is the first node in the chain, then
584 * simply update the chain head pointer.
585 * 5. Otherwise advance to the node's predecessor, and splice out
586 * by updating the predecessor's next pointer.
587 * 6. Indicate that the node is no longer in a hash table.
590 hnode_t *hash_delete(hash_t *hash, hnode_t *node)
595 assert (hash_lookup(hash, node->key) == node); /* 1 */
596 assert (hash_val_t_bit != 0);
598 if (hash->dynamic && hash->nodecount <= hash->lowmark
599 && hash->nodecount > INIT_SIZE)
600 shrink_table(hash); /* 2 */
602 chain = node->hkey & hash->mask; /* 3 */
603 hptr = hash->table[chain];
605 if (hptr == node) { /* 4 */
606 hash->table[chain] = node->next;
608 while (hptr->next != node) { /* 5 */
612 assert (hptr->next == node);
613 hptr->next = node->next;
617 assert (hash_verify(hash));
619 node->next = NULL; /* 6 */
623 int hash_alloc_insert(hash_t *hash, const void *key, void *data)
625 hnode_t *node = hash->allocnode(hash->context);
628 hnode_init(node, data);
629 hash_insert(hash, node, key);
635 void hash_delete_free(hash_t *hash, hnode_t *node)
637 hash_delete(hash, node);
638 hash->freenode(node, hash->context);
642 * Exactly like hash_delete, except does not trigger table shrinkage. This is to be
643 * used from within a hash table scan operation. See notes for hash_delete.
646 hnode_t *hash_scan_delete(hash_t *hash, hnode_t *node)
651 assert (hash_lookup(hash, node->key) == node);
652 assert (hash_val_t_bit != 0);
654 chain = node->hkey & hash->mask;
655 hptr = hash->table[chain];
658 hash->table[chain] = node->next;
660 while (hptr->next != node)
662 hptr->next = node->next;
666 assert (hash_verify(hash));
673 * Like hash_delete_free but based on hash_scan_delete.
676 void hash_scan_delfree(hash_t *hash, hnode_t *node)
678 hash_scan_delete(hash, node);
679 hash->freenode(node, hash->context);
683 * Verify whether the given object is a valid hash table. This means
685 * 1. If the hash table is dynamic, verify that the low shrinkage threshold is
686 * below the high expansion threshold and that both are powers of two.
687 * 2. Count all nodes in the table, and test each hash value
688 * to see whether it is correct for the node's chain.
691 int hash_verify(hash_t *hash)
693 hashcount_t count = 0;
697 if (hash->dynamic) { /* 1 */
698 if (hash->lowmark >= hash->highmark)
700 if (!is_power_of_two(hash->highmark))
702 if (!is_power_of_two(hash->lowmark))
706 for (chain = 0; chain < hash->nchains; chain++) { /* 2 */
707 for (hptr = hash->table[chain]; hptr != NULL; hptr = hptr->next) {
708 if ((hptr->hkey & hash->mask) != chain)
714 if (count != hash->nodecount)
721 * Test whether the hash table is full and return 1 if this is true,
726 int hash_isfull(hash_t *hash)
728 return hash->nodecount == hash->maxcount;
732 * Test whether the hash table is empty and return 1 if this is true,
737 int hash_isempty(hash_t *hash)
739 return hash->nodecount == 0;
742 static hnode_t *hnode_alloc(void *context _U_)
744 return malloc(sizeof *hnode_alloc(NULL));
747 static void hnode_free(hnode_t *node, void *context _U_)
754 * Create a hash table node dynamically and assign it the given data.
757 hnode_t *hnode_create(void *data)
759 hnode_t *node = malloc(sizeof *node);
768 * Initialize a client-supplied node
771 hnode_t *hnode_init(hnode_t *hnode, void *data)
779 * Destroy a dynamically allocated node.
782 void hnode_destroy(hnode_t *hnode)
788 void hnode_put(hnode_t *node, void *data)
794 void *hnode_get(hnode_t *node)
800 const void *hnode_getkey(hnode_t *node)
806 hashcount_t hash_count(hash_t *hash)
808 return hash->nodecount;
812 hashcount_t hash_size(hash_t *hash)
814 return hash->nchains;
817 static hash_val_t hash_fun_default(const void *key)
819 static unsigned long randbox[] = {
820 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
821 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
822 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
823 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
826 const unsigned char *str = key;
830 acc ^= randbox[(*str + acc) & 0xf];
831 acc = (acc << 1) | (acc >> 31);
833 acc ^= randbox[((*str++ >> 4) + acc) & 0xf];
834 acc = (acc << 2) | (acc >> 30);
840 static int hash_comp_default(const void *key1, const void *key2)
842 return strcmp(key1, key2);
845 #ifdef KAZLIB_TEST_MAIN
851 typedef char input_t[256];
853 static int tokenize(char *string, ...)
859 va_start(arglist, string);
860 tokptr = va_arg(arglist, char **);
862 while (*string && isspace((unsigned char) *string))
867 while (*string && !isspace((unsigned char) *string))
869 tokptr = va_arg(arglist, char **);
880 static char *dupstring(char *str)
882 int sz = strlen(str) + 1;
883 char *new = malloc(sz);
885 memcpy(new, str, sz);
889 static hnode_t *new_node(void *c)
891 static hnode_t few[5];
895 return few + count++;
900 static void del_node(hnode_t *n, void *c)
907 hash_t *h = hash_create(HASHCOUNT_T_MAX, 0, 0);
910 char *tok1, *tok2, *val;
915 "a <key> <val> add value to hash table\n"
916 "d <key> delete value from hash table\n"
917 "l <key> lookup value in hash table\n"
918 "n show size of hash table\n"
919 "c show number of entries\n"
920 "t dump whole hash table\n"
921 "+ increase hash table (private func)\n"
922 "- decrease hash table (private func)\n"
923 "b print hash_t_bit value\n"
925 "s switch to non-functioning allocator\n"
929 puts("hash_create failed");
936 if (!fgets(in, sizeof(input_t), stdin))
944 printf("%d\n", hash_val_t_bit);
947 if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) {
951 key = dupstring(tok1);
952 val = dupstring(tok2);
955 puts("out of memory");
960 if (!hash_alloc_insert(h, key, val)) {
961 puts("hash_alloc_insert failed");
968 if (tokenize(in+1, &tok1, (char **) 0) != 1) {
972 hn = hash_lookup(h, tok1);
974 puts("hash_lookup failed");
978 key = hnode_getkey(hn);
979 hash_scan_delfree(h, hn);
984 if (tokenize(in+1, &tok1, (char **) 0) != 1) {
988 hn = hash_lookup(h, tok1);
990 puts("hash_lookup failed");
997 printf("%lu\n", (unsigned long) hash_size(h));
1000 printf("%lu\n", (unsigned long) hash_count(h));
1003 hash_scan_begin(&hs, h);
1004 while ((hn = hash_scan_next(&hs)))
1005 printf("%s\t%s\n", (char*) hnode_getkey(hn),
1006 (char*) hnode_get(hn));
1009 grow_table(h); /* private function */
1012 shrink_table(h); /* private function */
1023 hash_set_allocator(h, new_node, del_node, NULL);