hashmap.c Source File

00001 #include "hashmap.h"
00002 #include <malloc.h>
00003 #include <assert.h>
00004 #include <string.h> 
00005 
00006 /*
00007 
00008   Fairly fast implementation, except:
00009 
00010   --- remove not implemented
00011 
00012   --- get-or-put isn't optimize/joined yet
00013 
00014   And it's not thorougly tested yet.
00015 
00016 */
00017 
00018 #include <stdio.h>             /* for dump */
00019 
00020 static void resize_up(Hashmap *h_old);
00021 
00022 static unsigned int table_size[] = {
00023     /* I tried to pick primes which, which some overhead was added,
00024        would still be under the power-of-two that some mallocs use -
00025        but I'm not sure the amount of overhead.  Probably just 1,
00026        since malloc probably wants no more than sizeof(Entry). */
00027     7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191,
00028     16381, 32749, 65521, 131071, 
00029     /* switching to just 2^n-1, because I don't have a list */
00030     262143, 524287, 1048575, 2097151, 4194303, 8388607,
00031     16777211, 33554431, 67108863, 134217727, 268435455,
00032     536870911, 1073741823, 2147483647  /* I think we can
00033                       stop here... */,
00034     0};
00035     
00036     
00037 typedef struct hashmap_entry Entry;
00038 #define KEY(x) ( ((void*)x) + sizeof(Entry) )
00039 
00040 /*--- HashPJW ---------------------------------------------------
00041  *  An adaptation of Peter Weinberger's (PJW) generic hashing
00042  *  algorithm based on Allen Holub's version. Accepts a pointer
00043  *  to a datum to be hashed and returns an unsigned integer.
00044  *     Modified by sandro to include datum_end
00045  *     Taken from http://www.ddj.com/articles/1996/9604/9604b/9604b.htm?topic=algorithms
00046  *-------------------------------------------------------------*/
00047 #include <limits.h>
00048 #define BITS_IN_int     ( sizeof(int) * CHAR_BIT )
00049 #define THREE_QUARTERS  ((int) ((BITS_IN_int * 3) / 4))
00050 #define ONE_EIGHTH      ((int) (BITS_IN_int / 8))
00051 #define HIGH_BITS       ( ~((unsigned int)(~0) >> ONE_EIGHTH ))
00052 static unsigned int hash(const char *datum, const char *datum_end)
00053 {
00054     unsigned int hash_value, i;
00055     if (datum_end) {
00056     for ( hash_value = 0; datum<datum_end; ++datum )
00057     {
00058         hash_value = ( hash_value << ONE_EIGHTH ) + *datum;
00059         if (( i = hash_value & HIGH_BITS ) != 0 )
00060         hash_value =
00061             ( hash_value ^ ( i >> THREE_QUARTERS )) &
00062             ~HIGH_BITS;
00063     }
00064     } else {
00065     for ( hash_value = 0; *datum; ++datum )
00066     {
00067         hash_value = ( hash_value << ONE_EIGHTH ) + *datum;
00068         if (( i = hash_value & HIGH_BITS ) != 0 )
00069         hash_value =
00070             ( hash_value ^ ( i >> THREE_QUARTERS )) &
00071             ~HIGH_BITS;
00072     }
00073     /* and the extra null value, so we match if working by length */
00074     hash_value = ( hash_value << ONE_EIGHTH ) + *datum;
00075     if (( i = hash_value & HIGH_BITS ) != 0 )
00076         hash_value =
00077         ( hash_value ^ ( i >> THREE_QUARTERS )) &
00078         ~HIGH_BITS;
00079     }
00080 
00081     /* printf("Hash value of %s//%s is %d\n", datum, datum_end, hash_value); */
00082     return ( hash_value );
00083 }
00084 
00085 void hashmap_open(Hashmap *h, unsigned int initial_size) 
00086 {
00087     h->used_slots=0;
00088     h->table_size_index=0;
00089     while (table_size[h->table_size_index] < initial_size) {
00090     h->table_size_index++;
00091     if (table_size[h->table_size_index] == 0) {
00092         h->table_size_index--;
00093         break;
00094     }
00095     }
00096     h->table=calloc(table_size[h->table_size_index], sizeof(Entry *));
00097 }
00098 
00099 
00100 void hashmap_close(Hashmap *h)
00101 {
00102     int i;
00103     for (i=0; i<table_size[h->table_size_index]; i++) {
00104     Entry *e = h->table[i];
00105     Entry *e_next;
00106     while (e) {
00107         e_next = e->next_in_bucket;
00108         free(e);
00109         e=e_next;
00110     }
00111     }
00112 }
00113 
00114 void hashmap_dump(Hashmap *h)
00115 {
00116     int i;
00117     for (i=0; i<table_size[h->table_size_index]; i++) {
00118     Entry *e = h->table[i];
00119 
00120     printf("\nslot %3d: ", i);
00121     while (e) {
00122         printf("\"%s\"=>\"%s\" ", (char*) KEY(e), (char*) e->value);
00123         e=e->next_in_bucket;
00124     }
00125     }
00126     printf("\n");
00127 }
00128 
00129 void* hashmap_get(Hashmap *h, void *key, void *key_end)
00130 {
00131     unsigned int hashval = hash(key, key_end) % table_size[h->table_size_index];
00132     Entry *e = h->table[hashval];
00133     size_t keysize = key_end ? (key_end - key) : strlen(key)+1;
00134     while (e) {
00135     void* oldkey = (void *) (e+1);
00136     size_t oldkeysize = e->value - oldkey;
00137     assert(oldkeysize > 0 && oldkeysize < 1000000);
00138     if (keysize == oldkeysize && !memcmp(key, oldkey, keysize)) {
00139         return e->value;
00140     }
00141     e=e->next_in_bucket;
00142     }
00143     return 0;
00144 }
00145 
00146 void hashmap_put(Hashmap *h, void *key, void *key_end, 
00147           void *data, void *data_end)
00148 {
00149     if (h->used_slots * 2 > table_size[h->table_size_index]) resize_up(h);
00150 
00151     {
00152     unsigned int hashval = hash(key, key_end) % table_size[h->table_size_index];
00153     int x=hash(key, key_end) % table_size[h->table_size_index];
00154     Entry *e = h->table[hashval];
00155     size_t keysize = key_end ? (key_end - key) : strlen(key)+1;
00156     size_t datasize = data_end ? (data_end - data) : strlen(data)+1;
00157     Entry *n = (Entry *) malloc(sizeof(Entry) + keysize + datasize);
00158     void *keyspot = ((void*)n) + (sizeof(Entry));
00159     assert(n);
00160     
00161     n->next_in_bucket = e;
00162     h->table[hashval] = n;
00163     memcpy(keyspot, key, keysize);
00164     n->value = keyspot + keysize;
00165     memcpy(n->value, data, datasize);
00166     h->used_slots++;
00167     }
00168 }
00169 
00170 void* hashmap_get_or_put(Hashmap *h, 
00171                 void *key, void *key_end, 
00172                 void *data, void *data_end)
00173 {
00174     /* we can make it more efficient some day.  :-) */
00175 
00176     void *value = hashmap_get(h, key, key_end);
00177     if (value) return value;
00178     hashmap_put(h, key, key_end, data, data_end);
00179     return 0;
00180 }
00181 
00182 
00183 int hashmap_remove(Hashmap *h, void *key, void *key_end)
00184 {
00185     assert(0);
00186     return 0;
00187 }
00188     
00189 struct iter_state {
00190     unsigned int index;
00191     Entry *entry;
00192 };
00193 
00194 void hashmap_iterate_reset(iter *i) 
00195 {
00196     if (i->state) {
00197     free(i->state);
00198     i->state = 0;
00199     }
00200 }
00201 
00202 void* hashmap_iterate(Hashmap *h, iter *i, void **value)
00203 {
00204     struct iter_state *s;
00205     int first_time = 0;
00206 
00207     if (i->state == 0) {
00208     i->reset = hashmap_iterate_reset;
00209     i->state = calloc(1, sizeof(struct iter_state));
00210     first_time = 1;
00211     }
00212     s = (struct iter_state *) i->state;
00213 
00214     if (s->entry) s->entry = s->entry->next_in_bucket;
00215     for (;;) {
00216     if (s->entry) {
00217         if (value) *value=s->entry->value;
00218         return KEY(s->entry);
00219     } else {
00220         if (!first_time) s->index++;
00221         first_time = 0;
00222     }
00223     if (s->index >= table_size[h->table_size_index]) {
00224         hashmap_iterate_reset(i);
00225         return 0;
00226     }
00227     s->entry = h->table[s->index];
00228     }
00229 
00230 }
00231 
00232 /*
00233    ...  could be generalized to resize up/down if we ever need down.
00234 */
00235 void resize_up(Hashmap *h_old)
00236 {
00237     Hashmap h;
00238     int i;
00239     if (table_size[h_old->table_size_index + 1] == 0) return;
00240 
00241     h.used_slots=0;
00242     h.table_size_index=h_old->table_size_index + 1;
00243     h.table=calloc(table_size[h.table_size_index], sizeof(Entry *));
00244 
00245     /*
00246       traverse the old hash table entries and re-link them into their
00247       new places in the new table. 
00248     */
00249     for (i=0; i<table_size[h_old->table_size_index]; i++) {
00250     Entry *e = h_old->table[i];
00251     Entry *next_e;
00252     while (e) {
00253         unsigned int hashval = hash(KEY(e), e->value) 
00254         % table_size[h.table_size_index];
00255         next_e = e->next_in_bucket;
00256         e->next_in_bucket = h.table[hashval];
00257         h.table[hashval] = e;
00258         e=next_e;
00259     }
00260     }
00261 
00262     /*
00263       overwrite the old with the new
00264     */
00265     free(h_old->table);
00266     h_old->table = h.table;
00267     h_old->table_size_index++;
00268 }