diff options
Diffstat (limited to 'src/3p/chibicc/hashmap.c')
-rw-r--r-- | src/3p/chibicc/hashmap.c | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/src/3p/chibicc/hashmap.c b/src/3p/chibicc/hashmap.c new file mode 100644 index 0000000..46539d9 --- /dev/null +++ b/src/3p/chibicc/hashmap.c @@ -0,0 +1,165 @@ +// This is an implementation of the open-addressing hash table. + +#include "chibicc.h" + +// Initial hash bucket size +#define INIT_SIZE 16 + +// Rehash if the usage exceeds 70%. +#define HIGH_WATERMARK 70 + +// We'll keep the usage below 50% after rehashing. +#define LOW_WATERMARK 50 + +// Represents a deleted hash entry +#define TOMBSTONE ((void *)-1) + +static uint64_t fnv_hash(char *s, int len) { + uint64_t hash = 0xcbf29ce484222325; + for (int i = 0; i < len; i++) { + hash *= 0x100000001b3; + hash ^= (unsigned char)s[i]; + } + return hash; +} + +// Make room for new entires in a given hashmap by removing +// tombstones and possibly extending the bucket size. +static void rehash(HashMap *map) { + // Compute the size of the new hashmap. + int nkeys = 0; + for (int i = 0; i < map->capacity; i++) + if (map->buckets[i].key && map->buckets[i].key != TOMBSTONE) + nkeys++; + + int cap = map->capacity; + while ((nkeys * 100) / cap >= LOW_WATERMARK) + cap = cap * 2; + assert(cap > 0); + + // Create a new hashmap and copy all key-values. + HashMap map2 = {0}; + map2.buckets = calloc(cap, sizeof(HashEntry)); + map2.capacity = cap; + + for (int i = 0; i < map->capacity; i++) { + HashEntry *ent = &map->buckets[i]; + if (ent->key && ent->key != TOMBSTONE) + hashmap_put2(&map2, ent->key, ent->keylen, ent->val); + } + + assert(map2.used == nkeys); + *map = map2; +} + +static bool match(HashEntry *ent, char *key, int keylen) { + return ent->key && ent->key != TOMBSTONE && + ent->keylen == keylen && memcmp(ent->key, key, keylen) == 0; +} + +static HashEntry *get_entry(HashMap *map, char *key, int keylen) { + if (!map->buckets) + return NULL; + + uint64_t hash = fnv_hash(key, keylen); + + for (int i = 0; i < map->capacity; i++) { + HashEntry *ent = &map->buckets[(hash + i) % map->capacity]; + if (match(ent, key, keylen)) + return ent; + if (ent->key == NULL) + return NULL; + } + unreachable(); +} + +static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) { + if (!map->buckets) { + map->buckets = calloc(INIT_SIZE, sizeof(HashEntry)); + map->capacity = INIT_SIZE; + } else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) { + rehash(map); + } + + uint64_t hash = fnv_hash(key, keylen); + + for (int i = 0; i < map->capacity; i++) { + HashEntry *ent = &map->buckets[(hash + i) % map->capacity]; + + if (match(ent, key, keylen)) + return ent; + + if (ent->key == TOMBSTONE) { + ent->key = key; + ent->keylen = keylen; + return ent; + } + + if (ent->key == NULL) { + ent->key = key; + ent->keylen = keylen; + map->used++; + return ent; + } + } + unreachable(); +} + +void *hashmap_get(HashMap *map, char *key) { + return hashmap_get2(map, key, strlen(key)); +} + +void *hashmap_get2(HashMap *map, char *key, int keylen) { + HashEntry *ent = get_entry(map, key, keylen); + return ent ? ent->val : NULL; +} + +void hashmap_put(HashMap *map, char *key, void *val) { + hashmap_put2(map, key, strlen(key), val); +} + +void hashmap_put2(HashMap *map, char *key, int keylen, void *val) { + HashEntry *ent = get_or_insert_entry(map, key, keylen); + ent->val = val; +} + +void hashmap_delete(HashMap *map, char *key) { + hashmap_delete2(map, key, strlen(key)); +} + +void hashmap_delete2(HashMap *map, char *key, int keylen) { + HashEntry *ent = get_entry(map, key, keylen); + if (ent) + ent->key = TOMBSTONE; +} + +void hashmap_test(void) { + HashMap *map = calloc(1, sizeof(HashMap)); + + for (int i = 0; i < 5000; i++) + hashmap_put(map, format("key %d", i), (void *)(size_t)i); + for (int i = 1000; i < 2000; i++) + hashmap_delete(map, format("key %d", i)); + for (int i = 1500; i < 1600; i++) + hashmap_put(map, format("key %d", i), (void *)(size_t)i); + for (int i = 6000; i < 7000; i++) + hashmap_put(map, format("key %d", i), (void *)(size_t)i); + + for (int i = 0; i < 1000; i++) + assert((size_t)hashmap_get(map, format("key %d", i)) == i); + for (int i = 1000; i < 1500; i++) + assert(hashmap_get(map, "no such key") == NULL); + for (int i = 1500; i < 1600; i++) + assert((size_t)hashmap_get(map, format("key %d", i)) == i); + for (int i = 1600; i < 2000; i++) + assert(hashmap_get(map, "no such key") == NULL); + for (int i = 2000; i < 5000; i++) + assert((size_t)hashmap_get(map, format("key %d", i)) == i); + for (int i = 5000; i < 6000; i++) + assert(hashmap_get(map, "no such key") == NULL); + for (int i = 6000; i < 7000; i++) + hashmap_put(map, format("key %d", i), (void *)(size_t)i); + + assert(hashmap_get(map, "no such key") == NULL); + printf("OK\n"); +} |