summaryrefslogtreecommitdiffhomepage
path: root/src/3p/chibicc/hashmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3p/chibicc/hashmap.c')
-rw-r--r--src/3p/chibicc/hashmap.c165
1 files changed, 165 insertions, 0 deletions
diff --git a/src/3p/chibicc/hashmap.c b/src/3p/chibicc/hashmap.c
new file mode 100644
index 0000000..46539d9
--- /dev/null
+++ b/src/3p/chibicc/hashmap.c
@@ -0,0 +1,165 @@
+// This is an implementation of the open-addressing hash table.
+
+#include "chibicc.h"
+
+// Initial hash bucket size
+#define INIT_SIZE 16
+
+// Rehash if the usage exceeds 70%.
+#define HIGH_WATERMARK 70
+
+// We'll keep the usage below 50% after rehashing.
+#define LOW_WATERMARK 50
+
+// Represents a deleted hash entry
+#define TOMBSTONE ((void *)-1)
+
+static uint64_t fnv_hash(char *s, int len) {
+ uint64_t hash = 0xcbf29ce484222325;
+ for (int i = 0; i < len; i++) {
+ hash *= 0x100000001b3;
+ hash ^= (unsigned char)s[i];
+ }
+ return hash;
+}
+
+// Make room for new entires in a given hashmap by removing
+// tombstones and possibly extending the bucket size.
+static void rehash(HashMap *map) {
+ // Compute the size of the new hashmap.
+ int nkeys = 0;
+ for (int i = 0; i < map->capacity; i++)
+ if (map->buckets[i].key && map->buckets[i].key != TOMBSTONE)
+ nkeys++;
+
+ int cap = map->capacity;
+ while ((nkeys * 100) / cap >= LOW_WATERMARK)
+ cap = cap * 2;
+ assert(cap > 0);
+
+ // Create a new hashmap and copy all key-values.
+ HashMap map2 = {0};
+ map2.buckets = calloc(cap, sizeof(HashEntry));
+ map2.capacity = cap;
+
+ for (int i = 0; i < map->capacity; i++) {
+ HashEntry *ent = &map->buckets[i];
+ if (ent->key && ent->key != TOMBSTONE)
+ hashmap_put2(&map2, ent->key, ent->keylen, ent->val);
+ }
+
+ assert(map2.used == nkeys);
+ *map = map2;
+}
+
+static bool match(HashEntry *ent, char *key, int keylen) {
+ return ent->key && ent->key != TOMBSTONE &&
+ ent->keylen == keylen && memcmp(ent->key, key, keylen) == 0;
+}
+
+static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
+ if (!map->buckets)
+ return NULL;
+
+ uint64_t hash = fnv_hash(key, keylen);
+
+ for (int i = 0; i < map->capacity; i++) {
+ HashEntry *ent = &map->buckets[(hash + i) % map->capacity];
+ if (match(ent, key, keylen))
+ return ent;
+ if (ent->key == NULL)
+ return NULL;
+ }
+ unreachable();
+}
+
+static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
+ if (!map->buckets) {
+ map->buckets = calloc(INIT_SIZE, sizeof(HashEntry));
+ map->capacity = INIT_SIZE;
+ } else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
+ rehash(map);
+ }
+
+ uint64_t hash = fnv_hash(key, keylen);
+
+ for (int i = 0; i < map->capacity; i++) {
+ HashEntry *ent = &map->buckets[(hash + i) % map->capacity];
+
+ if (match(ent, key, keylen))
+ return ent;
+
+ if (ent->key == TOMBSTONE) {
+ ent->key = key;
+ ent->keylen = keylen;
+ return ent;
+ }
+
+ if (ent->key == NULL) {
+ ent->key = key;
+ ent->keylen = keylen;
+ map->used++;
+ return ent;
+ }
+ }
+ unreachable();
+}
+
+void *hashmap_get(HashMap *map, char *key) {
+ return hashmap_get2(map, key, strlen(key));
+}
+
+void *hashmap_get2(HashMap *map, char *key, int keylen) {
+ HashEntry *ent = get_entry(map, key, keylen);
+ return ent ? ent->val : NULL;
+}
+
+void hashmap_put(HashMap *map, char *key, void *val) {
+ hashmap_put2(map, key, strlen(key), val);
+}
+
+void hashmap_put2(HashMap *map, char *key, int keylen, void *val) {
+ HashEntry *ent = get_or_insert_entry(map, key, keylen);
+ ent->val = val;
+}
+
+void hashmap_delete(HashMap *map, char *key) {
+ hashmap_delete2(map, key, strlen(key));
+}
+
+void hashmap_delete2(HashMap *map, char *key, int keylen) {
+ HashEntry *ent = get_entry(map, key, keylen);
+ if (ent)
+ ent->key = TOMBSTONE;
+}
+
+void hashmap_test(void) {
+ HashMap *map = calloc(1, sizeof(HashMap));
+
+ for (int i = 0; i < 5000; i++)
+ hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+ for (int i = 1000; i < 2000; i++)
+ hashmap_delete(map, format("key %d", i));
+ for (int i = 1500; i < 1600; i++)
+ hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+ for (int i = 6000; i < 7000; i++)
+ hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+
+ for (int i = 0; i < 1000; i++)
+ assert((size_t)hashmap_get(map, format("key %d", i)) == i);
+ for (int i = 1000; i < 1500; i++)
+ assert(hashmap_get(map, "no such key") == NULL);
+ for (int i = 1500; i < 1600; i++)
+ assert((size_t)hashmap_get(map, format("key %d", i)) == i);
+ for (int i = 1600; i < 2000; i++)
+ assert(hashmap_get(map, "no such key") == NULL);
+ for (int i = 2000; i < 5000; i++)
+ assert((size_t)hashmap_get(map, format("key %d", i)) == i);
+ for (int i = 5000; i < 6000; i++)
+ assert(hashmap_get(map, "no such key") == NULL);
+ for (int i = 6000; i < 7000; i++)
+ hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+
+ assert(hashmap_get(map, "no such key") == NULL);
+ printf("OK\n");
+}