diff options
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | hash_table.c | 34 | ||||
-rw-r--r-- | hash_table.h | 2 |
3 files changed, 32 insertions, 8 deletions
@@ -37,8 +37,8 @@ Performance considerations: This means that a table that was filled, then emptied, will have performance for unsuccessful searches in O(hash->size) - The solution here is to decide when the table is too full of deleted - entries and recompute the data into a clean version of the hashtable. + This is worked around in practice by later inserts into a hash table + with many deletes in it triggering a rehash at the current size. * The data pointer increases space consumption for the hash table by around 50% diff --git a/hash_table.c b/hash_table.c index 33de524..87e80d2 100644 --- a/hash_table.c +++ b/hash_table.c @@ -90,6 +90,12 @@ entry_is_free(struct hash_entry *entry) } static int +entry_is_deleted(struct hash_entry *entry) +{ + return entry->key == deleted_key; +} + +static int entry_is_present(struct hash_entry *entry) { return entry->key != NULL && entry->key != deleted_key; @@ -147,6 +153,12 @@ hash_table_destroy(struct hash_table *ht, free(ht); } +/** + * Finds a hash table entry with the given key and hash of that key. + * + * Returns NULL if no entry is found. Note that the data pointer may be + * modified by the user. + */ struct hash_entry * hash_table_search(struct hash_table *ht, uint32_t hash, const void *key) { @@ -171,25 +183,26 @@ hash_table_search(struct hash_table *ht, uint32_t hash, const void *key) } static void -hash_table_expand(struct hash_table *ht) +hash_table_rehash(struct hash_table *ht, int new_size_index) { struct hash_table old_ht; struct hash_entry *table, *entry; - if (ht->size_index + 1 == ARRAY_SIZE(hash_sizes)) + if (new_size_index >= ARRAY_SIZE(hash_sizes)) return; - table = calloc(hash_sizes[ht->size_index + 1].size, sizeof(*ht->table)); + table = calloc(hash_sizes[new_size_index].size, sizeof(*ht->table)); if (table == NULL) return; old_ht = *ht; ht->table = table; - ht->size_index++; + ht->size_index = new_size_index; ht->size = hash_sizes[ht->size_index].size; ht->rehash = hash_sizes[ht->size_index].rehash; ht->max_entries = hash_sizes[ht->size_index].max_entries; + ht->deleted_entries = 0; for (entry = old_ht.table; entry != old_ht.table + old_ht.size; @@ -203,6 +216,12 @@ hash_table_expand(struct hash_table *ht) free(old_ht.table); } +/** + * Inserts the key with the given hash into the table. + * + * Note that insertion may rearrange the table on a resize or rehash, + * so previously found hash_entries are no longer valid after this function. + */ struct hash_entry * hash_table_insert(struct hash_table *ht, uint32_t hash, const void *key, void *data) @@ -210,7 +229,9 @@ hash_table_insert(struct hash_table *ht, uint32_t hash, uint32_t hash_address; if (ht->entries >= ht->max_entries) { - hash_table_expand(ht); + hash_table_rehash(ht, ht->size_index + 1); + } else if (ht->deleted_entries + ht->entries >= ht->max_entries) { + hash_table_rehash(ht, ht->size_index); } hash_address = hash % ht->size; @@ -218,6 +239,8 @@ hash_table_insert(struct hash_table *ht, uint32_t hash, struct hash_entry *entry = ht->table + hash_address; if (!entry_is_present(entry)) { + if (entry_is_deleted(entry)) + ht->deleted_entries--; entry->hash = hash; entry->key = key; entry->data = data; @@ -245,6 +268,7 @@ hash_table_remove(struct hash_table *ht, struct hash_entry *entry) { entry->key = deleted_key; ht->entries--; + ht->deleted_entries++; } /** diff --git a/hash_table.h b/hash_table.h index 5cc71b2..ffa3ec7 100644 --- a/hash_table.h +++ b/hash_table.h @@ -35,13 +35,13 @@ struct hash_entry { struct hash_table { struct hash_entry *table; - uint32_t (*hash_function)(const void *key); int (*key_equals_function)(const void *a, const void *b); uint32_t size; uint32_t rehash; uint32_t max_entries; uint32_t size_index; uint32_t entries; + uint32_t deleted_entries; }; struct hash_table *hash_table_create(int (*key_equals_function)(const void *a, |