summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2009-11-24 03:02:38 +0100
committerEric Anholt <eric@anholt.net>2009-11-23 18:09:46 -0800
commit7c7d3e26da0e9e49c3b179403a7b9217d4e9cb18 (patch)
tree4c0b6646391916c91fa49cf7459857f6da99f753
parent1af977924e845bc67db0bb6f3a1b54080841a685 (diff)
Add deleted entry management by rehashing on insert when appropriate.
-rw-r--r--README4
-rw-r--r--hash_table.c34
-rw-r--r--hash_table.h2
3 files changed, 32 insertions, 8 deletions
diff --git a/README b/README
index 877314b..be2fe40 100644
--- a/README
+++ b/README
@@ -37,8 +37,8 @@ Performance considerations:
This means that a table that was filled, then emptied, will have
performance for unsuccessful searches in O(hash->size)
- The solution here is to decide when the table is too full of deleted
- entries and recompute the data into a clean version of the hashtable.
+ This is worked around in practice by later inserts into a hash table
+ with many deletes in it triggering a rehash at the current size.
* The data pointer increases space consumption for the hash table by around
50%
diff --git a/hash_table.c b/hash_table.c
index 33de524..87e80d2 100644
--- a/hash_table.c
+++ b/hash_table.c
@@ -90,6 +90,12 @@ entry_is_free(struct hash_entry *entry)
}
static int
+entry_is_deleted(struct hash_entry *entry)
+{
+ return entry->key == deleted_key;
+}
+
+static int
entry_is_present(struct hash_entry *entry)
{
return entry->key != NULL && entry->key != deleted_key;
@@ -147,6 +153,12 @@ hash_table_destroy(struct hash_table *ht,
free(ht);
}
+/**
+ * Finds a hash table entry with the given key and hash of that key.
+ *
+ * Returns NULL if no entry is found. Note that the data pointer may be
+ * modified by the user.
+ */
struct hash_entry *
hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
{
@@ -171,25 +183,26 @@ hash_table_search(struct hash_table *ht, uint32_t hash, const void *key)
}
static void
-hash_table_expand(struct hash_table *ht)
+hash_table_rehash(struct hash_table *ht, int new_size_index)
{
struct hash_table old_ht;
struct hash_entry *table, *entry;
- if (ht->size_index + 1 == ARRAY_SIZE(hash_sizes))
+ if (new_size_index >= ARRAY_SIZE(hash_sizes))
return;
- table = calloc(hash_sizes[ht->size_index + 1].size, sizeof(*ht->table));
+ table = calloc(hash_sizes[new_size_index].size, sizeof(*ht->table));
if (table == NULL)
return;
old_ht = *ht;
ht->table = table;
- ht->size_index++;
+ ht->size_index = new_size_index;
ht->size = hash_sizes[ht->size_index].size;
ht->rehash = hash_sizes[ht->size_index].rehash;
ht->max_entries = hash_sizes[ht->size_index].max_entries;
+ ht->deleted_entries = 0;
for (entry = old_ht.table;
entry != old_ht.table + old_ht.size;
@@ -203,6 +216,12 @@ hash_table_expand(struct hash_table *ht)
free(old_ht.table);
}
+/**
+ * Inserts the key with the given hash into the table.
+ *
+ * Note that insertion may rearrange the table on a resize or rehash,
+ * so previously found hash_entries are no longer valid after this function.
+ */
struct hash_entry *
hash_table_insert(struct hash_table *ht, uint32_t hash,
const void *key, void *data)
@@ -210,7 +229,9 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
uint32_t hash_address;
if (ht->entries >= ht->max_entries) {
- hash_table_expand(ht);
+ hash_table_rehash(ht, ht->size_index + 1);
+ } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
+ hash_table_rehash(ht, ht->size_index);
}
hash_address = hash % ht->size;
@@ -218,6 +239,8 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
struct hash_entry *entry = ht->table + hash_address;
if (!entry_is_present(entry)) {
+ if (entry_is_deleted(entry))
+ ht->deleted_entries--;
entry->hash = hash;
entry->key = key;
entry->data = data;
@@ -245,6 +268,7 @@ hash_table_remove(struct hash_table *ht, struct hash_entry *entry)
{
entry->key = deleted_key;
ht->entries--;
+ ht->deleted_entries++;
}
/**
diff --git a/hash_table.h b/hash_table.h
index 5cc71b2..ffa3ec7 100644
--- a/hash_table.h
+++ b/hash_table.h
@@ -35,13 +35,13 @@ struct hash_entry {
struct hash_table {
struct hash_entry *table;
- uint32_t (*hash_function)(const void *key);
int (*key_equals_function)(const void *a, const void *b);
uint32_t size;
uint32_t rehash;
uint32_t max_entries;
uint32_t size_index;
uint32_t entries;
+ uint32_t deleted_entries;
};
struct hash_table *hash_table_create(int (*key_equals_function)(const void *a,