More work on the hash table

author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> 2009-08-29 15:32:43 -0400
committer: Søren Sandmann Pedersen <sandmann@daimi.au.dk> 2009-08-29 15:32:43 -0400
commit: 3e31860fbab6ee73bb064af519ecd3c4953d493b (patch)
tree: 9ddd8a2d08bf270cc29faa01627d8615948cf8d3
parent: 636993cbd190a067a364aaa86587c2bb4cd435f3 (diff)
2 files changed, 42 insertions, 16 deletions
diff --git a/hash-test.c b/hash-test.c
index ae44d42..455d9ff 100644
--- a/hash-test.c
+++ b/hash-test.c
@@ -27,6 +27,14 @@ main ()
     {
 	g_assert (nul_hash_has_key (hash, (nul_ptr_t)i));
     }
-}
 
-    
+    for (i = 0; i < 1234; ++i)
+    {
+	g_assert (nul_hash_remove (hash, (nul_ptr_t)i));
+    }
+
+    for (i = 0; i < 1234; ++i)
+    {
+	g_assert (!nul_hash_has_key (hash, (nul_ptr_t)i));
+    }
+}
diff --git a/hash.c b/hash.c
index d5c6291..b257609 100644
--- a/hash.c
+++ b/hash.c
@@ -12,9 +12,9 @@ struct hash_entry_t
  * Shrink the table when less than 1/LOW_OCCUPATION is in use
  * When resizing the table, size it such that 1/DEFAULT_OCCUPATION is in use
  */
-#define HIGH_OCCUPATION		2
+#define HIGH_WATER(hash)	((((hash)->mask + 1) * 5) / 8)
+#define LOW_WATER(hash)		(((hash)->mask + 1) / 8)
 #define DEFAULT_OCCUPATION	4
-#define LOW_OCCUPATION		8
 #define MIN_SIZE		128
 
 struct nul_hash_t
@@ -27,7 +27,8 @@ struct nul_hash_t
     nul_free_func_t		value_free_func;
     nul_free_func_t		key_free_func;
 
-    size_t			n_items;
+    size_t			n_dead;
+    size_t			n_live;
     size_t			mask;
 
     nul_ptr_t free_marker;	/* entry->value has this value when it's not in use */
@@ -60,6 +61,17 @@ kill_entry (nul_hash_t *hash, hash_entry_t *entry)
 	hash->value_free_func (entry->value);
 
     entry->value = hash->dead_marker;
+    hash->n_live--;
+    hash->n_dead++;
+}
+
+/* It may be worth looking into doing double hashing instead of linear probing
+ * at some point
+ */
+static inline size_t
+next_probe (size_t index, size_t mask)
+{
+    return (index + 1) & mask;
 }
 
 static inline void
@@ -78,12 +90,12 @@ insert_internal (nul_hash_t *hash, nul_ptr_t key, nul_ptr_t value)
 	    break;
 	}
 
-	index = (index + 1) & hash->mask;
+	index = next_probe (index, hash->mask);
 
 	entry = &(hash->entries[index]);
     }
 
-    hash->n_items++;
+    hash->n_live++;
     entry->key = key;
     entry->value = value;
 }
@@ -96,7 +108,7 @@ rehash (nul_hash_t *hash)
     size_t t;
 
     new_size = 1;
-    while (new_size < MAX (hash->n_items * DEFAULT_OCCUPATION, MIN_SIZE))
+    while (new_size < MAX (hash->n_live * DEFAULT_OCCUPATION, MIN_SIZE))
 	new_size *= 2;
 
     old_entries = hash->entries;
@@ -104,7 +116,8 @@ rehash (nul_hash_t *hash)
     hash->entries = nul_array_new (hash_entry_t);
     hash->entries = nul_array_set_size (hash->entries, new_size);
     hash->mask = new_size - 1;
-    hash->n_items = 0;
+    hash->n_live = 0;
+    hash->n_dead = 0;
 
     for (t = 0; t < new_size; ++t)
     {
@@ -196,7 +209,8 @@ nul_hash_new (nul_hash_func_t        hash_func,
     hash->dead_marker = random_pointer();
 
     hash->entries = NULL;
-    hash->n_items = 0;
+    hash->n_live = 0;
+    hash->n_dead = 0;
     hash->mask = 0;
 
     rehash (hash);
@@ -207,12 +221,16 @@ nul_hash_new (nul_hash_func_t        hash_func,
 static inline nul_bool_t
 need_rehash (nul_hash_t *hash)
 {
-    int table_size = hash->mask + 1;
-    int n_items = hash->n_items;
+    if (hash->n_dead + hash->n_live > HIGH_WATER (hash))
+	return TRUE;
 
-    return
-	n_items * HIGH_OCCUPATION > table_size	||
-	(n_items * LOW_OCCUPATION < table_size && table_size > MIN_SIZE);
+    if (hash->mask + 1 == MIN_SIZE)
+	return FALSE;
+
+    if (hash->n_live < LOW_WATER (hash))
+	return TRUE;
+
+    return FALSE;
 }
 
 void
@@ -242,7 +260,7 @@ find_entry (nul_hash_t *hash, nul_ptr_t key)
 	if (entry->value != hash->dead_marker && hash->equal_func (entry->key, key))
 	    return entry;
 
-	index = (index + 1) & hash->mask;
+	index = next_probe (index, hash->mask);
 	entry = &(hash->entries[index]);
     }
author	Søren Sandmann Pedersen <sandmann@daimi.au.dk>	2009-08-29 15:32:43 -0400
committer	Søren Sandmann Pedersen <sandmann@daimi.au.dk>	2009-08-29 15:32:43 -0400
commit	3e31860fbab6ee73bb064af519ecd3c4953d493b (patch)
tree	9ddd8a2d08bf270cc29faa01627d8615948cf8d3
parent	636993cbd190a067a364aaa86587c2bb4cd435f3 (diff)