summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2009-11-23 10:58:33 +0100
committerEric Anholt <eric@anholt.net>2009-11-23 10:58:33 +0100
commit3181f54ca8a77d7694aaf42a87e5a59986fdbe8e (patch)
treee1495bf43764d35f5de3688b2ab91e18028a7776
Initial import of hash_table.
This is entirely written by myself, with the exception of the hash_sizes[] contents which come from the hash_table implementation in nickle, which I decided to not use.
-rw-r--r--.gitignore24
-rw-r--r--COPYING30
-rw-r--r--Makefile.am33
-rw-r--r--README45
-rwxr-xr-xautogen.sh12
-rw-r--r--configure.ac51
-rw-r--r--hash_table.c243
-rw-r--r--hash_table.h59
8 files changed, 497 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d682ff
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+.deps
+.libs
+Makefile
+Makefile.in
+*.la
+*.lo
+*.o
+*~
+aclocal.m4
+autom4te.cache
+compile
+config.guess
+config.log
+config.status
+config.sub
+configure
+configure.lineno
+depcomp
+install-sh
+libtool
+ltmain.sh
+missing
+stamp-h1
+cscope.*
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..cfd63dd
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,30 @@
+Copyright © 1988-2004 Keith Packard and Bart Massey. All Rights Reserved.
+Copyright © 2009 Intel Corporation
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute,
+sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall
+be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+Except as contained in this notice, the names of the authors
+or their institutions shall not be used in advertising or
+otherwise to promote the sale, use or other dealings in this
+Software without prior written authorization from the
+authors.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..dde3fc3
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,33 @@
+# Copyright © 2009 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AUTOMAKE_OPTIONS = foreign
+
+AM_CFLAGS = $(WARN_CFLAGS)
+
+noinst_LTLIBRARIES = libhash_table.la
+
+libhash_table_la_SOURCES = \
+ hash_table.c \
+ hash_table.h
+
+EXTRA_DIST = \
+ COPYING \
+ README
diff --git a/README b/README
new file mode 100644
index 0000000..29b7929
--- /dev/null
+++ b/README
@@ -0,0 +1,45 @@
+This is a simple hash table implementation written in plain old C. The goal
+is for this code to be reusable in many of the projects I've worked on that
+could use something better than a poorly-tuned chaining implementation.
+
+The intention is that users of this code copy it directly into their
+repositories, as it's quite small and should see very little development.
+
+The summary of this implementation would be that it uses open addressing
+with rehashing. The table stores for each entry:
+
+ * uint32_t hash of the key.
+ * pointer to the key.
+ * pointer to the data.
+
+Inserts occur at key->hash % hash->size. When an insert collides, the insert
+reattempts at (key->hash % hash->size + hash->reprobe) % hash->size, and
+onwards at increments of reprobe until a free or dead entry is found.
+
+When searching, the search starts at key % hash_size and continues at
+increments of reprobe as with inserts, until the matching entry or an
+unallocated entry is found.
+
+When deleting an entry, the entry is marked deleted.
+
+Performance considerations:
+
+ * Only an extra 10% free is given.
+
+ This means that as entries are added, the performance of insertion and
+ lookups will degrade as one approaches maximum entries until the table
+ gets resized. Unless an outside entry management results in a maximum
+ number of entries close to the hash table's current size limit, this
+ shouldn't be a concern.
+
+ * Repeated deletions fill the table with deleted entry markers.
+
+ This means that a table that was filled, then emptied, will have
+ performance for unsuccessful searches in O(hash->size)
+
+ The solution here is to decide when the table is too full of deleted
+ entries and recompute the data into a clean version of the hashtable.
+
+In addition to the core hash_table implementation, a copy of the FNV-1a 32-bit
+hash function is included for convenience for those that don't wish to analyze
+hash functions on their own.
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..904cd67
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,12 @@
+#! /bin/sh
+
+srcdir=`dirname $0`
+test -z "$srcdir" && srcdir=.
+
+ORIGDIR=`pwd`
+cd $srcdir
+
+autoreconf -v --install || exit 1
+cd $ORIGDIR || exit $?
+
+$srcdir/configure --enable-maintainer-mode "$@"
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..a92ccf1
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,51 @@
+# Copyright © 2009 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+# Eric Anholt <eric@anholt.net>
+
+AC_PREREQ(2.57)
+AC_INIT([hash_table],
+ 1.0,
+ [https://freedesktop.org/~anholt/hash_table],
+ hash_table)
+
+AC_CONFIG_SRCDIR([Makefile.am])
+
+AM_INIT_AUTOMAKE([dist-bzip2])
+AM_MAINTAINER_MODE
+AC_PROG_LIBTOOL
+AC_PROG_CC
+
+
+WARN_CFLAGS=""
+
+if test "x$GCC" = "xyes"; then
+ WARN_CFLAGS="-Wall -Wpointer-arith -Wstrict-prototypes \
+ -Wmissing-prototypes -Wmissing-declarations \
+ -Wnested-externs -fno-strict-aliasing"
+fi
+
+AC_SUBST([WARN_CFLAGS])
+
+AC_OUTPUT([
+ Makefile
+])
diff --git a/hash_table.c b/hash_table.c
new file mode 100644
index 0000000..7ea01f1
--- /dev/null
+++ b/hash_table.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ * Copyright © 1988-2004 Keith Packard and Bart Massey.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors
+ * or their institutions shall not be used in advertising or
+ * otherwise to promote the sale, use or other dealings in this
+ * Software without prior written authorization from the
+ * authors.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+#include <stdlib.h>
+
+#include "hash_table.h"
+
+/*
+ * From Knuth -- a good choice for hash/rehash values is p, p-2 where
+ * p and p-2 are both prime. These tables are sized to have an extra 10%
+ * free to avoid exponential performance degradation as the hash table fills
+ */
+
+uint32_t deleted_key_value;
+const void *deleted_key = &deleted_key_value;
+
+static const struct {
+ uint32_t max_entries, size, rehash;
+} hash_sizes[] = {
+ { 2, 5, 3 },
+ { 4, 7, 5 },
+ { 8, 13, 11 },
+ { 16, 19, 17 },
+ { 32, 43, 41 },
+ { 64, 73, 71 },
+ { 128, 151, 149 },
+ { 256, 283, 281 },
+ { 512, 571, 569 },
+ { 1024, 1153, 1151 },
+ { 2048, 2269, 2267 },
+ { 4096, 4519, 4517 },
+ { 8192, 9013, 9011 },
+ { 16384, 18043, 18041 },
+ { 32768, 36109, 36107 },
+ { 65536, 72091, 72089 },
+ { 131072, 144409, 144407 },
+ { 262144, 288361, 288359 },
+ { 524288, 576883, 576881 },
+ { 1048576, 1153459, 1153457 },
+ { 2097152, 2307163, 2307161 },
+ { 4194304, 4613893, 4613891 },
+ { 8388608, 9227641, 9227639 },
+ { 16777216, 18455029, 18455027 },
+ { 33554432, 36911011, 36911009 },
+ { 67108864, 73819861, 73819859 },
+ { 134217728, 147639589, 147639587 },
+ { 268435456, 295279081, 295279079 },
+ { 536870912, 590559793, 590559791 },
+ { 1073741824, 1181116273, 1181116271},
+ { 2147483648ul, 2362232233ul, 2362232231ul}
+};
+
+static int
+entry_is_free(struct hash_entry *entry)
+{
+ return entry->key == NULL;
+}
+
+static int
+entry_is_present(struct hash_entry *entry)
+{
+ return entry->key != NULL && entry->key != deleted_key;
+}
+
+struct hash_table *
+hash_table_create(uint32_t (*hash_function)(const void *key),
+ int key_equals_function(const void *a,
+ const void *b))
+{
+ struct hash_table *ht;
+
+ ht = malloc(sizeof(*ht));
+ if (ht == NULL)
+ return NULL;
+
+ ht->size_index = 0;
+ ht->size = hash_sizes[ht->size_index].size;
+ ht->rehash = hash_sizes[ht->size_index].rehash;
+ ht->max_entries = hash_sizes[ht->size_index].max_entries;
+ ht->hash_function = hash_function;
+ ht->table = calloc(ht->size, sizeof(*ht->table));
+
+ if (ht->table == NULL) {
+ free(ht);
+ return NULL;
+ }
+
+ return ht;
+}
+
+/**
+ * Frees the given hash table.
+ *
+ * If delete_function is passed, it gets called on each entry present before
+ * freeing.
+ */
+void
+hash_table_destroy(struct hash_table *ht,
+ void (*delete_function)(struct hash_entry *entry))
+{
+ if (!ht)
+ return;
+
+ if (delete_function) {
+ struct hash_entry *entry;
+
+ for (entry = ht->table;
+ entry != NULL;
+ entry = hash_table_next_entry(ht, entry)) {
+ delete_function(entry);
+ }
+ }
+
+ free(ht);
+}
+
+struct hash_entry *
+hash_table_search(struct hash_table *ht, const void *key)
+{
+ uint32_t hash, hash_address;
+
+ hash = ht->hash_function(key);
+ hash_address = hash & ht->size;
+ do {
+ struct hash_entry *entry = ht->table + hash_address;
+
+ if (entry_is_free(entry)) {
+ return NULL;
+ } else if (entry_is_present(entry) && hash == hash) {
+ if (ht->key_equals_function(key, entry->key)) {
+ return entry;
+ }
+ }
+
+ hash_address = (hash_address + ht->rehash) & ht->size;
+ } while (hash_address != hash % ht->size);
+
+ return NULL;
+}
+
+static void
+hash_table_expand(struct hash_table *ht)
+{
+ /* XXX */
+}
+
+struct hash_entry *
+hash_table_insert(struct hash_table *ht, const void *key, void *data)
+{
+ uint32_t hash, hash_address;
+
+ if (ht->entries + 1 > ht->max_entries) {
+ hash_table_expand(ht);
+ }
+
+ hash = ht->hash_function(key);
+ hash_address = hash & ht->size;
+ do {
+ struct hash_entry *entry = ht->table + hash_address;
+
+ if (!entry_is_present(entry)) {
+ entry->hash = hash;
+ entry->key = key;
+ entry->data = data;
+ ht->entries++;
+ return entry;
+ }
+
+ hash_address = (hash_address + ht->rehash) & ht->size;
+ } while (hash_address != hash % ht->size);
+
+ /* We could hit here if a required resize failed. An unchecked-malloc
+ * application could ignore this result.
+ */
+ return NULL;
+}
+
+/**
+ * This function deletes the given hash table entry.
+ *
+ * Note that deletion doesn't otherwise modify the table, so an iteration over
+ * the table deleting entries is safe.
+ */
+void
+hash_table_delete_entry(struct hash_table *ht, struct hash_entry *entry)
+{
+ entry->key = deleted_key;
+ ht->entries--;
+}
+
+/**
+ * This function is an iterator over the hash table.
+ *
+ * Pass in NULL for the first entry, as in the start of a for loop. Note that
+ * an iteration over the table is O(table_size) not O(entries).
+ */
+struct hash_entry *
+hash_table_next_entry(struct hash_table *ht, struct hash_entry *entry)
+{
+ if (entry == NULL)
+ entry = ht->table;
+ else
+ entry = entry + 1;
+
+ for (; entry != ht->table + ht->size; entry++) {
+ if (entry_is_present(entry)) {
+ return entry;
+ }
+ }
+
+ return NULL;
+}
diff --git a/hash_table.h b/hash_table.h
new file mode 100644
index 0000000..0d7afee
--- /dev/null
+++ b/hash_table.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <inttypes.h>
+
+struct hash_entry {
+ uint32_t hash;
+ const void *key;
+ void *data;
+};
+
+struct hash_table {
+ struct hash_entry *table;
+ uint32_t (*hash_function)(const void *key);
+ int (*key_equals_function)(const void *a, const void *b);
+ uint32_t size;
+ uint32_t rehash;
+ uint32_t max_entries;
+ uint32_t size_index;
+ uint32_t entries;
+};
+
+struct hash_table *hash_table_create(uint32_t (*hash_function)(const void *key),
+ int (*key_equals_function)(const void *a,
+ const void *b));
+void hash_table_destroy(struct hash_table *ht,
+ void (*delete_function)(struct hash_entry *entry));
+
+struct hash_entry *hash_table_insert(struct hash_table *ht, const void *key,
+ void *data);
+struct hash_entry *hash_table_search(struct hash_table *ht, const void *key);
+
+void hash_table_delete_entry(struct hash_table *ht, struct hash_entry *entry);
+struct hash_entry *hash_table_next_entry(struct hash_table *ht,
+ struct hash_entry *entry);