Trying with a hashtable instead of a dumb structure.

author: pixel <pixel> 2008-01-28 12:49:06 +0000
committer: pixel <pixel> 2008-01-28 12:49:06 +0000
commit: 7ecf94d4b339bc2197f449f0f1538f2915313087 (patch)
tree: 02167bb1970b7adb53bf7aaa5bb5aaf726d11932 /hashtab.c
parent: a578df37ca8c4c130f28ccfd12948bc2fe429526 (diff)
1 files changed, 370 insertions, 0 deletions
diff --git a/hashtab.c b/hashtab.c
new file mode 100644
index 0000000..8e278a1
--- /dev/null
+++ b/hashtab.c
@@ -0,0 +1,370 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, 1996.  hashtab.c.  Public Domain.
+
+This implements a hash table.
+* Keys are unique.  Adding an item fails if the key is already there.
+* Keys and items are pointed at, not copied.  If you change the value
+  of the key after it is inserted then hfind will not be able to find it.
+* The hash table maintains a position that can be set and queried.
+* The table length doubles dynamically and never shrinks.  The insert
+  that causes table doubling may take a long time.
+* The table length splits when the table length equals the number of items
+  Comparisons usually take 7 instructions.
+  Computing a hash value takes 35+6n instructions for an n-byte key.
+
+  hcreate  - create a hash table
+  hdestroy - destroy a hash table
+   hcount  - The number of items in the hash table
+   hkey    - key at the current position
+   hkeyl   - key length at the current position
+   hstuff  - stuff at the current position
+  hfind    - find an item in the table
+   hadd    - insert an item into the table
+   hdel    - delete an item from the table
+  hstat    - print statistics about the table
+   hfirst  - position at the first item in the table
+   hnext   - move the position to the next item in the table
+--------------------------------------------------------------------
+*/
+
+#include <stdint.h>
+#include <memory.h>
+
+#include "lookupa.h"
+#include "hashtab.h"
+#include "recycle.h"
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE !FALSE
+#endif
+
+#ifdef HSANITY
+/* sanity check -- make sure ipos, apos, and count make sense */
+static void  hsanity(t)
+htab *t;
+{
+  uint32_t i, end, counter;
+  hitem *h;
+
+  /* test that apos makes sense */
+  end = (uint32_t)1<<(t->logsize);
+  if (end < t->apos)
+    printf("error:  end %ld  apos %ld\n", end, t->apos);
+
+  /* test that ipos is in bucket apos */
+  if (t->ipos)
+  {
+    for (h=t->table[t->apos];  h && h != t->ipos;  h = h->next)
+      ;
+    if (h != t->ipos)
+      printf("error:ipos not in apos, apos is %ld\n", t->apos);
+  }
+
+  /* test that t->count is the number of elements in the table */
+  counter=0;
+  for (counter=0, i=0;  i<end;  ++i)
+    for (h=t->table[i];  h;  h=h->next)
+      ++counter;
+  if (counter != t->count)
+    printf("error: counter %ld  t->count %ld\n", counter, t->count);
+}
+#endif
+
+/*
+ * hgrow - Double the size of a hash table.
+ * Allocate a new, 2x bigger array,
+ * move everything from the old array to the new array,
+ * then free the old array.
+ */
+static void hgrow( t)
+htab  *t;    /* table */
+{
+  register uint32_t  newsize = (uint32_t)1<<(++t->logsize);
+  register uint32_t  newmask = newsize-1;
+  register uint32_t  i;
+  register hitem   **oldtab = t->table;
+  register hitem   **newtab = (hitem **)malloc(newsize*sizeof(hitem *));
+
+  /* make sure newtab is cleared */
+  for (i=0; i<newsize; ++i) newtab[i] = (hitem *)0;
+  t->table = newtab;
+  t->mask = newmask;
+
+  /* Walk through old table putting entries in new table */
+  for (i=newsize>>1; i--;)
+  {
+    register hitem *this, *that, **newplace;
+    for (this = oldtab[i]; this;)
+    {
+      that = this;
+      this = this->next;
+      newplace = &newtab[(that->hval & newmask)];
+      that->next = *newplace;
+      *newplace = that;
+    }
+  }
+
+  /* position the hash table on some existing item */
+  hfirst(t);
+
+  /* free the old array */
+  free((char *)oldtab);
+
+}
+
+/* hcreate - create a hash table initially of size power(2,logsize) */
+htab *hcreate(logsize)
+int   logsize;    /* log base 2 of the size of the hash table */
+{
+  uint32_t i,len;
+  htab *t = (htab *)malloc(sizeof(htab));
+
+  len = ((uint32_t)1<<logsize);
+  t->table = (hitem **)malloc(sizeof(hitem *)*(uint32_t)len);
+  for (i=0; i<len; ++i) t->table[i] = (hitem *)0;
+  t->logsize = logsize;
+  t->mask = len-1;
+  t->count = 0;
+  t->apos = (uint32_t)0;
+  t->ipos = (hitem *)0;
+  t->space = remkroot(sizeof(hitem));
+  t->bcount = 0;
+  return t;
+}
+
+/* hdestroy - destroy the hash table and free all its memory */
+void hdestroy( t)
+htab  *t;    /* the table */
+{
+  refree(t->space);
+  free((char *)t->table);
+  free((char *)t);
+}
+
+/* hcount() is a macro, see hashtab.h */
+/* hkey() is a macro, see hashtab.h */
+/* hkeyl() is a macro, see hashtab.h */
+/* hstuff() is a macro, see hashtab.h */
+
+/* hfind - find an item with a given key in a hash table */
+int    hfind( t, key, keyl )
+htab    *t;     /* table */
+uint8_t *key;   /* key to find */
+uint32_t keyl;  /* key length */
+{
+  hitem *h;
+  uint32_t x = lookup(key,keyl,0);
+  uint32_t y;
+  for (h = t->table[y=(x&t->mask)]; h; h = h->next)
+  {
+    if ((x == h->hval) && 
+        (keyl == h->keyl) && 
+        !memcmp(key, h->key, keyl))
+    {
+      t->apos = y;
+      t->ipos = h;
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+/*
+ * hadd - add an item to a hash table.
+ * return FALSE if the key is already there, otherwise TRUE.
+ */
+int  hadd( t, key, keyl, stuff)
+htab    *t;      /* table */
+uint8_t *key;    /* key to add to hash table */
+uint32_t keyl;   /* key length */
+void    *stuff;  /* stuff to associate with this key */
+{
+  register hitem  *h,**hp;
+  register uint32_t  y, x = lookup(key,keyl,0);
+
+  /* make sure the key is not already there */
+  for (h = t->table[(y=(x&t->mask))]; h; h = h->next)
+  {
+    if ((x == h->hval) && 
+        (keyl == h->keyl) && 
+        !memcmp(key, h->key, keyl))
+    {
+      t->apos = y;
+      t->ipos = h;
+      return FALSE;
+    }
+  }
+
+  /* find space for a new item */
+  h = (hitem *)renew(t->space);
+
+  /* make the hash table bigger if it is getting full */
+  if (++t->count > (uint32_t)1<<(t->logsize))
+  {
+    hgrow(t);
+    y = (x&t->mask);
+  }
+
+  /* add the new key to the table */
+  h->key   = key;
+  h->keyl  = keyl;
+  h->stuff = stuff;
+  h->hval  = x;
+  hp = &t->table[y];
+  h->next = *hp;
+  *hp = h;
+  t->ipos = h;
+  t->apos = y;
+
+#ifdef HSANITY
+  hsanity(t);
+#endif  /* HSANITY */
+
+  return TRUE;
+}
+
+/* hdel - delete the item at the current position */
+int   hdel(t)
+htab *t;      /* the hash table */
+{
+  hitem  *h;    /* item being deleted */
+  hitem **ip;   /* a counter */
+
+  /* check for item not existing */
+  if (!(h = t->ipos)) return FALSE;
+
+  /* remove item from its list */
+  for (ip = &t->table[t->apos]; *ip != h; ip = &(*ip)->next)
+    ;
+  *ip = (*ip)->next;
+  --(t->count);
+
+  /* adjust position to something that exists */
+  if (!(t->ipos = h->next)) hnbucket(t);
+
+  /* recycle the deleted hitem node */
+  redel(t->space, h);
+
+#ifdef HSANITY
+  hsanity(t);
+#endif  /* HSANITY */
+
+  return TRUE;
+}
+
+/* hfirst - position on the first element in the table */
+int  hfirst(t)
+htab  *t;    /* the hash table */
+{
+  t->apos = t->mask;
+  (void)hnbucket(t);
+  return (t->ipos != (hitem *)0);
+}
+
+/* hnext() is a macro, see hashtab.h */
+
+/*
+ * hnbucket - Move position to the first item in the next bucket.
+ * Return TRUE if we did not wrap around to the beginning of the table
+ */
+int  hnbucket(t)
+htab *t;
+{
+  uint32_t  oldapos = t->apos;
+  uint32_t  end = (uint32_t)1<<(t->logsize);
+  uint32_t  i;
+
+  /* see if the element can be found without wrapping around */
+  for (i=oldapos+1; i<end; ++i)
+  {
+    if (t->table[i&t->mask])
+    {
+      t->apos = i;
+      t->ipos = t->table[i];
+      return TRUE;
+    }
+  }
+
+  /* must have to wrap around to find the last element */
+  for (i=0; i<=oldapos; ++i)
+  {
+    if (t->table[i])
+    {
+      t->apos = i;
+      t->ipos = t->table[i];
+      return FALSE;
+    }
+  }
+
+  return FALSE;
+}
+
+#ifdef HSTATS
+void hstat(t)
+htab  *t;
+{
+  uint32_t  i,j;
+  double    total = 0.0;
+  hitem    *h;
+  hitem    *walk, *walk2, *stat = (hitem *)0;
+
+  /* in stat, keyl will store length of list, hval the number of buckets */
+  for (i=0; i<=t->mask; ++i)
+  {
+    for (h=t->table[i], j=0; h; ++j, h=h->next)
+      ;
+    for (walk=stat; walk && (walk->keyl != j); walk=walk->next)
+      ;
+    if (walk)
+    {
+      ++(walk->hval);
+    }
+    else
+    {
+      walk = (hitem *)renew(t->space);
+      walk->keyl = j;
+      walk->hval = 1;
+      if (!stat || stat->keyl > j) {walk->next=stat; stat=walk;}
+      else
+      {
+        for (walk2=stat;
+             walk2->next && (walk2->next->keyl<j);
+             walk2=walk2->next)
+          ;
+        walk->next = walk2->next;
+        walk2->next = walk;
+      }
+    }
+  }
+
+  /* figure out average list length for existing elements */
+  for (walk=stat; walk; walk=walk->next)
+  {
+    total+=(double)walk->hval*(double)walk->keyl*(double)walk->keyl;
+  }
+  if (t->count) total /= (double)t->count;
+  else          total  = (double)0;
+
+  /* print statistics */
+  printf("\n");
+  for (walk=stat; walk; walk=walk->next)
+  {
+    printf("items %u:  %u buckets\n", walk->keyl, walk->hval);
+  }
+  printf("\nbuckets: %u  items: %u  existing: %g\n\n",
+         ((uint32_t)1<<t->logsize), t->count, total);
+
+  /* clean up */
+  while (stat)
+  {
+    walk = stat->next;
+    redel(t->space, stat);
+    stat = walk;
+  }
+}
+#endif
author	pixel <pixel>	2008-01-28 12:49:06 +0000
committer	pixel <pixel>	2008-01-28 12:49:06 +0000
commit	7ecf94d4b339bc2197f449f0f1538f2915313087 (patch)
tree	02167bb1970b7adb53bf7aaa5bb5aaf726d11932 /hashtab.c
parent	a578df37ca8c4c130f28ccfd12948bc2fe429526 (diff)