Trying with a hashtable instead of a dumb structure.

author: pixel <pixel> 2008-01-28 12:49:06 +0000
committer: pixel <pixel> 2008-01-28 12:49:06 +0000
commit: 7ecf94d4b339bc2197f449f0f1538f2915313087 (patch)
tree: 02167bb1970b7adb53bf7aaa5bb5aaf726d11932
parent: a578df37ca8c4c130f28ccfd12948bc2fe429526 (diff)
8 files changed, 1047 insertions, 94 deletions
diff --git a/Makefile b/Makefile
index 5900267..5971ac3 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,9 @@ SOURCE_LIST = \
 MPQCryptography.c \
 errors.c \
 extract.c \
+lookupa.c \
+hashtab.c \
+recycle.c \
 mpq-bios.c \
 mpq-errors.c \
 mpq-file.c \
diff --git a/hashtab.c b/hashtab.c
new file mode 100644
index 0000000..8e278a1
--- /dev/null
+++ b/hashtab.c
@@ -0,0 +1,370 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, 1996.  hashtab.c.  Public Domain.
+
+This implements a hash table.
+* Keys are unique.  Adding an item fails if the key is already there.
+* Keys and items are pointed at, not copied.  If you change the value
+  of the key after it is inserted then hfind will not be able to find it.
+* The hash table maintains a position that can be set and queried.
+* The table length doubles dynamically and never shrinks.  The insert
+  that causes table doubling may take a long time.
+* The table length splits when the table length equals the number of items
+  Comparisons usually take 7 instructions.
+  Computing a hash value takes 35+6n instructions for an n-byte key.
+
+  hcreate  - create a hash table
+  hdestroy - destroy a hash table
+   hcount  - The number of items in the hash table
+   hkey    - key at the current position
+   hkeyl   - key length at the current position
+   hstuff  - stuff at the current position
+  hfind    - find an item in the table
+   hadd    - insert an item into the table
+   hdel    - delete an item from the table
+  hstat    - print statistics about the table
+   hfirst  - position at the first item in the table
+   hnext   - move the position to the next item in the table
+--------------------------------------------------------------------
+*/
+
+#include <stdint.h>
+#include <memory.h>
+
+#include "lookupa.h"
+#include "hashtab.h"
+#include "recycle.h"
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE !FALSE
+#endif
+
+#ifdef HSANITY
+/* sanity check -- make sure ipos, apos, and count make sense */
+static void  hsanity(t)
+htab *t;
+{
+  uint32_t i, end, counter;
+  hitem *h;
+
+  /* test that apos makes sense */
+  end = (uint32_t)1<<(t->logsize);
+  if (end < t->apos)
+    printf("error:  end %ld  apos %ld\n", end, t->apos);
+
+  /* test that ipos is in bucket apos */
+  if (t->ipos)
+  {
+    for (h=t->table[t->apos];  h && h != t->ipos;  h = h->next)
+      ;
+    if (h != t->ipos)
+      printf("error:ipos not in apos, apos is %ld\n", t->apos);
+  }
+
+  /* test that t->count is the number of elements in the table */
+  counter=0;
+  for (counter=0, i=0;  i<end;  ++i)
+    for (h=t->table[i];  h;  h=h->next)
+      ++counter;
+  if (counter != t->count)
+    printf("error: counter %ld  t->count %ld\n", counter, t->count);
+}
+#endif
+
+/*
+ * hgrow - Double the size of a hash table.
+ * Allocate a new, 2x bigger array,
+ * move everything from the old array to the new array,
+ * then free the old array.
+ */
+static void hgrow( t)
+htab  *t;    /* table */
+{
+  register uint32_t  newsize = (uint32_t)1<<(++t->logsize);
+  register uint32_t  newmask = newsize-1;
+  register uint32_t  i;
+  register hitem   **oldtab = t->table;
+  register hitem   **newtab = (hitem **)malloc(newsize*sizeof(hitem *));
+
+  /* make sure newtab is cleared */
+  for (i=0; i<newsize; ++i) newtab[i] = (hitem *)0;
+  t->table = newtab;
+  t->mask = newmask;
+
+  /* Walk through old table putting entries in new table */
+  for (i=newsize>>1; i--;)
+  {
+    register hitem *this, *that, **newplace;
+    for (this = oldtab[i]; this;)
+    {
+      that = this;
+      this = this->next;
+      newplace = &newtab[(that->hval & newmask)];
+      that->next = *newplace;
+      *newplace = that;
+    }
+  }
+
+  /* position the hash table on some existing item */
+  hfirst(t);
+
+  /* free the old array */
+  free((char *)oldtab);
+
+}
+
+/* hcreate - create a hash table initially of size power(2,logsize) */
+htab *hcreate(logsize)
+int   logsize;    /* log base 2 of the size of the hash table */
+{
+  uint32_t i,len;
+  htab *t = (htab *)malloc(sizeof(htab));
+
+  len = ((uint32_t)1<<logsize);
+  t->table = (hitem **)malloc(sizeof(hitem *)*(uint32_t)len);
+  for (i=0; i<len; ++i) t->table[i] = (hitem *)0;
+  t->logsize = logsize;
+  t->mask = len-1;
+  t->count = 0;
+  t->apos = (uint32_t)0;
+  t->ipos = (hitem *)0;
+  t->space = remkroot(sizeof(hitem));
+  t->bcount = 0;
+  return t;
+}
+
+/* hdestroy - destroy the hash table and free all its memory */
+void hdestroy( t)
+htab  *t;    /* the table */
+{
+  refree(t->space);
+  free((char *)t->table);
+  free((char *)t);
+}
+
+/* hcount() is a macro, see hashtab.h */
+/* hkey() is a macro, see hashtab.h */
+/* hkeyl() is a macro, see hashtab.h */
+/* hstuff() is a macro, see hashtab.h */
+
+/* hfind - find an item with a given key in a hash table */
+int    hfind( t, key, keyl )
+htab    *t;     /* table */
+uint8_t *key;   /* key to find */
+uint32_t keyl;  /* key length */
+{
+  hitem *h;
+  uint32_t x = lookup(key,keyl,0);
+  uint32_t y;
+  for (h = t->table[y=(x&t->mask)]; h; h = h->next)
+  {
+    if ((x == h->hval) && 
+        (keyl == h->keyl) && 
+        !memcmp(key, h->key, keyl))
+    {
+      t->apos = y;
+      t->ipos = h;
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+/*
+ * hadd - add an item to a hash table.
+ * return FALSE if the key is already there, otherwise TRUE.
+ */
+int  hadd( t, key, keyl, stuff)
+htab    *t;      /* table */
+uint8_t *key;    /* key to add to hash table */
+uint32_t keyl;   /* key length */
+void    *stuff;  /* stuff to associate with this key */
+{
+  register hitem  *h,**hp;
+  register uint32_t  y, x = lookup(key,keyl,0);
+
+  /* make sure the key is not already there */
+  for (h = t->table[(y=(x&t->mask))]; h; h = h->next)
+  {
+    if ((x == h->hval) && 
+        (keyl == h->keyl) && 
+        !memcmp(key, h->key, keyl))
+    {
+      t->apos = y;
+      t->ipos = h;
+      return FALSE;
+    }
+  }
+
+  /* find space for a new item */
+  h = (hitem *)renew(t->space);
+
+  /* make the hash table bigger if it is getting full */
+  if (++t->count > (uint32_t)1<<(t->logsize))
+  {
+    hgrow(t);
+    y = (x&t->mask);
+  }
+
+  /* add the new key to the table */
+  h->key   = key;
+  h->keyl  = keyl;
+  h->stuff = stuff;
+  h->hval  = x;
+  hp = &t->table[y];
+  h->next = *hp;
+  *hp = h;
+  t->ipos = h;
+  t->apos = y;
+
+#ifdef HSANITY
+  hsanity(t);
+#endif  /* HSANITY */
+
+  return TRUE;
+}
+
+/* hdel - delete the item at the current position */
+int   hdel(t)
+htab *t;      /* the hash table */
+{
+  hitem  *h;    /* item being deleted */
+  hitem **ip;   /* a counter */
+
+  /* check for item not existing */
+  if (!(h = t->ipos)) return FALSE;
+
+  /* remove item from its list */
+  for (ip = &t->table[t->apos]; *ip != h; ip = &(*ip)->next)
+    ;
+  *ip = (*ip)->next;
+  --(t->count);
+
+  /* adjust position to something that exists */
+  if (!(t->ipos = h->next)) hnbucket(t);
+
+  /* recycle the deleted hitem node */
+  redel(t->space, h);
+
+#ifdef HSANITY
+  hsanity(t);
+#endif  /* HSANITY */
+
+  return TRUE;
+}
+
+/* hfirst - position on the first element in the table */
+int  hfirst(t)
+htab  *t;    /* the hash table */
+{
+  t->apos = t->mask;
+  (void)hnbucket(t);
+  return (t->ipos != (hitem *)0);
+}
+
+/* hnext() is a macro, see hashtab.h */
+
+/*
+ * hnbucket - Move position to the first item in the next bucket.
+ * Return TRUE if we did not wrap around to the beginning of the table
+ */
+int  hnbucket(t)
+htab *t;
+{
+  uint32_t  oldapos = t->apos;
+  uint32_t  end = (uint32_t)1<<(t->logsize);
+  uint32_t  i;
+
+  /* see if the element can be found without wrapping around */
+  for (i=oldapos+1; i<end; ++i)
+  {
+    if (t->table[i&t->mask])
+    {
+      t->apos = i;
+      t->ipos = t->table[i];
+      return TRUE;
+    }
+  }
+
+  /* must have to wrap around to find the last element */
+  for (i=0; i<=oldapos; ++i)
+  {
+    if (t->table[i])
+    {
+      t->apos = i;
+      t->ipos = t->table[i];
+      return FALSE;
+    }
+  }
+
+  return FALSE;
+}
+
+#ifdef HSTATS
+void hstat(t)
+htab  *t;
+{
+  uint32_t  i,j;
+  double    total = 0.0;
+  hitem    *h;
+  hitem    *walk, *walk2, *stat = (hitem *)0;
+
+  /* in stat, keyl will store length of list, hval the number of buckets */
+  for (i=0; i<=t->mask; ++i)
+  {
+    for (h=t->table[i], j=0; h; ++j, h=h->next)
+      ;
+    for (walk=stat; walk && (walk->keyl != j); walk=walk->next)
+      ;
+    if (walk)
+    {
+      ++(walk->hval);
+    }
+    else
+    {
+      walk = (hitem *)renew(t->space);
+      walk->keyl = j;
+      walk->hval = 1;
+      if (!stat || stat->keyl > j) {walk->next=stat; stat=walk;}
+      else
+      {
+        for (walk2=stat;
+             walk2->next && (walk2->next->keyl<j);
+             walk2=walk2->next)
+          ;
+        walk->next = walk2->next;
+        walk2->next = walk;
+      }
+    }
+  }
+
+  /* figure out average list length for existing elements */
+  for (walk=stat; walk; walk=walk->next)
+  {
+    total+=(double)walk->hval*(double)walk->keyl*(double)walk->keyl;
+  }
+  if (t->count) total /= (double)t->count;
+  else          total  = (double)0;
+
+  /* print statistics */
+  printf("\n");
+  for (walk=stat; walk; walk=walk->next)
+  {
+    printf("items %u:  %u buckets\n", walk->keyl, walk->hval);
+  }
+  printf("\nbuckets: %u  items: %u  existing: %g\n\n",
+         ((uint32_t)1<<t->logsize), t->count, total);
+
+  /* clean up */
+  while (stat)
+  {
+    walk = stat->next;
+    redel(t->space, stat);
+    stat = walk;
+  }
+}
+#endif
diff --git a/hashtab.h b/hashtab.h
new file mode 100644
index 0000000..2f8b22c
--- /dev/null
+++ b/hashtab.h
@@ -0,0 +1,216 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, 1996.  hash.h.  Public Domain.
+
+This implements a hash table.
+* Keys are unique.  Adding an item fails if the key is already there.
+* Keys and items are pointed at, not copied.  If you change the value
+  of the key after it is inserted then hfind will not be able to find it.
+* The hash table maintains a position that can be set and queried.
+* The table length doubles dynamically and never shrinks.  The insert
+  that causes table doubling may take a long time.
+* The table length splits when the table length equals the number of items
+  Comparisons usually take 7 instructions.
+  Computing a hash value takes 35+6n instructions for an n-byte key.
+
+  hcreate  - create a hash table
+  hdestroy - destroy a hash table
+   hcount  - The number of items in the hash table
+   hkey    - key at the current position
+   hkeyl   - key length at the current position
+   hstuff  - stuff at the current position
+  hfind    - find an item in the table
+   hadd    - insert an item into the table
+   hdel    - delete an item from the table
+  hstat    - print statistics about the table
+   hfirst  - position at the first item in the table
+   hnext   - move the position to the next item in the table
+--------------------------------------------------------------------
+*/
+
+#ifndef HASHTAB
+#define HASHTAB
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* PRIVATE TYPES AND DEFINITIONS */
+
+struct hitem
+{
+  uint8_t      *key;      /* key that is hashed */
+  uint32_t      keyl;     /* length of key */
+  void         *stuff;    /* stuff stored in this hitem */
+  uint32_t      hval;     /* hash value */
+  struct hitem *next;     /* next hitem in list */
+};
+typedef  struct hitem  hitem;
+
+
+struct htab
+{
+  struct hitem **table;   /* hash table, array of size 2^logsize */
+  int            logsize; /* log of size of table */
+  size_t         mask;    /* (hashval & mask) is position in table */
+  uint32_t       count;   /* how many items in this hash table so far? */
+  uint32_t       apos;    /* position in the array */
+  struct hitem  *ipos;    /* current item in the array */
+  struct reroot *space;   /* space for the hitems */
+  uint32_t       bcount;  /* # hitems useable in current block */
+};
+typedef  struct htab  htab;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* PUBLIC FUNCTIONS */
+
+/* hcreate - create a hash table
+   ARGUMENTS:
+     logsize - 1<<logsize will be the initial table length
+   RETURNS:
+     the new table
+ */
+htab *hcreate(int  logsize);
+
+
+/* hdestroy - destroy a hash table
+   ARGUMENTS:
+     t - the hash table to be destroyed.  Note that the items and keys
+         will not be freed, the user created them and must destroy
+         them himself.
+   RETURNS:
+     nothing
+ */
+void  hdestroy(htab *t);
+
+
+/* hcount, hkey, hkeyl, hstuff
+     ARGUMENTS:
+     t - the hash table
+   RETURNS:
+     hcount - (ub4)    The number of items in the hash table
+     hkey   - (ub1 *)  key for the current item
+     hkeyl  - (ub4)    key length for the current item
+     hstuff - (void *) stuff for the current item
+   NOTE:
+     The current position always has an item as long as there
+       are items in the table, so hexist can be used to test if the
+       table is empty.
+     hkey, hkeyl, and hstuff will crash if hcount returns 0
+ */
+#define hcount(t) ((t)->count)
+#define hkey(t)   ((t)->ipos->key)
+#define hkeyl(t)  ((t)->ipos->keyl)
+#define hstuff(t) ((t)->ipos->stuff)
+
+
+
+/* hfind - move the current position to a given key
+   ARGUMENTS:
+     t    - the hash table
+     key  - the key to look for
+     keyl - length of the key
+   RETURNS:
+     TRUE if the item exists, FALSE if it does not.
+     If the item exists, moves the current position to that item.
+ */
+int   hfind(htab *t, uint8_t *key, uint32_t keyl);
+
+
+/* hadd - add a new item to the hash table
+          change the position to point at the item with the key
+   ARGUMENTS:
+     t     - the hash table
+     key   - the key to look for
+     keyl  - length of the key
+     stuff - other stuff to be stored in this item
+   RETURNS:
+     FALSE if the operation fails (because that key is already there).
+ */
+int   hadd(htab *t, uint8_t *key, uint32_t keyl, void *stuff);
+
+
+/* hdel - delete the item at the current position
+          change the position to the following item
+  ARGUMENTS:
+    t    - the hash table
+  RETURNS:
+    FALSE if there is no current item (meaning the table is empty)
+  NOTE:
+    This frees the item, but not the key or stuff stored in the item.
+    If you want these then deal with them first.  For example:
+      if (hfind(tab, key, keyl))
+      {
+        free(hkey(tab));
+        free(hstuff(tab));
+        hdel(tab);
+      }
+ */
+int   hdel(htab *t);
+
+
+/* hfirst - move position to the first item in the table
+  ARGUMENTS:
+    t    - the hash table
+  RETURNS:
+    FALSE if there is no current item (meaning the table is empty)
+  NOTE:
+ */
+int  hfirst(htab *t);
+
+
+/* hnext - move position to the next item in the table
+  ARGUMENTS:
+    t    - the hash table
+  RETURNS:
+    FALSE if the position wraps around to the beginning of the table
+  NOTE:
+    To see every item in the table, do
+      if (hfirst(t)) do
+      {
+        key   = hkey(t);
+        stuff = hstuff(t);
+      }
+      while (hnext(t));
+ */
+/* int  hnext(/o_ htab *t _o/); */
+#define hnext(t) \
+  ((!(t)->ipos) ? FALSE :  \
+   ((t)->ipos=(t)->ipos->next) ? TRUE : hnbucket(t))
+
+/* hnbucket - PRIVATE - move to first item in the next nonempty bucket
+  ARGUMENTS:
+    t    - the hash table
+  RETURNS:
+    FALSE if the position wraps around to the beginning of the table
+  NOTE:
+    This is private to hashtab; do not use it externally.
+ */
+int  hnbucket(htab *t);
+
+
+/* hstat - print statistics about the hash table
+  ARGUMENTS:
+    t    - the hash table
+  NOTE:
+    items <0>:  <#buckets with zero items> buckets
+    items <1>:  <#buckets with 1 item> buckets
+    ...
+    buckets: #buckets  items: #items  existing: x
+    ( x is the average length of the list when you look for an
+      item that exists.  When the item does not exists, the average
+      length is #items/#buckets. )
+
+    If you put n items into n buckets, expect 1/(n!)e buckets to
+    have n items.  That is, .3678 0, .3678 1, .1839 2, ...
+    Also expect "existing" to be about 2.
+ */
+void hstat(htab *t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif   /* HASHTAB */
diff --git a/lookupa.c b/lookupa.c
new file mode 100644
index 0000000..731163b
--- /dev/null
+++ b/lookupa.c
@@ -0,0 +1,239 @@
+/*
+--------------------------------------------------------------------
+lookupa.c, by Bob Jenkins, December 1996.  Same as lookup2.c
+Use this code however you wish.  Public Domain.  No warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.c
+--------------------------------------------------------------------
+*/
+#include "lookupa.h"
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bit set, and the deltas of all three
+  high bits or all three low bits, whether the original value of a,b,c
+  is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+  have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a 
+  structure that could supported 2x parallelism, like so:
+      a -= b; 
+      a -= c; x = (c>>13);
+      b -= c; a ^= x;
+      b -= a; x = (a<<8);
+      c -= a; b ^= x;
+      c -= b; x = (b>>13);
+      ...
+  Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
+  of that parallelism.  They've also turned some of those single-cycle
+  latency instructions into multi-cycle latency instructions.  Still,
+  this is the fastest good hash I could find.  There were about 2^^68
+  to choose from.  I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+--------------------------------------------------------------------
+lookup() -- hash a variable-length key into a 32-bit value
+  k     : the key (the unaligned variable-length array of bytes)
+  len   : the length of the key, counting by bytes
+  level : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+About 6len+35 instructions.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = lookup( k[i], len[i], h);
+
+By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
+code any way you wish, private, educational, or commercial.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^32 is
+acceptable.  Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+uint32_t lookup( k, length, level)
+register uint8_t *k;        /* the key */
+register uint32_t length;   /* the length of the key */
+register uint32_t level;    /* the previous hash, or an arbitrary value */
+{
+   register uint32_t a,b,c,len;
+
+   /* Set up the internal state */
+   len = length;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = level;           /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)
+   {
+      a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
+      b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
+      c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;
+   switch(len)              /* all the case statements fall through */
+   {
+   case 11: c+=((uint32_t)k[10]<<24);
+   case 10: c+=((uint32_t)k[9]<<16);
+   case 9 : c+=((uint32_t)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((uint32_t)k[7]<<24);
+   case 7 : b+=((uint32_t)k[6]<<16);
+   case 6 : b+=((uint32_t)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((uint32_t)k[3]<<24);
+   case 3 : a+=((uint32_t)k[2]<<16);
+   case 2 : a+=((uint32_t)k[1]<<8);
+   case 1 : a+=k[0];
+     /* case 0: nothing left to add */
+   }
+   mix(a,b,c);
+   /*-------------------------------------------- report the result */
+   return c;
+}
+
+
+/*
+--------------------------------------------------------------------
+mixc -- mixc 8 4-bit values as quickly and thoroughly as possible.
+Repeating mix() three times achieves avalanche.
+Repeating mix() four times eliminates all funnels and all
+  characteristics stronger than 2^{-11}.
+--------------------------------------------------------------------
+*/
+#define mixc(a,b,c,d,e,f,g,h) \
+{ \
+   a^=b<<11; d+=a; b+=c; \
+   b^=c>>2;  e+=b; c+=d; \
+   c^=d<<8;  f+=c; d+=e; \
+   d^=e>>16; g+=d; e+=f; \
+   e^=f<<10; h+=e; f+=g; \
+   f^=g>>4;  a+=f; g+=h; \
+   g^=h<<8;  b+=g; h+=a; \
+   h^=a>>9;  c+=h; a+=b; \
+}
+
+/*
+--------------------------------------------------------------------
+checksum() -- hash a variable-length key into a 256-bit value
+  k     : the key (the unaligned variable-length array of bytes)
+  len   : the length of the key, counting by bytes
+  state : an array of CHECKSTATE 4-byte values (256 bits)
+The state is the checksum.  Every bit of the key affects every bit of
+the state.  There are no funnels.  About 112+6.875len instructions.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0; i<8; ++i) state[i] = 0x9e3779b9;
+  for (i=0, h=0; i<n; ++i) checksum( k[i], len[i], state);
+
+(c) Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
+code any way you wish, private, educational, or commercial, as long
+as this whole comment accompanies it.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use to detect changes between revisions of documents, assuming nobody
+is trying to cause collisions.  Do NOT use for cryptography.
+--------------------------------------------------------------------
+*/
+void  checksum( k, len, state)
+register uint8_t *k;
+register uint32_t len;
+register uint32_t*state;
+{
+   register uint32_t a,b,c,d,e,f,g,h,length;
+
+   /* Use the length and level; add in the golden ratio. */
+   length = len;
+   a=state[0]; b=state[1]; c=state[2]; d=state[3];
+   e=state[4]; f=state[5]; g=state[6]; h=state[7];
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 32)
+   {
+      a += (k[0] +(k[1]<<8) +(k[2]<<16) +(k[3]<<24));
+      b += (k[4] +(k[5]<<8) +(k[6]<<16) +(k[7]<<24));
+      c += (k[8] +(k[9]<<8) +(k[10]<<16)+(k[11]<<24));
+      d += (k[12]+(k[13]<<8)+(k[14]<<16)+(k[15]<<24));
+      e += (k[16]+(k[17]<<8)+(k[18]<<16)+(k[19]<<24));
+      f += (k[20]+(k[21]<<8)+(k[22]<<16)+(k[23]<<24));
+      g += (k[24]+(k[25]<<8)+(k[26]<<16)+(k[27]<<24));
+      h += (k[28]+(k[29]<<8)+(k[30]<<16)+(k[31]<<24));
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      k += 32; len -= 32;
+   }
+
+   /*------------------------------------- handle the last 31 bytes */
+   h += length;
+   switch(len)
+   {
+   case 31: h+=(k[30]<<24);
+   case 30: h+=(k[29]<<16);
+   case 29: h+=(k[28]<<8);
+   case 28: g+=(k[27]<<24);
+   case 27: g+=(k[26]<<16);
+   case 26: g+=(k[25]<<8);
+   case 25: g+=k[24];
+   case 24: f+=(k[23]<<24);
+   case 23: f+=(k[22]<<16);
+   case 22: f+=(k[21]<<8);
+   case 21: f+=k[20];
+   case 20: e+=(k[19]<<24);
+   case 19: e+=(k[18]<<16);
+   case 18: e+=(k[17]<<8);
+   case 17: e+=k[16];
+   case 16: d+=(k[15]<<24);
+   case 15: d+=(k[14]<<16);
+   case 14: d+=(k[13]<<8);
+   case 13: d+=k[12];
+   case 12: c+=(k[11]<<24);
+   case 11: c+=(k[10]<<16);
+   case 10: c+=(k[9]<<8);
+   case 9 : c+=k[8];
+   case 8 : b+=(k[7]<<24);
+   case 7 : b+=(k[6]<<16);
+   case 6 : b+=(k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=(k[3]<<24);
+   case 3 : a+=(k[2]<<16);
+   case 2 : a+=(k[1]<<8);
+   case 1 : a+=k[0];
+   }
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+
+   /*-------------------------------------------- report the result */
+   state[0]=a; state[1]=b; state[2]=c; state[3]=d;
+   state[4]=e; state[5]=f; state[6]=g; state[7]=h;
+}
diff --git a/lookupa.h b/lookupa.h
new file mode 100644
index 0000000..8001b3c
--- /dev/null
+++ b/lookupa.h
@@ -0,0 +1,30 @@
+/*
+------------------------------------------------------------------------------
+By Bob Jenkins, September 1996.
+lookupa.h, a hash function for table lookup, same function as lookup.c.
+Use this code in any way you wish.  Public Domain.  It has no warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.h
+------------------------------------------------------------------------------
+*/
+
+#ifndef LOOKUPA
+#define LOOKUPA
+
+#include <stdint.h>
+
+#define CHECKSTATE 8
+#define hashsize(n) ((uint32_t)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+uint32_t lookup(uint8_t * k, uint32_t length, uint32_t level);
+void checksum(uint8_t * k, uint32_t length, uint32_t * state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LOOKUPA */
diff --git a/mpq-fs.c b/mpq-fs.c
index ae9ed02..3a2a4b3 100644
--- a/mpq-fs.c
+++ b/mpq-fs.c
@@ -1,90 +1,50 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#include "lookupa.h"
+#include "hashtab.h"
 #include "mpq-fs.h"
+#include "mpq-misc.h"
 
 #define MAX_FNAME 2048
 
-typedef struct directory_list_t {
-    struct directory_list_t * child;
-    char * name;
+#define INITIAL_HASH_SIZE 10
+
+typedef struct hash_entry_t {
     struct mpq_archive_t * mpq_a;
     int entry;
-    struct directory_list_t * next;
-} directory_list;
-
-static directory_list root_dir = {
-    NULL, "", NULL, -1, NULL
-};
-
-static directory_list * root = &root_dir;
+} hash_entry;
 
-static char * strtoupper(char * _str) {
+static char * strnormalize(char * _str) {
     char * str = _str;
     while (*str) {
         *str = toupper(*str);
+        if (*str == '/')
+            *str = '\\';
         str++;
     }
     return _str;
 }
 
-/*
- * The 'new' keyword is abusive: the function returns an existing entry if it matches, allowing overrites.
- */
-static directory_list * new_directory_list(directory_list * parent, struct mpq_archive_t * mpq_a, const char * name) {
-    directory_list * r;
-    
-    for (r = parent->child; r; r = r->next) {
-        if (!strcasecmp(r->name, name))
-            break;
-    }
-    
-    if (!r) {
-        r = (directory_list *) malloc(sizeof(directory_list));
-        r->child = NULL;
-        r->next = parent->child;
-        parent->child = r;
-        r->name = strtoupper(strdup(name));
-    }
-    r->mpq_a = mpq_a;
-    r->entry = -1;
-    
-    return r;
-}
+htab * hash_table = 0;
 
-/*
- * Recursively adds a new entry into our directory structure.
- */
-static directory_list * add_file_r(directory_list * parent, struct mpq_archive_t * mpq_a, char * fname, int entry) {
-    char * bs;
-    directory_list * r;
-    int tail = 0;
-    
-    bs = strchr(fname, '\\');
-    
-    if (bs) {
-        *bs = 0;
-    } else {
-        tail = 1;
-    }
-    if (!(r = new_directory_list(parent, mpq_a, fname)))
-        return NULL;
-    
-    if (tail) {
-        r->entry = entry;
-        return r;
-    }
-    
-    return add_file_r(r, mpq_a, bs + 1, entry);
-}
-
-static directory_list * add_file(struct mpq_archive_t * mpq_a, char * fname) {
+static void add_file(struct mpq_archive_t * mpq_a, char * fname) {
     int entry;
-    
+    char * nfname;
+
     if ((entry = mpqlib_find_hash_entry_by_name(mpq_a, fname, 0, 0)) < 0)
-        return NULL;
+        return;
     
-    return add_file_r(root, mpq_a, fname, entry);
+    if (!hash_table)
+        hash_table = hcreate(INITIAL_HASH_SIZE);
+
+    nfname = strnormalize(strdup(fname));    
+    hadd(hash_table, (uint8_t *) nfname, strlen(nfname), NULL);
+    if (!hstuff(hash_table)) {
+        hstuff(hash_table) = malloc(sizeof(hash_entry));
+    }
+    ((hash_entry *) hstuff(hash_table))->mpq_a = mpq_a;
+    ((hash_entry *) hstuff(hash_table))->entry = entry;
 }
 
 /*
@@ -143,39 +103,16 @@ void mpqlib_fs_attach_listfile(struct mpq_archive_t * mpq_a, const char * listfi
 /*
  * Recursively find a file.
  */
-static directory_list * find_file_r(directory_list * parent, char * fname) {
-    char * bs;
-    directory_list * r;
-    int tail = 0;
-    
-    bs = strchr(fname, '\\');
-    
-    if (bs) {
-        *bs = 0;
-    } else {
-        tail = 1;
-    }
-    
-    for (r = parent->child; r; r = r->next) {
-        if (!strcmp(fname, r->name)) {
-            if (tail) {
-                return r;
-            } else {
-                return find_file_r(r, bs + 1);
-            }
-        }
-    }
+static hash_entry * find_file(char * fname) {
+    if (!hfind(hash_table, (uint8_t *) fname, strlen(fname)))
+        return NULL;
     
-    return NULL;
-}
-
-static directory_list * find_file(char * fname) {
-    return find_file_r(root, fname);
+    return (hash_entry *) hstuff(hash_table);
 }
 
 struct mpq_file_t * mpqlib_fs_open(const char * _fname) {
-    char * fname = strtoupper(strdup(_fname));
-    directory_list * entry;
+    char * fname = strnormalize(strdup(_fname));
+    hash_entry * entry;
     
     entry = find_file(fname);
     
diff --git a/recycle.c b/recycle.c
new file mode 100644
index 0000000..261c54f
--- /dev/null
+++ b/recycle.c
@@ -0,0 +1,87 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996.  recycle.c
+You may use this code in any way you wish, and it is free.  No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+  malloc will consume about 90% of the time in a program.  This
+  module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing structures
+  only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <memory.h>
+#include "recycle.h"
+
+#define align(a) (((uint32_t)a+(sizeof(void *)-1))&(~(sizeof(void *)-1)))
+
+reroot *remkroot(size)
+size_t  size;
+{
+   reroot *r = (reroot *)remalloc(sizeof(reroot), "recycle.c, root");
+   r->list = (recycle *)0;
+   r->trash = (recycle *)0;
+   r->size = align(size);
+   r->logsize = RESTART;
+   r->numleft = 0;
+   return r;
+}
+
+void  refree(r)
+struct reroot *r;
+{
+   recycle *temp;
+   if ((temp = r->list)) while (r->list)
+   {
+      temp = r->list->next;
+      free((char *)r->list);
+      r->list = temp;
+   }
+   free((char *)r);
+   return;
+}
+
+/* to be called from the macro renew only */
+char  *renewx(r)
+struct reroot *r;
+{
+   recycle *temp;
+   if (r->trash)
+   {  /* pull a node off the trash heap */
+      temp = r->trash;
+      r->trash = temp->next;
+      (void)memset((void *)temp, 0, r->size);
+   }
+   else
+   {  /* allocate a new block of nodes */
+      r->numleft = r->size*((uint32_t)1<<r->logsize);
+      if (r->numleft < REMAX) ++r->logsize;
+      temp = (recycle *)remalloc(sizeof(recycle) + r->numleft, 
+				 "recycle.c, data");
+      temp->next = r->list;
+      r->list = temp;
+      r->numleft-=r->size;
+      temp = (recycle *)((char *)(r->list+1)+r->numleft);
+   }
+   return (char *)temp;
+}
+
+char   *remalloc(len, purpose)
+size_t  len;
+char   *purpose;
+{
+  char *x = (char *)malloc(len);
+  if (!x)
+  {
+    fprintf(stderr, "malloc of %d failed for %s\n", 
+	    len, purpose);
+    exit(-1);
+  }
+  return x;
+}
+
diff --git a/recycle.h b/recycle.h
new file mode 100644
index 0000000..8f4b2ce
--- /dev/null
+++ b/recycle.h
@@ -0,0 +1,71 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996.  recycle.h
+You may use this code in any way you wish, and it is free.  No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+  malloc will consume about 90% of the time in a program.  This
+  module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing all structures
+  only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#ifndef RECYCLE
+#define RECYCLE
+
+#define RESTART    0
+#define REMAX      32000
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct recycle
+{
+   struct recycle *next;
+};
+typedef  struct recycle  recycle;
+
+struct reroot
+{
+   struct recycle *list;     /* list of malloced blocks */
+   struct recycle *trash;    /* list of deleted items */
+   size_t          size;     /* size of an item */
+   size_t          logsize;  /* log_2 of number of items in a block */
+   int             numleft;  /* number of bytes left in this block */
+};
+typedef  struct reroot  reroot;
+
+/* make a new recycling root */
+reroot  *remkroot(size_t mysize);
+
+/* free a recycling root and all the items it has made */
+void     refree(struct reroot *r);
+
+/* get a new (cleared) item from the root */
+#define renew(r) ((r)->numleft ? \
+   (((char *)((r)->list+1))+((r)->numleft-=(r)->size)) : renewx(r))
+
+char    *renewx(struct reroot *r);
+
+/* delete an item; let the root recycle it */
+/* void     redel(/o_ struct reroot *r, struct recycle *item _o/); */
+#define redel(root,item) { \
+   ((recycle *)item)->next=(root)->trash; \
+   (root)->trash=(recycle *)(item); \
+}
+
+/* malloc, but complain to stderr and exit program if no joy */
+/* use plain free() to free memory allocated by remalloc() */
+char    *remalloc(size_t len, char *purpose);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* RECYCLE */
author	pixel <pixel>	2008-01-28 12:49:06 +0000
committer	pixel <pixel>	2008-01-28 12:49:06 +0000
commit	7ecf94d4b339bc2197f449f0f1538f2915313087 (patch)
tree	02167bb1970b7adb53bf7aaa5bb5aaf726d11932
parent	a578df37ca8c4c130f28ccfd12948bc2fe429526 (diff)