summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpixel <pixel>2008-01-28 12:49:06 +0000
committerpixel <pixel>2008-01-28 12:49:06 +0000
commit7ecf94d4b339bc2197f449f0f1538f2915313087 (patch)
tree02167bb1970b7adb53bf7aaa5bb5aaf726d11932
parenta578df37ca8c4c130f28ccfd12948bc2fe429526 (diff)
Trying with a hashtable instead of a dumb structure.
-rw-r--r--Makefile3
-rw-r--r--hashtab.c370
-rw-r--r--hashtab.h216
-rw-r--r--lookupa.c239
-rw-r--r--lookupa.h30
-rw-r--r--mpq-fs.c125
-rw-r--r--recycle.c87
-rw-r--r--recycle.h71
8 files changed, 1047 insertions, 94 deletions
diff --git a/Makefile b/Makefile
index 5900267..5971ac3 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,9 @@ SOURCE_LIST = \
MPQCryptography.c \
errors.c \
extract.c \
+lookupa.c \
+hashtab.c \
+recycle.c \
mpq-bios.c \
mpq-errors.c \
mpq-file.c \
diff --git a/hashtab.c b/hashtab.c
new file mode 100644
index 0000000..8e278a1
--- /dev/null
+++ b/hashtab.c
@@ -0,0 +1,370 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, 1996. hashtab.c. Public Domain.
+
+This implements a hash table.
+* Keys are unique. Adding an item fails if the key is already there.
+* Keys and items are pointed at, not copied. If you change the value
+ of the key after it is inserted then hfind will not be able to find it.
+* The hash table maintains a position that can be set and queried.
+* The table length doubles dynamically and never shrinks. The insert
+ that causes table doubling may take a long time.
+* The table length splits when the table length equals the number of items
+ Comparisons usually take 7 instructions.
+ Computing a hash value takes 35+6n instructions for an n-byte key.
+
+ hcreate - create a hash table
+ hdestroy - destroy a hash table
+ hcount - The number of items in the hash table
+ hkey - key at the current position
+ hkeyl - key length at the current position
+ hstuff - stuff at the current position
+ hfind - find an item in the table
+ hadd - insert an item into the table
+ hdel - delete an item from the table
+ hstat - print statistics about the table
+ hfirst - position at the first item in the table
+ hnext - move the position to the next item in the table
+--------------------------------------------------------------------
+*/
+
+#include <stdint.h>
+#include <memory.h>
+
+#include "lookupa.h"
+#include "hashtab.h"
+#include "recycle.h"
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE !FALSE
+#endif
+
+#ifdef HSANITY
+/* sanity check -- make sure ipos, apos, and count make sense */
+static void hsanity(t)
+htab *t;
+{
+ uint32_t i, end, counter;
+ hitem *h;
+
+ /* test that apos makes sense */
+ end = (uint32_t)1<<(t->logsize);
+ if (end < t->apos)
+ printf("error: end %ld apos %ld\n", end, t->apos);
+
+ /* test that ipos is in bucket apos */
+ if (t->ipos)
+ {
+ for (h=t->table[t->apos]; h && h != t->ipos; h = h->next)
+ ;
+ if (h != t->ipos)
+ printf("error:ipos not in apos, apos is %ld\n", t->apos);
+ }
+
+ /* test that t->count is the number of elements in the table */
+ counter=0;
+ for (counter=0, i=0; i<end; ++i)
+ for (h=t->table[i]; h; h=h->next)
+ ++counter;
+ if (counter != t->count)
+ printf("error: counter %ld t->count %ld\n", counter, t->count);
+}
+#endif
+
+/*
+ * hgrow - Double the size of a hash table.
+ * Allocate a new, 2x bigger array,
+ * move everything from the old array to the new array,
+ * then free the old array.
+ */
+static void hgrow( t)
+htab *t; /* table */
+{
+ register uint32_t newsize = (uint32_t)1<<(++t->logsize);
+ register uint32_t newmask = newsize-1;
+ register uint32_t i;
+ register hitem **oldtab = t->table;
+ register hitem **newtab = (hitem **)malloc(newsize*sizeof(hitem *));
+
+ /* make sure newtab is cleared */
+ for (i=0; i<newsize; ++i) newtab[i] = (hitem *)0;
+ t->table = newtab;
+ t->mask = newmask;
+
+ /* Walk through old table putting entries in new table */
+ for (i=newsize>>1; i--;)
+ {
+ register hitem *this, *that, **newplace;
+ for (this = oldtab[i]; this;)
+ {
+ that = this;
+ this = this->next;
+ newplace = &newtab[(that->hval & newmask)];
+ that->next = *newplace;
+ *newplace = that;
+ }
+ }
+
+ /* position the hash table on some existing item */
+ hfirst(t);
+
+ /* free the old array */
+ free((char *)oldtab);
+
+}
+
+/* hcreate - create a hash table initially of size power(2,logsize) */
+htab *hcreate(logsize)
+int logsize; /* log base 2 of the size of the hash table */
+{
+ uint32_t i,len;
+ htab *t = (htab *)malloc(sizeof(htab));
+
+ len = ((uint32_t)1<<logsize);
+ t->table = (hitem **)malloc(sizeof(hitem *)*(uint32_t)len);
+ for (i=0; i<len; ++i) t->table[i] = (hitem *)0;
+ t->logsize = logsize;
+ t->mask = len-1;
+ t->count = 0;
+ t->apos = (uint32_t)0;
+ t->ipos = (hitem *)0;
+ t->space = remkroot(sizeof(hitem));
+ t->bcount = 0;
+ return t;
+}
+
+/* hdestroy - destroy the hash table and free all its memory */
+void hdestroy( t)
+htab *t; /* the table */
+{
+ refree(t->space);
+ free((char *)t->table);
+ free((char *)t);
+}
+
+/* hcount() is a macro, see hashtab.h */
+/* hkey() is a macro, see hashtab.h */
+/* hkeyl() is a macro, see hashtab.h */
+/* hstuff() is a macro, see hashtab.h */
+
+/* hfind - find an item with a given key in a hash table */
+int hfind( t, key, keyl )
+htab *t; /* table */
+uint8_t *key; /* key to find */
+uint32_t keyl; /* key length */
+{
+ hitem *h;
+ uint32_t x = lookup(key,keyl,0);
+ uint32_t y;
+ for (h = t->table[y=(x&t->mask)]; h; h = h->next)
+ {
+ if ((x == h->hval) &&
+ (keyl == h->keyl) &&
+ !memcmp(key, h->key, keyl))
+ {
+ t->apos = y;
+ t->ipos = h;
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+/*
+ * hadd - add an item to a hash table.
+ * return FALSE if the key is already there, otherwise TRUE.
+ */
+int hadd( t, key, keyl, stuff)
+htab *t; /* table */
+uint8_t *key; /* key to add to hash table */
+uint32_t keyl; /* key length */
+void *stuff; /* stuff to associate with this key */
+{
+ register hitem *h,**hp;
+ register uint32_t y, x = lookup(key,keyl,0);
+
+ /* make sure the key is not already there */
+ for (h = t->table[(y=(x&t->mask))]; h; h = h->next)
+ {
+ if ((x == h->hval) &&
+ (keyl == h->keyl) &&
+ !memcmp(key, h->key, keyl))
+ {
+ t->apos = y;
+ t->ipos = h;
+ return FALSE;
+ }
+ }
+
+ /* find space for a new item */
+ h = (hitem *)renew(t->space);
+
+ /* make the hash table bigger if it is getting full */
+ if (++t->count > (uint32_t)1<<(t->logsize))
+ {
+ hgrow(t);
+ y = (x&t->mask);
+ }
+
+ /* add the new key to the table */
+ h->key = key;
+ h->keyl = keyl;
+ h->stuff = stuff;
+ h->hval = x;
+ hp = &t->table[y];
+ h->next = *hp;
+ *hp = h;
+ t->ipos = h;
+ t->apos = y;
+
+#ifdef HSANITY
+ hsanity(t);
+#endif /* HSANITY */
+
+ return TRUE;
+}
+
+/* hdel - delete the item at the current position */
+int hdel(t)
+htab *t; /* the hash table */
+{
+ hitem *h; /* item being deleted */
+ hitem **ip; /* a counter */
+
+ /* check for item not existing */
+ if (!(h = t->ipos)) return FALSE;
+
+ /* remove item from its list */
+ for (ip = &t->table[t->apos]; *ip != h; ip = &(*ip)->next)
+ ;
+ *ip = (*ip)->next;
+ --(t->count);
+
+ /* adjust position to something that exists */
+ if (!(t->ipos = h->next)) hnbucket(t);
+
+ /* recycle the deleted hitem node */
+ redel(t->space, h);
+
+#ifdef HSANITY
+ hsanity(t);
+#endif /* HSANITY */
+
+ return TRUE;
+}
+
+/* hfirst - position on the first element in the table */
+int hfirst(t)
+htab *t; /* the hash table */
+{
+ t->apos = t->mask;
+ (void)hnbucket(t);
+ return (t->ipos != (hitem *)0);
+}
+
+/* hnext() is a macro, see hashtab.h */
+
+/*
+ * hnbucket - Move position to the first item in the next bucket.
+ * Return TRUE if we did not wrap around to the beginning of the table
+ */
+int hnbucket(t)
+htab *t;
+{
+ uint32_t oldapos = t->apos;
+ uint32_t end = (uint32_t)1<<(t->logsize);
+ uint32_t i;
+
+ /* see if the element can be found without wrapping around */
+ for (i=oldapos+1; i<end; ++i)
+ {
+ if (t->table[i&t->mask])
+ {
+ t->apos = i;
+ t->ipos = t->table[i];
+ return TRUE;
+ }
+ }
+
+ /* must have to wrap around to find the last element */
+ for (i=0; i<=oldapos; ++i)
+ {
+ if (t->table[i])
+ {
+ t->apos = i;
+ t->ipos = t->table[i];
+ return FALSE;
+ }
+ }
+
+ return FALSE;
+}
+
+#ifdef HSTATS
+void hstat(t)
+htab *t;
+{
+ uint32_t i,j;
+ double total = 0.0;
+ hitem *h;
+ hitem *walk, *walk2, *stat = (hitem *)0;
+
+ /* in stat, keyl will store length of list, hval the number of buckets */
+ for (i=0; i<=t->mask; ++i)
+ {
+ for (h=t->table[i], j=0; h; ++j, h=h->next)
+ ;
+ for (walk=stat; walk && (walk->keyl != j); walk=walk->next)
+ ;
+ if (walk)
+ {
+ ++(walk->hval);
+ }
+ else
+ {
+ walk = (hitem *)renew(t->space);
+ walk->keyl = j;
+ walk->hval = 1;
+ if (!stat || stat->keyl > j) {walk->next=stat; stat=walk;}
+ else
+ {
+ for (walk2=stat;
+ walk2->next && (walk2->next->keyl<j);
+ walk2=walk2->next)
+ ;
+ walk->next = walk2->next;
+ walk2->next = walk;
+ }
+ }
+ }
+
+ /* figure out average list length for existing elements */
+ for (walk=stat; walk; walk=walk->next)
+ {
+ total+=(double)walk->hval*(double)walk->keyl*(double)walk->keyl;
+ }
+ if (t->count) total /= (double)t->count;
+ else total = (double)0;
+
+ /* print statistics */
+ printf("\n");
+ for (walk=stat; walk; walk=walk->next)
+ {
+ printf("items %u: %u buckets\n", walk->keyl, walk->hval);
+ }
+ printf("\nbuckets: %u items: %u existing: %g\n\n",
+ ((uint32_t)1<<t->logsize), t->count, total);
+
+ /* clean up */
+ while (stat)
+ {
+ walk = stat->next;
+ redel(t->space, stat);
+ stat = walk;
+ }
+}
+#endif
diff --git a/hashtab.h b/hashtab.h
new file mode 100644
index 0000000..2f8b22c
--- /dev/null
+++ b/hashtab.h
@@ -0,0 +1,216 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, 1996. hash.h. Public Domain.
+
+This implements a hash table.
+* Keys are unique. Adding an item fails if the key is already there.
+* Keys and items are pointed at, not copied. If you change the value
+ of the key after it is inserted then hfind will not be able to find it.
+* The hash table maintains a position that can be set and queried.
+* The table length doubles dynamically and never shrinks. The insert
+ that causes table doubling may take a long time.
+* The table length splits when the table length equals the number of items
+ Comparisons usually take 7 instructions.
+ Computing a hash value takes 35+6n instructions for an n-byte key.
+
+ hcreate - create a hash table
+ hdestroy - destroy a hash table
+ hcount - The number of items in the hash table
+ hkey - key at the current position
+ hkeyl - key length at the current position
+ hstuff - stuff at the current position
+ hfind - find an item in the table
+ hadd - insert an item into the table
+ hdel - delete an item from the table
+ hstat - print statistics about the table
+ hfirst - position at the first item in the table
+ hnext - move the position to the next item in the table
+--------------------------------------------------------------------
+*/
+
+#ifndef HASHTAB
+#define HASHTAB
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* PRIVATE TYPES AND DEFINITIONS */
+
+struct hitem
+{
+ uint8_t *key; /* key that is hashed */
+ uint32_t keyl; /* length of key */
+ void *stuff; /* stuff stored in this hitem */
+ uint32_t hval; /* hash value */
+ struct hitem *next; /* next hitem in list */
+};
+typedef struct hitem hitem;
+
+
+struct htab
+{
+ struct hitem **table; /* hash table, array of size 2^logsize */
+ int logsize; /* log of size of table */
+ size_t mask; /* (hashval & mask) is position in table */
+ uint32_t count; /* how many items in this hash table so far? */
+ uint32_t apos; /* position in the array */
+ struct hitem *ipos; /* current item in the array */
+ struct reroot *space; /* space for the hitems */
+ uint32_t bcount; /* # hitems useable in current block */
+};
+typedef struct htab htab;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* PUBLIC FUNCTIONS */
+
+/* hcreate - create a hash table
+ ARGUMENTS:
+ logsize - 1<<logsize will be the initial table length
+ RETURNS:
+ the new table
+ */
+htab *hcreate(int logsize);
+
+
+/* hdestroy - destroy a hash table
+ ARGUMENTS:
+ t - the hash table to be destroyed. Note that the items and keys
+ will not be freed, the user created them and must destroy
+ them himself.
+ RETURNS:
+ nothing
+ */
+void hdestroy(htab *t);
+
+
+/* hcount, hkey, hkeyl, hstuff
+ ARGUMENTS:
+ t - the hash table
+ RETURNS:
+ hcount - (ub4) The number of items in the hash table
+ hkey - (ub1 *) key for the current item
+ hkeyl - (ub4) key length for the current item
+ hstuff - (void *) stuff for the current item
+ NOTE:
+ The current position always has an item as long as there
+ are items in the table, so hexist can be used to test if the
+ table is empty.
+ hkey, hkeyl, and hstuff will crash if hcount returns 0
+ */
+#define hcount(t) ((t)->count)
+#define hkey(t) ((t)->ipos->key)
+#define hkeyl(t) ((t)->ipos->keyl)
+#define hstuff(t) ((t)->ipos->stuff)
+
+
+
+/* hfind - move the current position to a given key
+ ARGUMENTS:
+ t - the hash table
+ key - the key to look for
+ keyl - length of the key
+ RETURNS:
+ TRUE if the item exists, FALSE if it does not.
+ If the item exists, moves the current position to that item.
+ */
+int hfind(htab *t, uint8_t *key, uint32_t keyl);
+
+
+/* hadd - add a new item to the hash table
+ change the position to point at the item with the key
+ ARGUMENTS:
+ t - the hash table
+ key - the key to look for
+ keyl - length of the key
+ stuff - other stuff to be stored in this item
+ RETURNS:
+ FALSE if the operation fails (because that key is already there).
+ */
+int hadd(htab *t, uint8_t *key, uint32_t keyl, void *stuff);
+
+
+/* hdel - delete the item at the current position
+ change the position to the following item
+ ARGUMENTS:
+ t - the hash table
+ RETURNS:
+ FALSE if there is no current item (meaning the table is empty)
+ NOTE:
+ This frees the item, but not the key or stuff stored in the item.
+ If you want these then deal with them first. For example:
+ if (hfind(tab, key, keyl))
+ {
+ free(hkey(tab));
+ free(hstuff(tab));
+ hdel(tab);
+ }
+ */
+int hdel(htab *t);
+
+
+/* hfirst - move position to the first item in the table
+ ARGUMENTS:
+ t - the hash table
+ RETURNS:
+ FALSE if there is no current item (meaning the table is empty)
+ NOTE:
+ */
+int hfirst(htab *t);
+
+
+/* hnext - move position to the next item in the table
+ ARGUMENTS:
+ t - the hash table
+ RETURNS:
+ FALSE if the position wraps around to the beginning of the table
+ NOTE:
+ To see every item in the table, do
+ if (hfirst(t)) do
+ {
+ key = hkey(t);
+ stuff = hstuff(t);
+ }
+ while (hnext(t));
+ */
+/* int hnext(/o_ htab *t _o/); */
+#define hnext(t) \
+ ((!(t)->ipos) ? FALSE : \
+ ((t)->ipos=(t)->ipos->next) ? TRUE : hnbucket(t))
+
+/* hnbucket - PRIVATE - move to first item in the next nonempty bucket
+ ARGUMENTS:
+ t - the hash table
+ RETURNS:
+ FALSE if the position wraps around to the beginning of the table
+ NOTE:
+ This is private to hashtab; do not use it externally.
+ */
+int hnbucket(htab *t);
+
+
+/* hstat - print statistics about the hash table
+ ARGUMENTS:
+ t - the hash table
+ NOTE:
+ items <0>: <#buckets with zero items> buckets
+ items <1>: <#buckets with 1 item> buckets
+ ...
+ buckets: #buckets items: #items existing: x
+ ( x is the average length of the list when you look for an
+ item that exists. When the item does not exists, the average
+ length is #items/#buckets. )
+
+ If you put n items into n buckets, expect 1/(n!)e buckets to
+ have n items. That is, .3678 0, .3678 1, .1839 2, ...
+ Also expect "existing" to be about 2.
+ */
+void hstat(htab *t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HASHTAB */
diff --git a/lookupa.c b/lookupa.c
new file mode 100644
index 0000000..731163b
--- /dev/null
+++ b/lookupa.c
@@ -0,0 +1,239 @@
+/*
+--------------------------------------------------------------------
+lookupa.c, by Bob Jenkins, December 1996. Same as lookup2.c
+Use this code however you wish. Public Domain. No warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.c
+--------------------------------------------------------------------
+*/
+#include "lookupa.h"
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bit set, and the deltas of all three
+ high bits or all three low bits, whether the original value of a,b,c
+ is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+ have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+ 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a
+ structure that could supported 2x parallelism, like so:
+ a -= b;
+ a -= c; x = (c>>13);
+ b -= c; a ^= x;
+ b -= a; x = (a<<8);
+ c -= a; b ^= x;
+ c -= b; x = (b>>13);
+ ...
+ Unfortunately, superscalar Pentiums and Sparcs can't take advantage
+ of that parallelism. They've also turned some of those single-cycle
+ latency instructions into multi-cycle latency instructions. Still,
+ this is the fastest good hash I could find. There were about 2^^68
+ to choose from. I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+--------------------------------------------------------------------
+lookup() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ len : the length of the key, counting by bytes
+ level : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Every 1-bit and 2-bit delta achieves avalanche.
+About 6len+35 instructions.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = lookup( k[i], len[i], h);
+
+By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^32 is
+acceptable. Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+uint32_t lookup( k, length, level)
+register uint8_t *k; /* the key */
+register uint32_t length; /* the length of the key */
+register uint32_t level; /* the previous hash, or an arbitrary value */
+{
+ register uint32_t a,b,c,len;
+
+ /* Set up the internal state */
+ len = length;
+ a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ c = level; /* the previous hash value */
+
+ /*---------------------------------------- handle most of the key */
+ while (len >= 12)
+ {
+ a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
+ b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
+ c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
+ mix(a,b,c);
+ k += 12; len -= 12;
+ }
+
+ /*------------------------------------- handle the last 11 bytes */
+ c += length;
+ switch(len) /* all the case statements fall through */
+ {
+ case 11: c+=((uint32_t)k[10]<<24);
+ case 10: c+=((uint32_t)k[9]<<16);
+ case 9 : c+=((uint32_t)k[8]<<8);
+ /* the first byte of c is reserved for the length */
+ case 8 : b+=((uint32_t)k[7]<<24);
+ case 7 : b+=((uint32_t)k[6]<<16);
+ case 6 : b+=((uint32_t)k[5]<<8);
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3]<<24);
+ case 3 : a+=((uint32_t)k[2]<<16);
+ case 2 : a+=((uint32_t)k[1]<<8);
+ case 1 : a+=k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a,b,c);
+ /*-------------------------------------------- report the result */
+ return c;
+}
+
+
+/*
+--------------------------------------------------------------------
+mixc -- mixc 8 4-bit values as quickly and thoroughly as possible.
+Repeating mix() three times achieves avalanche.
+Repeating mix() four times eliminates all funnels and all
+ characteristics stronger than 2^{-11}.
+--------------------------------------------------------------------
+*/
+#define mixc(a,b,c,d,e,f,g,h) \
+{ \
+ a^=b<<11; d+=a; b+=c; \
+ b^=c>>2; e+=b; c+=d; \
+ c^=d<<8; f+=c; d+=e; \
+ d^=e>>16; g+=d; e+=f; \
+ e^=f<<10; h+=e; f+=g; \
+ f^=g>>4; a+=f; g+=h; \
+ g^=h<<8; b+=g; h+=a; \
+ h^=a>>9; c+=h; a+=b; \
+}
+
+/*
+--------------------------------------------------------------------
+checksum() -- hash a variable-length key into a 256-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ len : the length of the key, counting by bytes
+ state : an array of CHECKSTATE 4-byte values (256 bits)
+The state is the checksum. Every bit of the key affects every bit of
+the state. There are no funnels. About 112+6.875len instructions.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+ for (i=0; i<8; ++i) state[i] = 0x9e3779b9;
+ for (i=0, h=0; i<n; ++i) checksum( k[i], len[i], state);
+
+(c) Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial, as long
+as this whole comment accompanies it.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use to detect changes between revisions of documents, assuming nobody
+is trying to cause collisions. Do NOT use for cryptography.
+--------------------------------------------------------------------
+*/
+void checksum( k, len, state)
+register uint8_t *k;
+register uint32_t len;
+register uint32_t*state;
+{
+ register uint32_t a,b,c,d,e,f,g,h,length;
+
+ /* Use the length and level; add in the golden ratio. */
+ length = len;
+ a=state[0]; b=state[1]; c=state[2]; d=state[3];
+ e=state[4]; f=state[5]; g=state[6]; h=state[7];
+
+ /*---------------------------------------- handle most of the key */
+ while (len >= 32)
+ {
+ a += (k[0] +(k[1]<<8) +(k[2]<<16) +(k[3]<<24));
+ b += (k[4] +(k[5]<<8) +(k[6]<<16) +(k[7]<<24));
+ c += (k[8] +(k[9]<<8) +(k[10]<<16)+(k[11]<<24));
+ d += (k[12]+(k[13]<<8)+(k[14]<<16)+(k[15]<<24));
+ e += (k[16]+(k[17]<<8)+(k[18]<<16)+(k[19]<<24));
+ f += (k[20]+(k[21]<<8)+(k[22]<<16)+(k[23]<<24));
+ g += (k[24]+(k[25]<<8)+(k[26]<<16)+(k[27]<<24));
+ h += (k[28]+(k[29]<<8)+(k[30]<<16)+(k[31]<<24));
+ mixc(a,b,c,d,e,f,g,h);
+ mixc(a,b,c,d,e,f,g,h);
+ mixc(a,b,c,d,e,f,g,h);
+ mixc(a,b,c,d,e,f,g,h);
+ k += 32; len -= 32;
+ }
+
+ /*------------------------------------- handle the last 31 bytes */
+ h += length;
+ switch(len)
+ {
+ case 31: h+=(k[30]<<24);
+ case 30: h+=(k[29]<<16);
+ case 29: h+=(k[28]<<8);
+ case 28: g+=(k[27]<<24);
+ case 27: g+=(k[26]<<16);
+ case 26: g+=(k[25]<<8);
+ case 25: g+=k[24];
+ case 24: f+=(k[23]<<24);
+ case 23: f+=(k[22]<<16);
+ case 22: f+=(k[21]<<8);
+ case 21: f+=k[20];
+ case 20: e+=(k[19]<<24);
+ case 19: e+=(k[18]<<16);
+ case 18: e+=(k[17]<<8);
+ case 17: e+=k[16];
+ case 16: d+=(k[15]<<24);
+ case 15: d+=(k[14]<<16);
+ case 14: d+=(k[13]<<8);
+ case 13: d+=k[12];
+ case 12: c+=(k[11]<<24);
+ case 11: c+=(k[10]<<16);
+ case 10: c+=(k[9]<<8);
+ case 9 : c+=k[8];
+ case 8 : b+=(k[7]<<24);
+ case 7 : b+=(k[6]<<16);
+ case 6 : b+=(k[5]<<8);
+ case 5 : b+=k[4];
+ case 4 : a+=(k[3]<<24);
+ case 3 : a+=(k[2]<<16);
+ case 2 : a+=(k[1]<<8);
+ case 1 : a+=k[0];
+ }
+ mixc(a,b,c,d,e,f,g,h);
+ mixc(a,b,c,d,e,f,g,h);
+ mixc(a,b,c,d,e,f,g,h);
+ mixc(a,b,c,d,e,f,g,h);
+
+ /*-------------------------------------------- report the result */
+ state[0]=a; state[1]=b; state[2]=c; state[3]=d;
+ state[4]=e; state[5]=f; state[6]=g; state[7]=h;
+}
diff --git a/lookupa.h b/lookupa.h
new file mode 100644
index 0000000..8001b3c
--- /dev/null
+++ b/lookupa.h
@@ -0,0 +1,30 @@
+/*
+------------------------------------------------------------------------------
+By Bob Jenkins, September 1996.
+lookupa.h, a hash function for table lookup, same function as lookup.c.
+Use this code in any way you wish. Public Domain. It has no warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.h
+------------------------------------------------------------------------------
+*/
+
+#ifndef LOOKUPA
+#define LOOKUPA
+
+#include <stdint.h>
+
+#define CHECKSTATE 8
+#define hashsize(n) ((uint32_t)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+uint32_t lookup(uint8_t * k, uint32_t length, uint32_t level);
+void checksum(uint8_t * k, uint32_t length, uint32_t * state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LOOKUPA */
diff --git a/mpq-fs.c b/mpq-fs.c
index ae9ed02..3a2a4b3 100644
--- a/mpq-fs.c
+++ b/mpq-fs.c
@@ -1,90 +1,50 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include "lookupa.h"
+#include "hashtab.h"
#include "mpq-fs.h"
+#include "mpq-misc.h"
#define MAX_FNAME 2048
-typedef struct directory_list_t {
- struct directory_list_t * child;
- char * name;
+#define INITIAL_HASH_SIZE 10
+
+typedef struct hash_entry_t {
struct mpq_archive_t * mpq_a;
int entry;
- struct directory_list_t * next;
-} directory_list;
-
-static directory_list root_dir = {
- NULL, "", NULL, -1, NULL
-};
-
-static directory_list * root = &root_dir;
+} hash_entry;
-static char * strtoupper(char * _str) {
+static char * strnormalize(char * _str) {
char * str = _str;
while (*str) {
*str = toupper(*str);
+ if (*str == '/')
+ *str = '\\';
str++;
}
return _str;
}
-/*
- * The 'new' keyword is abusive: the function returns an existing entry if it matches, allowing overrites.
- */
-static directory_list * new_directory_list(directory_list * parent, struct mpq_archive_t * mpq_a, const char * name) {
- directory_list * r;
-
- for (r = parent->child; r; r = r->next) {
- if (!strcasecmp(r->name, name))
- break;
- }
-
- if (!r) {
- r = (directory_list *) malloc(sizeof(directory_list));
- r->child = NULL;
- r->next = parent->child;
- parent->child = r;
- r->name = strtoupper(strdup(name));
- }
- r->mpq_a = mpq_a;
- r->entry = -1;
-
- return r;
-}
+htab * hash_table = 0;
-/*
- * Recursively adds a new entry into our directory structure.
- */
-static directory_list * add_file_r(directory_list * parent, struct mpq_archive_t * mpq_a, char * fname, int entry) {
- char * bs;
- directory_list * r;
- int tail = 0;
-
- bs = strchr(fname, '\\');
-
- if (bs) {
- *bs = 0;
- } else {
- tail = 1;
- }
- if (!(r = new_directory_list(parent, mpq_a, fname)))
- return NULL;
-
- if (tail) {
- r->entry = entry;
- return r;
- }
-
- return add_file_r(r, mpq_a, bs + 1, entry);
-}
-
-static directory_list * add_file(struct mpq_archive_t * mpq_a, char * fname) {
+static void add_file(struct mpq_archive_t * mpq_a, char * fname) {
int entry;
-
+ char * nfname;
+
if ((entry = mpqlib_find_hash_entry_by_name(mpq_a, fname, 0, 0)) < 0)
- return NULL;
+ return;
- return add_file_r(root, mpq_a, fname, entry);
+ if (!hash_table)
+ hash_table = hcreate(INITIAL_HASH_SIZE);
+
+ nfname = strnormalize(strdup(fname));
+ hadd(hash_table, (uint8_t *) nfname, strlen(nfname), NULL);
+ if (!hstuff(hash_table)) {
+ hstuff(hash_table) = malloc(sizeof(hash_entry));
+ }
+ ((hash_entry *) hstuff(hash_table))->mpq_a = mpq_a;
+ ((hash_entry *) hstuff(hash_table))->entry = entry;
}
/*
@@ -143,39 +103,16 @@ void mpqlib_fs_attach_listfile(struct mpq_archive_t * mpq_a, const char * listfi
/*
* Recursively find a file.
*/
-static directory_list * find_file_r(directory_list * parent, char * fname) {
- char * bs;
- directory_list * r;
- int tail = 0;
-
- bs = strchr(fname, '\\');
-
- if (bs) {
- *bs = 0;
- } else {
- tail = 1;
- }
-
- for (r = parent->child; r; r = r->next) {
- if (!strcmp(fname, r->name)) {
- if (tail) {
- return r;
- } else {
- return find_file_r(r, bs + 1);
- }
- }
- }
+static hash_entry * find_file(char * fname) {
+ if (!hfind(hash_table, (uint8_t *) fname, strlen(fname)))
+ return NULL;
- return NULL;
-}
-
-static directory_list * find_file(char * fname) {
- return find_file_r(root, fname);
+ return (hash_entry *) hstuff(hash_table);
}
struct mpq_file_t * mpqlib_fs_open(const char * _fname) {
- char * fname = strtoupper(strdup(_fname));
- directory_list * entry;
+ char * fname = strnormalize(strdup(_fname));
+ hash_entry * entry;
entry = find_file(fname);
diff --git a/recycle.c b/recycle.c
new file mode 100644
index 0000000..261c54f
--- /dev/null
+++ b/recycle.c
@@ -0,0 +1,87 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996. recycle.c
+You may use this code in any way you wish, and it is free. No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+ malloc will consume about 90% of the time in a program. This
+ module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing structures
+ only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <memory.h>
+#include "recycle.h"
+
+#define align(a) (((uint32_t)a+(sizeof(void *)-1))&(~(sizeof(void *)-1)))
+
+reroot *remkroot(size)
+size_t size;
+{
+ reroot *r = (reroot *)remalloc(sizeof(reroot), "recycle.c, root");
+ r->list = (recycle *)0;
+ r->trash = (recycle *)0;
+ r->size = align(size);
+ r->logsize = RESTART;
+ r->numleft = 0;
+ return r;
+}
+
+void refree(r)
+struct reroot *r;
+{
+ recycle *temp;
+ if ((temp = r->list)) while (r->list)
+ {
+ temp = r->list->next;
+ free((char *)r->list);
+ r->list = temp;
+ }
+ free((char *)r);
+ return;
+}
+
+/* to be called from the macro renew only */
+char *renewx(r)
+struct reroot *r;
+{
+ recycle *temp;
+ if (r->trash)
+ { /* pull a node off the trash heap */
+ temp = r->trash;
+ r->trash = temp->next;
+ (void)memset((void *)temp, 0, r->size);
+ }
+ else
+ { /* allocate a new block of nodes */
+ r->numleft = r->size*((uint32_t)1<<r->logsize);
+ if (r->numleft < REMAX) ++r->logsize;
+ temp = (recycle *)remalloc(sizeof(recycle) + r->numleft,
+ "recycle.c, data");
+ temp->next = r->list;
+ r->list = temp;
+ r->numleft-=r->size;
+ temp = (recycle *)((char *)(r->list+1)+r->numleft);
+ }
+ return (char *)temp;
+}
+
+char *remalloc(len, purpose)
+size_t len;
+char *purpose;
+{
+ char *x = (char *)malloc(len);
+ if (!x)
+ {
+ fprintf(stderr, "malloc of %d failed for %s\n",
+ len, purpose);
+ exit(-1);
+ }
+ return x;
+}
+
diff --git a/recycle.h b/recycle.h
new file mode 100644
index 0000000..8f4b2ce
--- /dev/null
+++ b/recycle.h
@@ -0,0 +1,71 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996. recycle.h
+You may use this code in any way you wish, and it is free. No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+ malloc will consume about 90% of the time in a program. This
+ module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing all structures
+ only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#ifndef RECYCLE
+#define RECYCLE
+
+#define RESTART 0
+#define REMAX 32000
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct recycle
+{
+ struct recycle *next;
+};
+typedef struct recycle recycle;
+
+struct reroot
+{
+ struct recycle *list; /* list of malloced blocks */
+ struct recycle *trash; /* list of deleted items */
+ size_t size; /* size of an item */
+ size_t logsize; /* log_2 of number of items in a block */
+ int numleft; /* number of bytes left in this block */
+};
+typedef struct reroot reroot;
+
+/* make a new recycling root */
+reroot *remkroot(size_t mysize);
+
+/* free a recycling root and all the items it has made */
+void refree(struct reroot *r);
+
+/* get a new (cleared) item from the root */
+#define renew(r) ((r)->numleft ? \
+ (((char *)((r)->list+1))+((r)->numleft-=(r)->size)) : renewx(r))
+
+char *renewx(struct reroot *r);
+
+/* delete an item; let the root recycle it */
+/* void redel(/o_ struct reroot *r, struct recycle *item _o/); */
+#define redel(root,item) { \
+ ((recycle *)item)->next=(root)->trash; \
+ (root)->trash=(recycle *)(item); \
+}
+
+/* malloc, but complain to stderr and exit program if no joy */
+/* use plain free() to free memory allocated by remalloc() */
+char *remalloc(size_t len, char *purpose);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RECYCLE */