From 7ecf94d4b339bc2197f449f0f1538f2915313087 Mon Sep 17 00:00:00 2001 From: pixel Date: Mon, 28 Jan 2008 12:49:06 +0000 Subject: Trying with a hashtable instead of a dumb structure. --- Makefile | 3 + hashtab.c | 370 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hashtab.h | 216 ++++++++++++++++++++++++++++++++++++ lookupa.c | 239 ++++++++++++++++++++++++++++++++++++++++ lookupa.h | 30 +++++ mpq-fs.c | 125 ++++++--------------- recycle.c | 87 +++++++++++++++ recycle.h | 71 ++++++++++++ 8 files changed, 1047 insertions(+), 94 deletions(-) create mode 100644 hashtab.c create mode 100644 hashtab.h create mode 100644 lookupa.c create mode 100644 lookupa.h create mode 100644 recycle.c create mode 100644 recycle.h diff --git a/Makefile b/Makefile index 5900267..5971ac3 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,9 @@ SOURCE_LIST = \ MPQCryptography.c \ errors.c \ extract.c \ +lookupa.c \ +hashtab.c \ +recycle.c \ mpq-bios.c \ mpq-errors.c \ mpq-file.c \ diff --git a/hashtab.c b/hashtab.c new file mode 100644 index 0000000..8e278a1 --- /dev/null +++ b/hashtab.c @@ -0,0 +1,370 @@ +/* +-------------------------------------------------------------------- +By Bob Jenkins, 1996. hashtab.c. Public Domain. + +This implements a hash table. +* Keys are unique. Adding an item fails if the key is already there. +* Keys and items are pointed at, not copied. If you change the value + of the key after it is inserted then hfind will not be able to find it. +* The hash table maintains a position that can be set and queried. +* The table length doubles dynamically and never shrinks. The insert + that causes table doubling may take a long time. +* The table length splits when the table length equals the number of items + Comparisons usually take 7 instructions. + Computing a hash value takes 35+6n instructions for an n-byte key. + + hcreate - create a hash table + hdestroy - destroy a hash table + hcount - The number of items in the hash table + hkey - key at the current position + hkeyl - key length at the current position + hstuff - stuff at the current position + hfind - find an item in the table + hadd - insert an item into the table + hdel - delete an item from the table + hstat - print statistics about the table + hfirst - position at the first item in the table + hnext - move the position to the next item in the table +-------------------------------------------------------------------- +*/ + +#include +#include + +#include "lookupa.h" +#include "hashtab.h" +#include "recycle.h" + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE !FALSE +#endif + +#ifdef HSANITY +/* sanity check -- make sure ipos, apos, and count make sense */ +static void hsanity(t) +htab *t; +{ + uint32_t i, end, counter; + hitem *h; + + /* test that apos makes sense */ + end = (uint32_t)1<<(t->logsize); + if (end < t->apos) + printf("error: end %ld apos %ld\n", end, t->apos); + + /* test that ipos is in bucket apos */ + if (t->ipos) + { + for (h=t->table[t->apos]; h && h != t->ipos; h = h->next) + ; + if (h != t->ipos) + printf("error:ipos not in apos, apos is %ld\n", t->apos); + } + + /* test that t->count is the number of elements in the table */ + counter=0; + for (counter=0, i=0; itable[i]; h; h=h->next) + ++counter; + if (counter != t->count) + printf("error: counter %ld t->count %ld\n", counter, t->count); +} +#endif + +/* + * hgrow - Double the size of a hash table. + * Allocate a new, 2x bigger array, + * move everything from the old array to the new array, + * then free the old array. + */ +static void hgrow( t) +htab *t; /* table */ +{ + register uint32_t newsize = (uint32_t)1<<(++t->logsize); + register uint32_t newmask = newsize-1; + register uint32_t i; + register hitem **oldtab = t->table; + register hitem **newtab = (hitem **)malloc(newsize*sizeof(hitem *)); + + /* make sure newtab is cleared */ + for (i=0; itable = newtab; + t->mask = newmask; + + /* Walk through old table putting entries in new table */ + for (i=newsize>>1; i--;) + { + register hitem *this, *that, **newplace; + for (this = oldtab[i]; this;) + { + that = this; + this = this->next; + newplace = &newtab[(that->hval & newmask)]; + that->next = *newplace; + *newplace = that; + } + } + + /* position the hash table on some existing item */ + hfirst(t); + + /* free the old array */ + free((char *)oldtab); + +} + +/* hcreate - create a hash table initially of size power(2,logsize) */ +htab *hcreate(logsize) +int logsize; /* log base 2 of the size of the hash table */ +{ + uint32_t i,len; + htab *t = (htab *)malloc(sizeof(htab)); + + len = ((uint32_t)1<table = (hitem **)malloc(sizeof(hitem *)*(uint32_t)len); + for (i=0; itable[i] = (hitem *)0; + t->logsize = logsize; + t->mask = len-1; + t->count = 0; + t->apos = (uint32_t)0; + t->ipos = (hitem *)0; + t->space = remkroot(sizeof(hitem)); + t->bcount = 0; + return t; +} + +/* hdestroy - destroy the hash table and free all its memory */ +void hdestroy( t) +htab *t; /* the table */ +{ + refree(t->space); + free((char *)t->table); + free((char *)t); +} + +/* hcount() is a macro, see hashtab.h */ +/* hkey() is a macro, see hashtab.h */ +/* hkeyl() is a macro, see hashtab.h */ +/* hstuff() is a macro, see hashtab.h */ + +/* hfind - find an item with a given key in a hash table */ +int hfind( t, key, keyl ) +htab *t; /* table */ +uint8_t *key; /* key to find */ +uint32_t keyl; /* key length */ +{ + hitem *h; + uint32_t x = lookup(key,keyl,0); + uint32_t y; + for (h = t->table[y=(x&t->mask)]; h; h = h->next) + { + if ((x == h->hval) && + (keyl == h->keyl) && + !memcmp(key, h->key, keyl)) + { + t->apos = y; + t->ipos = h; + return TRUE; + } + } + return FALSE; +} + +/* + * hadd - add an item to a hash table. + * return FALSE if the key is already there, otherwise TRUE. + */ +int hadd( t, key, keyl, stuff) +htab *t; /* table */ +uint8_t *key; /* key to add to hash table */ +uint32_t keyl; /* key length */ +void *stuff; /* stuff to associate with this key */ +{ + register hitem *h,**hp; + register uint32_t y, x = lookup(key,keyl,0); + + /* make sure the key is not already there */ + for (h = t->table[(y=(x&t->mask))]; h; h = h->next) + { + if ((x == h->hval) && + (keyl == h->keyl) && + !memcmp(key, h->key, keyl)) + { + t->apos = y; + t->ipos = h; + return FALSE; + } + } + + /* find space for a new item */ + h = (hitem *)renew(t->space); + + /* make the hash table bigger if it is getting full */ + if (++t->count > (uint32_t)1<<(t->logsize)) + { + hgrow(t); + y = (x&t->mask); + } + + /* add the new key to the table */ + h->key = key; + h->keyl = keyl; + h->stuff = stuff; + h->hval = x; + hp = &t->table[y]; + h->next = *hp; + *hp = h; + t->ipos = h; + t->apos = y; + +#ifdef HSANITY + hsanity(t); +#endif /* HSANITY */ + + return TRUE; +} + +/* hdel - delete the item at the current position */ +int hdel(t) +htab *t; /* the hash table */ +{ + hitem *h; /* item being deleted */ + hitem **ip; /* a counter */ + + /* check for item not existing */ + if (!(h = t->ipos)) return FALSE; + + /* remove item from its list */ + for (ip = &t->table[t->apos]; *ip != h; ip = &(*ip)->next) + ; + *ip = (*ip)->next; + --(t->count); + + /* adjust position to something that exists */ + if (!(t->ipos = h->next)) hnbucket(t); + + /* recycle the deleted hitem node */ + redel(t->space, h); + +#ifdef HSANITY + hsanity(t); +#endif /* HSANITY */ + + return TRUE; +} + +/* hfirst - position on the first element in the table */ +int hfirst(t) +htab *t; /* the hash table */ +{ + t->apos = t->mask; + (void)hnbucket(t); + return (t->ipos != (hitem *)0); +} + +/* hnext() is a macro, see hashtab.h */ + +/* + * hnbucket - Move position to the first item in the next bucket. + * Return TRUE if we did not wrap around to the beginning of the table + */ +int hnbucket(t) +htab *t; +{ + uint32_t oldapos = t->apos; + uint32_t end = (uint32_t)1<<(t->logsize); + uint32_t i; + + /* see if the element can be found without wrapping around */ + for (i=oldapos+1; itable[i&t->mask]) + { + t->apos = i; + t->ipos = t->table[i]; + return TRUE; + } + } + + /* must have to wrap around to find the last element */ + for (i=0; i<=oldapos; ++i) + { + if (t->table[i]) + { + t->apos = i; + t->ipos = t->table[i]; + return FALSE; + } + } + + return FALSE; +} + +#ifdef HSTATS +void hstat(t) +htab *t; +{ + uint32_t i,j; + double total = 0.0; + hitem *h; + hitem *walk, *walk2, *stat = (hitem *)0; + + /* in stat, keyl will store length of list, hval the number of buckets */ + for (i=0; i<=t->mask; ++i) + { + for (h=t->table[i], j=0; h; ++j, h=h->next) + ; + for (walk=stat; walk && (walk->keyl != j); walk=walk->next) + ; + if (walk) + { + ++(walk->hval); + } + else + { + walk = (hitem *)renew(t->space); + walk->keyl = j; + walk->hval = 1; + if (!stat || stat->keyl > j) {walk->next=stat; stat=walk;} + else + { + for (walk2=stat; + walk2->next && (walk2->next->keylnext) + ; + walk->next = walk2->next; + walk2->next = walk; + } + } + } + + /* figure out average list length for existing elements */ + for (walk=stat; walk; walk=walk->next) + { + total+=(double)walk->hval*(double)walk->keyl*(double)walk->keyl; + } + if (t->count) total /= (double)t->count; + else total = (double)0; + + /* print statistics */ + printf("\n"); + for (walk=stat; walk; walk=walk->next) + { + printf("items %u: %u buckets\n", walk->keyl, walk->hval); + } + printf("\nbuckets: %u items: %u existing: %g\n\n", + ((uint32_t)1<logsize), t->count, total); + + /* clean up */ + while (stat) + { + walk = stat->next; + redel(t->space, stat); + stat = walk; + } +} +#endif diff --git a/hashtab.h b/hashtab.h new file mode 100644 index 0000000..2f8b22c --- /dev/null +++ b/hashtab.h @@ -0,0 +1,216 @@ +/* +-------------------------------------------------------------------- +By Bob Jenkins, 1996. hash.h. Public Domain. + +This implements a hash table. +* Keys are unique. Adding an item fails if the key is already there. +* Keys and items are pointed at, not copied. If you change the value + of the key after it is inserted then hfind will not be able to find it. +* The hash table maintains a position that can be set and queried. +* The table length doubles dynamically and never shrinks. The insert + that causes table doubling may take a long time. +* The table length splits when the table length equals the number of items + Comparisons usually take 7 instructions. + Computing a hash value takes 35+6n instructions for an n-byte key. + + hcreate - create a hash table + hdestroy - destroy a hash table + hcount - The number of items in the hash table + hkey - key at the current position + hkeyl - key length at the current position + hstuff - stuff at the current position + hfind - find an item in the table + hadd - insert an item into the table + hdel - delete an item from the table + hstat - print statistics about the table + hfirst - position at the first item in the table + hnext - move the position to the next item in the table +-------------------------------------------------------------------- +*/ + +#ifndef HASHTAB +#define HASHTAB + +#include +#include + +/* PRIVATE TYPES AND DEFINITIONS */ + +struct hitem +{ + uint8_t *key; /* key that is hashed */ + uint32_t keyl; /* length of key */ + void *stuff; /* stuff stored in this hitem */ + uint32_t hval; /* hash value */ + struct hitem *next; /* next hitem in list */ +}; +typedef struct hitem hitem; + + +struct htab +{ + struct hitem **table; /* hash table, array of size 2^logsize */ + int logsize; /* log of size of table */ + size_t mask; /* (hashval & mask) is position in table */ + uint32_t count; /* how many items in this hash table so far? */ + uint32_t apos; /* position in the array */ + struct hitem *ipos; /* current item in the array */ + struct reroot *space; /* space for the hitems */ + uint32_t bcount; /* # hitems useable in current block */ +}; +typedef struct htab htab; + +#ifdef __cplusplus +extern "C" { +#endif + +/* PUBLIC FUNCTIONS */ + +/* hcreate - create a hash table + ARGUMENTS: + logsize - 1<count) +#define hkey(t) ((t)->ipos->key) +#define hkeyl(t) ((t)->ipos->keyl) +#define hstuff(t) ((t)->ipos->stuff) + + + +/* hfind - move the current position to a given key + ARGUMENTS: + t - the hash table + key - the key to look for + keyl - length of the key + RETURNS: + TRUE if the item exists, FALSE if it does not. + If the item exists, moves the current position to that item. + */ +int hfind(htab *t, uint8_t *key, uint32_t keyl); + + +/* hadd - add a new item to the hash table + change the position to point at the item with the key + ARGUMENTS: + t - the hash table + key - the key to look for + keyl - length of the key + stuff - other stuff to be stored in this item + RETURNS: + FALSE if the operation fails (because that key is already there). + */ +int hadd(htab *t, uint8_t *key, uint32_t keyl, void *stuff); + + +/* hdel - delete the item at the current position + change the position to the following item + ARGUMENTS: + t - the hash table + RETURNS: + FALSE if there is no current item (meaning the table is empty) + NOTE: + This frees the item, but not the key or stuff stored in the item. + If you want these then deal with them first. For example: + if (hfind(tab, key, keyl)) + { + free(hkey(tab)); + free(hstuff(tab)); + hdel(tab); + } + */ +int hdel(htab *t); + + +/* hfirst - move position to the first item in the table + ARGUMENTS: + t - the hash table + RETURNS: + FALSE if there is no current item (meaning the table is empty) + NOTE: + */ +int hfirst(htab *t); + + +/* hnext - move position to the next item in the table + ARGUMENTS: + t - the hash table + RETURNS: + FALSE if the position wraps around to the beginning of the table + NOTE: + To see every item in the table, do + if (hfirst(t)) do + { + key = hkey(t); + stuff = hstuff(t); + } + while (hnext(t)); + */ +/* int hnext(/o_ htab *t _o/); */ +#define hnext(t) \ + ((!(t)->ipos) ? FALSE : \ + ((t)->ipos=(t)->ipos->next) ? TRUE : hnbucket(t)) + +/* hnbucket - PRIVATE - move to first item in the next nonempty bucket + ARGUMENTS: + t - the hash table + RETURNS: + FALSE if the position wraps around to the beginning of the table + NOTE: + This is private to hashtab; do not use it externally. + */ +int hnbucket(htab *t); + + +/* hstat - print statistics about the hash table + ARGUMENTS: + t - the hash table + NOTE: + items <0>: <#buckets with zero items> buckets + items <1>: <#buckets with 1 item> buckets + ... + buckets: #buckets items: #items existing: x + ( x is the average length of the list when you look for an + item that exists. When the item does not exists, the average + length is #items/#buckets. ) + + If you put n items into n buckets, expect 1/(n!)e buckets to + have n items. That is, .3678 0, .3678 1, .1839 2, ... + Also expect "existing" to be about 2. + */ +void hstat(htab *t); + +#ifdef __cplusplus +} +#endif + +#endif /* HASHTAB */ diff --git a/lookupa.c b/lookupa.c new file mode 100644 index 0000000..731163b --- /dev/null +++ b/lookupa.c @@ -0,0 +1,239 @@ +/* +-------------------------------------------------------------------- +lookupa.c, by Bob Jenkins, December 1996. Same as lookup2.c +Use this code however you wish. Public Domain. No warranty. +Source is http://burtleburtle.net/bob/c/lookupa.c +-------------------------------------------------------------------- +*/ +#include "lookupa.h" + +/* +-------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. +For every delta with one or two bit set, and the deltas of all three + high bits or all three low bits, whether the original value of a,b,c + is almost all zero or is uniformly distributed, +* If mix() is run forward or backward, at least 32 bits in a,b,c + have at least 1/4 probability of changing. +* If mix() is run forward, every bit of c will change between 1/3 and + 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) +mix() was built out of 36 single-cycle latency instructions in a + structure that could supported 2x parallelism, like so: + a -= b; + a -= c; x = (c>>13); + b -= c; a ^= x; + b -= a; x = (a<<8); + c -= a; b ^= x; + c -= b; x = (b>>13); + ... + Unfortunately, superscalar Pentiums and Sparcs can't take advantage + of that parallelism. They've also turned some of those single-cycle + latency instructions into multi-cycle latency instructions. Still, + this is the fastest good hash I could find. There were about 2^^68 + to choose from. I only looked at a billion or so. +-------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* +-------------------------------------------------------------------- +lookup() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + len : the length of the key, counting by bytes + level : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Every 1-bit and 2-bit delta achieves avalanche. +About 6len+35 instructions. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (ub1 **)k, do it like this: + for (i=0, h=0; i= 12) + { + a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24)); + b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24)); + c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24)); + mix(a,b,c); + k += 12; len -= 12; + } + + /*------------------------------------- handle the last 11 bytes */ + c += length; + switch(len) /* all the case statements fall through */ + { + case 11: c+=((uint32_t)k[10]<<24); + case 10: c+=((uint32_t)k[9]<<16); + case 9 : c+=((uint32_t)k[8]<<8); + /* the first byte of c is reserved for the length */ + case 8 : b+=((uint32_t)k[7]<<24); + case 7 : b+=((uint32_t)k[6]<<16); + case 6 : b+=((uint32_t)k[5]<<8); + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3]<<24); + case 3 : a+=((uint32_t)k[2]<<16); + case 2 : a+=((uint32_t)k[1]<<8); + case 1 : a+=k[0]; + /* case 0: nothing left to add */ + } + mix(a,b,c); + /*-------------------------------------------- report the result */ + return c; +} + + +/* +-------------------------------------------------------------------- +mixc -- mixc 8 4-bit values as quickly and thoroughly as possible. +Repeating mix() three times achieves avalanche. +Repeating mix() four times eliminates all funnels and all + characteristics stronger than 2^{-11}. +-------------------------------------------------------------------- +*/ +#define mixc(a,b,c,d,e,f,g,h) \ +{ \ + a^=b<<11; d+=a; b+=c; \ + b^=c>>2; e+=b; c+=d; \ + c^=d<<8; f+=c; d+=e; \ + d^=e>>16; g+=d; e+=f; \ + e^=f<<10; h+=e; f+=g; \ + f^=g>>4; a+=f; g+=h; \ + g^=h<<8; b+=g; h+=a; \ + h^=a>>9; c+=h; a+=b; \ +} + +/* +-------------------------------------------------------------------- +checksum() -- hash a variable-length key into a 256-bit value + k : the key (the unaligned variable-length array of bytes) + len : the length of the key, counting by bytes + state : an array of CHECKSTATE 4-byte values (256 bits) +The state is the checksum. Every bit of the key affects every bit of +the state. There are no funnels. About 112+6.875len instructions. + +If you are hashing n strings (ub1 **)k, do it like this: + for (i=0; i<8; ++i) state[i] = 0x9e3779b9; + for (i=0, h=0; i= 32) + { + a += (k[0] +(k[1]<<8) +(k[2]<<16) +(k[3]<<24)); + b += (k[4] +(k[5]<<8) +(k[6]<<16) +(k[7]<<24)); + c += (k[8] +(k[9]<<8) +(k[10]<<16)+(k[11]<<24)); + d += (k[12]+(k[13]<<8)+(k[14]<<16)+(k[15]<<24)); + e += (k[16]+(k[17]<<8)+(k[18]<<16)+(k[19]<<24)); + f += (k[20]+(k[21]<<8)+(k[22]<<16)+(k[23]<<24)); + g += (k[24]+(k[25]<<8)+(k[26]<<16)+(k[27]<<24)); + h += (k[28]+(k[29]<<8)+(k[30]<<16)+(k[31]<<24)); + mixc(a,b,c,d,e,f,g,h); + mixc(a,b,c,d,e,f,g,h); + mixc(a,b,c,d,e,f,g,h); + mixc(a,b,c,d,e,f,g,h); + k += 32; len -= 32; + } + + /*------------------------------------- handle the last 31 bytes */ + h += length; + switch(len) + { + case 31: h+=(k[30]<<24); + case 30: h+=(k[29]<<16); + case 29: h+=(k[28]<<8); + case 28: g+=(k[27]<<24); + case 27: g+=(k[26]<<16); + case 26: g+=(k[25]<<8); + case 25: g+=k[24]; + case 24: f+=(k[23]<<24); + case 23: f+=(k[22]<<16); + case 22: f+=(k[21]<<8); + case 21: f+=k[20]; + case 20: e+=(k[19]<<24); + case 19: e+=(k[18]<<16); + case 18: e+=(k[17]<<8); + case 17: e+=k[16]; + case 16: d+=(k[15]<<24); + case 15: d+=(k[14]<<16); + case 14: d+=(k[13]<<8); + case 13: d+=k[12]; + case 12: c+=(k[11]<<24); + case 11: c+=(k[10]<<16); + case 10: c+=(k[9]<<8); + case 9 : c+=k[8]; + case 8 : b+=(k[7]<<24); + case 7 : b+=(k[6]<<16); + case 6 : b+=(k[5]<<8); + case 5 : b+=k[4]; + case 4 : a+=(k[3]<<24); + case 3 : a+=(k[2]<<16); + case 2 : a+=(k[1]<<8); + case 1 : a+=k[0]; + } + mixc(a,b,c,d,e,f,g,h); + mixc(a,b,c,d,e,f,g,h); + mixc(a,b,c,d,e,f,g,h); + mixc(a,b,c,d,e,f,g,h); + + /*-------------------------------------------- report the result */ + state[0]=a; state[1]=b; state[2]=c; state[3]=d; + state[4]=e; state[5]=f; state[6]=g; state[7]=h; +} diff --git a/lookupa.h b/lookupa.h new file mode 100644 index 0000000..8001b3c --- /dev/null +++ b/lookupa.h @@ -0,0 +1,30 @@ +/* +------------------------------------------------------------------------------ +By Bob Jenkins, September 1996. +lookupa.h, a hash function for table lookup, same function as lookup.c. +Use this code in any way you wish. Public Domain. It has no warranty. +Source is http://burtleburtle.net/bob/c/lookupa.h +------------------------------------------------------------------------------ +*/ + +#ifndef LOOKUPA +#define LOOKUPA + +#include + +#define CHECKSTATE 8 +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) + +#ifdef __cplusplus +extern "C" { +#endif + +uint32_t lookup(uint8_t * k, uint32_t length, uint32_t level); +void checksum(uint8_t * k, uint32_t length, uint32_t * state); + +#ifdef __cplusplus +} +#endif + +#endif /* LOOKUPA */ diff --git a/mpq-fs.c b/mpq-fs.c index ae9ed02..3a2a4b3 100644 --- a/mpq-fs.c +++ b/mpq-fs.c @@ -1,90 +1,50 @@ #include #include #include +#include "lookupa.h" +#include "hashtab.h" #include "mpq-fs.h" +#include "mpq-misc.h" #define MAX_FNAME 2048 -typedef struct directory_list_t { - struct directory_list_t * child; - char * name; +#define INITIAL_HASH_SIZE 10 + +typedef struct hash_entry_t { struct mpq_archive_t * mpq_a; int entry; - struct directory_list_t * next; -} directory_list; - -static directory_list root_dir = { - NULL, "", NULL, -1, NULL -}; - -static directory_list * root = &root_dir; +} hash_entry; -static char * strtoupper(char * _str) { +static char * strnormalize(char * _str) { char * str = _str; while (*str) { *str = toupper(*str); + if (*str == '/') + *str = '\\'; str++; } return _str; } -/* - * The 'new' keyword is abusive: the function returns an existing entry if it matches, allowing overrites. - */ -static directory_list * new_directory_list(directory_list * parent, struct mpq_archive_t * mpq_a, const char * name) { - directory_list * r; - - for (r = parent->child; r; r = r->next) { - if (!strcasecmp(r->name, name)) - break; - } - - if (!r) { - r = (directory_list *) malloc(sizeof(directory_list)); - r->child = NULL; - r->next = parent->child; - parent->child = r; - r->name = strtoupper(strdup(name)); - } - r->mpq_a = mpq_a; - r->entry = -1; - - return r; -} +htab * hash_table = 0; -/* - * Recursively adds a new entry into our directory structure. - */ -static directory_list * add_file_r(directory_list * parent, struct mpq_archive_t * mpq_a, char * fname, int entry) { - char * bs; - directory_list * r; - int tail = 0; - - bs = strchr(fname, '\\'); - - if (bs) { - *bs = 0; - } else { - tail = 1; - } - if (!(r = new_directory_list(parent, mpq_a, fname))) - return NULL; - - if (tail) { - r->entry = entry; - return r; - } - - return add_file_r(r, mpq_a, bs + 1, entry); -} - -static directory_list * add_file(struct mpq_archive_t * mpq_a, char * fname) { +static void add_file(struct mpq_archive_t * mpq_a, char * fname) { int entry; - + char * nfname; + if ((entry = mpqlib_find_hash_entry_by_name(mpq_a, fname, 0, 0)) < 0) - return NULL; + return; - return add_file_r(root, mpq_a, fname, entry); + if (!hash_table) + hash_table = hcreate(INITIAL_HASH_SIZE); + + nfname = strnormalize(strdup(fname)); + hadd(hash_table, (uint8_t *) nfname, strlen(nfname), NULL); + if (!hstuff(hash_table)) { + hstuff(hash_table) = malloc(sizeof(hash_entry)); + } + ((hash_entry *) hstuff(hash_table))->mpq_a = mpq_a; + ((hash_entry *) hstuff(hash_table))->entry = entry; } /* @@ -143,39 +103,16 @@ void mpqlib_fs_attach_listfile(struct mpq_archive_t * mpq_a, const char * listfi /* * Recursively find a file. */ -static directory_list * find_file_r(directory_list * parent, char * fname) { - char * bs; - directory_list * r; - int tail = 0; - - bs = strchr(fname, '\\'); - - if (bs) { - *bs = 0; - } else { - tail = 1; - } - - for (r = parent->child; r; r = r->next) { - if (!strcmp(fname, r->name)) { - if (tail) { - return r; - } else { - return find_file_r(r, bs + 1); - } - } - } +static hash_entry * find_file(char * fname) { + if (!hfind(hash_table, (uint8_t *) fname, strlen(fname))) + return NULL; - return NULL; -} - -static directory_list * find_file(char * fname) { - return find_file_r(root, fname); + return (hash_entry *) hstuff(hash_table); } struct mpq_file_t * mpqlib_fs_open(const char * _fname) { - char * fname = strtoupper(strdup(_fname)); - directory_list * entry; + char * fname = strnormalize(strdup(_fname)); + hash_entry * entry; entry = find_file(fname); diff --git a/recycle.c b/recycle.c new file mode 100644 index 0000000..261c54f --- /dev/null +++ b/recycle.c @@ -0,0 +1,87 @@ +/* +-------------------------------------------------------------------- +By Bob Jenkins, September 1996. recycle.c +You may use this code in any way you wish, and it is free. No warranty. + +This manages memory for commonly-allocated structures. +It allocates RESTART to REMAX items at a time. +Timings have shown that, if malloc is used for every new structure, + malloc will consume about 90% of the time in a program. This + module cuts down the number of mallocs by an order of magnitude. +This also decreases memory fragmentation, and freeing structures + only requires freeing the root. +-------------------------------------------------------------------- +*/ + +#include +#include +#include +#include "recycle.h" + +#define align(a) (((uint32_t)a+(sizeof(void *)-1))&(~(sizeof(void *)-1))) + +reroot *remkroot(size) +size_t size; +{ + reroot *r = (reroot *)remalloc(sizeof(reroot), "recycle.c, root"); + r->list = (recycle *)0; + r->trash = (recycle *)0; + r->size = align(size); + r->logsize = RESTART; + r->numleft = 0; + return r; +} + +void refree(r) +struct reroot *r; +{ + recycle *temp; + if ((temp = r->list)) while (r->list) + { + temp = r->list->next; + free((char *)r->list); + r->list = temp; + } + free((char *)r); + return; +} + +/* to be called from the macro renew only */ +char *renewx(r) +struct reroot *r; +{ + recycle *temp; + if (r->trash) + { /* pull a node off the trash heap */ + temp = r->trash; + r->trash = temp->next; + (void)memset((void *)temp, 0, r->size); + } + else + { /* allocate a new block of nodes */ + r->numleft = r->size*((uint32_t)1<logsize); + if (r->numleft < REMAX) ++r->logsize; + temp = (recycle *)remalloc(sizeof(recycle) + r->numleft, + "recycle.c, data"); + temp->next = r->list; + r->list = temp; + r->numleft-=r->size; + temp = (recycle *)((char *)(r->list+1)+r->numleft); + } + return (char *)temp; +} + +char *remalloc(len, purpose) +size_t len; +char *purpose; +{ + char *x = (char *)malloc(len); + if (!x) + { + fprintf(stderr, "malloc of %d failed for %s\n", + len, purpose); + exit(-1); + } + return x; +} + diff --git a/recycle.h b/recycle.h new file mode 100644 index 0000000..8f4b2ce --- /dev/null +++ b/recycle.h @@ -0,0 +1,71 @@ +/* +-------------------------------------------------------------------- +By Bob Jenkins, September 1996. recycle.h +You may use this code in any way you wish, and it is free. No warranty. + +This manages memory for commonly-allocated structures. +It allocates RESTART to REMAX items at a time. +Timings have shown that, if malloc is used for every new structure, + malloc will consume about 90% of the time in a program. This + module cuts down the number of mallocs by an order of magnitude. +This also decreases memory fragmentation, and freeing all structures + only requires freeing the root. +-------------------------------------------------------------------- +*/ + +#ifndef RECYCLE +#define RECYCLE + +#define RESTART 0 +#define REMAX 32000 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct recycle +{ + struct recycle *next; +}; +typedef struct recycle recycle; + +struct reroot +{ + struct recycle *list; /* list of malloced blocks */ + struct recycle *trash; /* list of deleted items */ + size_t size; /* size of an item */ + size_t logsize; /* log_2 of number of items in a block */ + int numleft; /* number of bytes left in this block */ +}; +typedef struct reroot reroot; + +/* make a new recycling root */ +reroot *remkroot(size_t mysize); + +/* free a recycling root and all the items it has made */ +void refree(struct reroot *r); + +/* get a new (cleared) item from the root */ +#define renew(r) ((r)->numleft ? \ + (((char *)((r)->list+1))+((r)->numleft-=(r)->size)) : renewx(r)) + +char *renewx(struct reroot *r); + +/* delete an item; let the root recycle it */ +/* void redel(/o_ struct reroot *r, struct recycle *item _o/); */ +#define redel(root,item) { \ + ((recycle *)item)->next=(root)->trash; \ + (root)->trash=(recycle *)(item); \ +} + +/* malloc, but complain to stderr and exit program if no joy */ +/* use plain free() to free memory allocated by remalloc() */ +char *remalloc(size_t len, char *purpose); + +#ifdef __cplusplus +} +#endif + +#endif /* RECYCLE */ -- cgit v1.2.3