diff options
-rw-r--r-- | Changes | 4 | ||||
-rw-r--r-- | bench.c | 14 | ||||
-rw-r--r-- | lzf_c.c | 6 | ||||
-rw-r--r-- | lzf_d.c | 57 |
4 files changed, 64 insertions, 17 deletions
@@ -1,5 +1,9 @@ - fixed hash calculation in C♯ version (Tiago Freitas Leal). + - unroll copy for small sizes, use memcpy for larger sizes, + greatly speeding up decompression in most cases. + - finally disable rep movsb - it's a big loss on modern intel cpus, + and only a small win on amd cpus. 3.5 Fri May 1 02:28:42 CEST 2009 - lzf_compress did sometimes write one octet past the given output @@ -6,6 +6,7 @@ #include <sys/times.h> #include <sys/types.h> #include <sys/socket.h> +#include <sys/mman.h> #include <sys/ioctl.h> #include <sys/stat.h> #include <sys/resource.h> @@ -70,7 +71,7 @@ int main(void) for (lp = 0; lp < 1000000; lp++) { s=stamp(); - struct timespec ts; clock_gettime (CLOCK_THREAD_CPUTIME_ID, &ts); + //struct timespec ts; clock_gettime (CLOCK_THREAD_CPUTIME_ID, &ts); //printf ("%9ld\n", ts.tv_nsec);//D //struct rusage usage; getrusage (RUSAGE_SELF, &usage); //struct tms tms; times (&tms); @@ -82,14 +83,17 @@ int main(void) //read (p[0], &buf, 4); //stat ("/etc/passwd", &sbuf); //struct timeval tv; gettimeofday (&tv, 0); + //void *x = mmap (0, 16384, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,-1,0); - //l = lzf_compress (data, DSIZE, data2, DSIZE*2); - //assert(l); + l = lzf_compress (data, DSIZE, data2, DSIZE*2); + assert(l); + + j = lzf_decompress (data2, l, data3, DSIZE*2); + assert (j == DSIZE); si[0]=measure(s); - //j = lzf_decompress (data2, l, data3, DSIZE*2); - //assert (j == DSIZE); + assert (!memcmp (data, data3, DSIZE)); printf ("\r%10d (%d) ", si[0], l); if (si[0] < min && si[0] > 0) @@ -89,9 +89,9 @@ /* * compressed format * - * 000LLLLL <L+1> ; literal - * LLLooooo oooooooo ; backref L - * 111ooooo LLLLLLLL oooooooo ; backref L+7 + * 000LLLLL <L+1> ; literal, L+1=1..33 octets + * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset + * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset * */ @@ -36,6 +36,8 @@ #include "lzfP.h" +#include <string.h> /* for memcpy/memset */ + #if AVOID_ERRNO # define 
SET_ERRNO(n) #else @@ -43,12 +45,14 @@ # define SET_ERRNO(n) errno = (n) #endif +#if USE_REP_MOVSB /* small win on amd, big loss on intel */ #if (__i386 || __amd64) && __GNUC__ >= 3 # define lzf_movsb(dst, src, len) \ asm ("rep movsb" \ : "=D" (dst), "=S" (src), "=c" (len) \ : "0" (dst), "1" (src), "2" (len)); #endif +#endif unsigned int lzf_decompress (const void *const in_data, unsigned int in_len, @@ -84,9 +88,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len, #ifdef lzf_movsb lzf_movsb (op, ip, ctrl); #else - do - *op++ = *ip++; - while (--ctrl); + switch (ctrl) + { + case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++; + case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++; + case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++; + case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++; + case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++; + case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++; + case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++; + case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++; + } #endif } else /* back reference */ @@ -132,12 +144,39 @@ lzf_decompress (const void *const in_data, unsigned int in_len, len += 2; lzf_movsb (op, ref, len); #else - *op++ = *ref++; - *op++ = *ref++; - - do - *op++ = *ref++; - while (--len); + switch (len) + { + default: + len += 2; + + if (op >= ref + len) + { + /* disjunct areas */ + memcpy (op, ref, len); + op += len; + } + else + { + /* overlapping, use octet by octet copying */ + do + *op++ = *ref++; + while (--len); + } + + break; + + case 9: *op++ = *ref++; + case 8: *op++ = *ref++; + case 7: *op++ = *ref++; + case 6: *op++ = *ref++; + case 5: *op++ = *ref++; + 
case 4: *op++ = *ref++; + case 3: *op++ = *ref++; + case 2: *op++ = *ref++; + case 1: *op++ = *ref++; + case 0: *op++ = *ref++; /* two octets more */ + *op++ = *ref++; + } #endif } } |