diff options
| author | root <root> | 2010-05-30 05:46:55 +0000 | 
|---|---|---|
| committer | root <root> | 2010-05-30 05:46:55 +0000 | 
| commit | 93f371a3809b11713e0230705637e7f148904505 (patch) | |
| tree | 1a75258c5c55abfff7cc9a4d66524faf668b689b | |
| parent | a7c01fa037a0965364cfd1fe2d19c158aa2bafaf (diff) | |
*** empty log message ***
| -rw-r--r-- | Changes | 4 | ||||
| -rw-r--r-- | bench.c | 14 | ||||
| -rw-r--r-- | lzf_c.c | 6 | ||||
| -rw-r--r-- | lzf_d.c | 57 | 
4 files changed, 64 insertions, 17 deletions
| @@ -1,5 +1,9 @@  	- fixed hash calculation in C♯ version (Tiago Freitas Leal). +        - unroll copy for small sizes, use memcpy for larger sizes, +          greatly speeding up decompression in most cases. +        - finally disable rep movsb - it's a big loss on modern intel cpus, +          and only a small win on amd cpus.  3.5  Fri May  1 02:28:42 CEST 2009  	- lzf_compress did sometimes write one octet past the given output @@ -6,6 +6,7 @@  #include <sys/times.h>  #include <sys/types.h>  #include <sys/socket.h> +#include <sys/mman.h>  #include <sys/ioctl.h>  #include <sys/stat.h>  #include <sys/resource.h> @@ -70,7 +71,7 @@ int main(void)     for (lp = 0; lp < 1000000; lp++) {        s=stamp(); -      struct timespec ts; clock_gettime (CLOCK_THREAD_CPUTIME_ID, &ts); +      //struct timespec ts; clock_gettime (CLOCK_THREAD_CPUTIME_ID, &ts);        //printf ("%9ld\n", ts.tv_nsec);//D        //struct rusage usage; getrusage (RUSAGE_SELF, &usage);        //struct tms tms; times (&tms); @@ -82,14 +83,17 @@ int main(void)        //read (p[0], &buf, 4);        //stat ("/etc/passwd", &sbuf);        //struct timeval tv; gettimeofday (&tv, 0); +      //void *x = mmap (0, 16384, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,-1,0); -      //l = lzf_compress (data, DSIZE, data2, DSIZE*2); -      //assert(l); +      l = lzf_compress (data, DSIZE, data2, DSIZE*2); +      assert(l); + +      j = lzf_decompress (data2, l, data3, DSIZE*2); +      assert (j == DSIZE);        si[0]=measure(s); -      //j = lzf_decompress (data2, l, data3, DSIZE*2); -      //assert (j == DSIZE); +      assert (!memcmp (data, data3, DSIZE));        printf ("\r%10d (%d) ", si[0], l);        if (si[0] < min && si[0] > 0) @@ -89,9 +89,9 @@  /*   * compressed format   * - * 000LLLLL <L+1>    ; literal - * LLLooooo oooooooo ; backref L - * 111ooooo LLLLLLLL oooooooo ; backref L+7 + * 000LLLLL <L+1>    ; literal, L+1=1..33 octets + * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset + * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset   *   */ @@ -36,6 +36,8 @@  #include "lzfP.h" +#include <string.h> /* for memcpy/memset */ +  #if AVOID_ERRNO  # define SET_ERRNO(n)  #else @@ -43,12 +45,14 @@  # define SET_ERRNO(n) errno = (n)  #endif +#if USE_REP_MOVSB /* small win on amd, big loss on intel */  #if (__i386 || __amd64) && __GNUC__ >= 3  # define lzf_movsb(dst, src, len)                \     asm ("rep movsb"                              \          : "=D" (dst), "=S" (src), "=c" (len)     \          :  "0" (dst),  "1" (src),  "2" (len));  #endif +#endif  unsigned int   lzf_decompress (const void *const in_data,  unsigned int in_len, @@ -84,9 +88,17 @@ lzf_decompress (const void *const in_data,  unsigned int in_len,  #ifdef lzf_movsb            lzf_movsb (op, ip, ctrl);  #else -          do -            *op++ = *ip++; -          while (--ctrl); +          switch (ctrl) +            { +              case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++; +              case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++; +              case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++; +              case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++; +              case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++; +              case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case  9: *op++ = *ip++; +              case  8: *op++ = *ip++; case  7: *op++ = *ip++; case  6: *op++ = *ip++; case  5: *op++ = *ip++; +              case  4: *op++ = *ip++; case  3: *op++ = *ip++; case  2: *op++ = *ip++; case  1: *op++ = *ip++; +            }  #endif          }        else /* back reference */ @@ -132,12 +144,39 @@ lzf_decompress (const void *const in_data,  unsigned int in_len,            len += 2;            lzf_movsb (op, ref, len);  #else -          *op++ = *ref++; -          *op++ = *ref++; - -          do -            *op++ = *ref++; -          while (--len); +          switch (len) +            { +              default: +                len += 2; + +                if (op >= ref + len) +                  { +                    /* disjunct areas */ +                    memcpy (op, ref, len); +                    op += len; +                  } +                else +                  { +                    /* overlapping, use octte by octte copying */ +                    do +                      *op++ = *ref++; +                    while (--len); +                  } + +                break; + +              case 9: *op++ = *ref++; +              case 8: *op++ = *ref++; +              case 7: *op++ = *ref++; +              case 6: *op++ = *ref++; +              case 5: *op++ = *ref++; +              case 4: *op++ = *ref++; +              case 3: *op++ = *ref++; +              case 2: *op++ = *ref++; +              case 1: *op++ = *ref++; +              case 0: *op++ = *ref++; /* two octets more */ +                      *op++ = *ref++; +            }  #endif          }      } | 
