diff options
| author | root <root> | 2007-06-21 22:11:34 +0000 | 
|---|---|---|
| committer | root <root> | 2007-06-21 22:11:34 +0000 | 
| commit | 3abfcd994a7dad841f223bb64bd74fb59d13fef2 (patch) | |
| tree | 1f0da4493c9d60ef30858bac6491556a00e71783 | |
| parent | 4e5f1e7cb11f71bcc35be6babc6a3ba037e5b5b8 (diff) | |
*** empty log message ***
| -rw-r--r-- | Changes | 4 | ||||
| -rw-r--r-- | lzfP.h | 10 | ||||
| -rw-r--r-- | lzf_c.c | 30 | ||||
| -rw-r--r-- | lzf_d.c | 18 | 
4 files changed, 40 insertions, 22 deletions
```diff
@@ -1,3 +1,7 @@
+2.1
+	- get rid of memcpy.
+        - tentatively use rep movsb on x86 and x86_64 (gcc only) for a
+          moderate speed improvement.
 2.0  Fri Feb 16 23:11:18 CET 2007
 	- replaced lzf demo by industrial-strength lzf utility with behaviour
@@ -86,14 +86,6 @@
 #endif
 /*
- * Use string functions to copy memory.
- * this is usually a loss, even with glibc's optimized memcpy
- */
-#ifndef USE_MEMCPY
-# define USE_MEMCPY 0
-#endif
-
-/*
  * You may choose to pre-set the hash table (might be faster on some
  * modern cpus and large (>>64k) blocks, and also makes compression
  * deterministic/repeatable when the configuration otherwise is the same).
@@ -158,7 +150,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)];
 # endif
 #endif
-#if USE_MEMCPY || INIT_HTAB
+#if INIT_HTAB
 # ifdef __cplusplus
 #  include <cstring>
 # else
@@ -72,6 +72,13 @@
 #define        MAX_OFF        (1 << 13)
 #define        MAX_REF        ((1 << 8) + (1 << 3))
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len)                \
+   asm ("rep movsb"                              \
+        : "=D" (dst), "=S" (src), "=c" (len)     \
+        :  "0" (dst),  "1" (src),  "2" (len));
+#endif
+
 /*
  * compressed format
  *
@@ -104,11 +111,10 @@ lzf_compress (const void *const in_data, unsigned int in_len,
            int lit = 0;
 #if INIT_HTAB
-# if USE_MEMCPY
-    memset (htab, 0, sizeof (htab));
-# else
-    for (hslot = htab; hslot < htab + HSIZE; hslot++)
-      *hslot++ = ip;
+  memset (htab, 0, sizeof (htab));
+# if 0
+  for (hslot = htab; hslot < htab + HSIZE; hslot++)
+    *hslot++ = ip;
 # endif
 #endif
@@ -214,10 +220,10 @@ lzf_compress (const void *const in_data, unsigned int in_len,
             return 0;
           *op++ = MAX_LIT - 1;
-#if USE_MEMCPY
-          memcpy (op, ip - MAX_LIT, MAX_LIT);
-          op += MAX_LIT;
-          lit = 0;
+
+#ifdef lzf_movsb
+          ip -= lit;
+          lzf_movsb (op, ip, lit);
 #else
           lit = -lit;
           do
@@ -233,11 +239,17 @@ lzf_compress (const void *const in_data, unsigned int in_len,
 	return 0;
       *op++ = lit - 1;
+#ifdef lzf_movsb
+      ip -= lit;
+      lzf_movsb (op, ip, lit);
+#else
       lit = -lit;
       do
 	*op++ = ip[lit];
       while (++lit);
+#endif
     }
   return op - (u8 *) out_data;
 }
+
@@ -45,6 +45,13 @@
 # define SET_ERRNO(n) errno = (n)
 #endif
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len)                \
+   asm ("rep movsb"                              \
+        : "=D" (dst), "=S" (src), "=c" (len)     \
+        :  "0" (dst),  "1" (src),  "2" (len));
+#endif
+
 unsigned int 
 lzf_decompress (const void *const in_data,  unsigned int in_len,
                 void             *out_data, unsigned int out_len)
@@ -76,10 +83,8 @@ lzf_decompress (const void *const in_data,  unsigned int in_len,
             }
 #endif
-#if USE_MEMCPY
-          memcpy (op, ip, ctrl);
-          op += ctrl;
-          ip += ctrl;
+#ifdef lzf_movsb
+          lzf_movsb (op, ip, ctrl);
 #else
           do
             *op++ = *ip++;
@@ -125,12 +130,17 @@ lzf_decompress (const void *const in_data,  unsigned int in_len,
               return 0;
             }
+#ifdef lzf_movsb
+          len += 2;
+          lzf_movsb (op, ref, len);
+#else
           *op++ = *ref++;
           *op++ = *ref++;
           do
             *op++ = *ref++;
           while (--len);
+#endif
         }
     }
   while (ip < in_end);
```
