diff options
-rw-r--r-- | Changes | 4 | ||||
-rw-r--r-- | lzfP.h | 10 | ||||
-rw-r--r-- | lzf_c.c | 30 | ||||
-rw-r--r-- | lzf_d.c | 18 |
4 files changed, 40 insertions, 22 deletions
@@ -1,3 +1,7 @@ +2.1 + - get rid of memcpy. + - tentatively use rep movsb on x86 and x86_64 (gcc only) for a + moderate speed improvement. 2.0 Fri Feb 16 23:11:18 CET 2007 - replaced lzf demo by industrial-strength lzf utility with behaviour @@ -86,14 +86,6 @@ #endif /* - * Use string functions to copy memory. - * this is usually a loss, even with glibc's optimized memcpy - */ -#ifndef USE_MEMCPY -# define USE_MEMCPY 0 -#endif - -/* * You may choose to pre-set the hash table (might be faster on some * modern cpus and large (>>64k) blocks, and also makes compression * deterministic/repeatable when the configuration otherwise is the same). @@ -158,7 +150,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)]; # endif #endif -#if USE_MEMCPY || INIT_HTAB +#if INIT_HTAB # ifdef __cplusplus # include <cstring> # else @@ -72,6 +72,13 @@ #define MAX_OFF (1 << 13) #define MAX_REF ((1 << 8) + (1 << 3)) +#if (__i386 || __amd64) && __GNUC__ >= 3 +# define lzf_movsb(dst, src, len) \ + asm ("rep movsb" \ + : "=D" (dst), "=S" (src), "=c" (len) \ + : "0" (dst), "1" (src), "2" (len)); +#endif + /* * compressed format * @@ -104,11 +111,10 @@ lzf_compress (const void *const in_data, unsigned int in_len, int lit = 0; #if INIT_HTAB -# if USE_MEMCPY - memset (htab, 0, sizeof (htab)); -# else - for (hslot = htab; hslot < htab + HSIZE; hslot++) - *hslot++ = ip; + memset (htab, 0, sizeof (htab)); +# if 0 + for (hslot = htab; hslot < htab + HSIZE; hslot++) + *hslot++ = ip; # endif #endif @@ -214,10 +220,10 @@ lzf_compress (const void *const in_data, unsigned int in_len, return 0; *op++ = MAX_LIT - 1; -#if USE_MEMCPY - memcpy (op, ip - MAX_LIT, MAX_LIT); - op += MAX_LIT; - lit = 0; + +#ifdef lzf_movsb + ip -= lit; + lzf_movsb (op, ip, lit); #else lit = -lit; do @@ -233,11 +239,17 @@ lzf_compress (const void *const in_data, unsigned int in_len, return 0; *op++ = lit - 1; +#ifdef lzf_movsb + ip -= lit; + lzf_movsb (op, ip, lit); +#else lit = -lit; do *op++ = ip[lit]; while (++lit); +#endif } return op - (u8 *) out_data; } + @@ -45,6 +45,13 @@ # define SET_ERRNO(n) errno = (n) #endif +#if (__i386 || __amd64) && __GNUC__ >= 3 +# define lzf_movsb(dst, src, len) \ + asm ("rep movsb" \ + : "=D" (dst), "=S" (src), "=c" (len) \ + : "0" (dst), "1" (src), "2" (len)); +#endif + unsigned int lzf_decompress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len) @@ -76,10 +83,8 @@ lzf_decompress (const void *const in_data, unsigned int in_len, } #endif -#if USE_MEMCPY - memcpy (op, ip, ctrl); - op += ctrl; - ip += ctrl; +#ifdef lzf_movsb + lzf_movsb (op, ip, ctrl); #else do *op++ = *ip++; @@ -125,12 +130,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len, return 0; } +#ifdef lzf_movsb + len += 2; + lzf_movsb (op, ref, len); +#else *op++ = *ref++; *op++ = *ref++; do *op++ = *ref++; while (--len); +#endif } } while (ip < in_end); |