From 3abfcd994a7dad841f223bb64bd74fb59d13fef2 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 21 Jun 2007 22:11:34 +0000 Subject: *** empty log message *** --- Changes | 4 ++++ lzfP.h | 10 +--------- lzf_c.c | 30 +++++++++++++++++++++--------- lzf_d.c | 18 ++++++++++++++---- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/Changes b/Changes index cbded04..ac650cd 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,7 @@ +2.1 + - get rid of memcpy. + - tentatively use rep movsb on x86 and x86_64 (gcc only) for a + moderate speed improvement. 2.0 Fri Feb 16 23:11:18 CET 2007 - replaced lzf demo by industrial-strength lzf utility with behaviour diff --git a/lzfP.h b/lzfP.h index 988ea14..ce775a2 100644 --- a/lzfP.h +++ b/lzfP.h @@ -85,14 +85,6 @@ # define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) #endif -/* - * Use string functions to copy memory. - * this is usually a loss, even with glibc's optimized memcpy - */ -#ifndef USE_MEMCPY -# define USE_MEMCPY 0 -#endif - /* * You may choose to pre-set the hash table (might be faster on some * modern cpus and large (>>64k) blocks, and also makes compression @@ -158,7 +150,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)]; # endif #endif -#if USE_MEMCPY || INIT_HTAB +#if INIT_HTAB # ifdef __cplusplus # include <cstring> # else diff --git a/lzf_c.c b/lzf_c.c index f65ad72..216a4d9 100644 --- a/lzf_c.c +++ b/lzf_c.c @@ -72,6 +72,13 @@ #define MAX_OFF (1 << 13) #define MAX_REF ((1 << 8) + (1 << 3)) +#if (__i386 || __amd64) && __GNUC__ >= 3 +# define lzf_movsb(dst, src, len) \ + asm ("rep movsb" \ + : "=D" (dst), "=S" (src), "=c" (len) \ + : "0" (dst), "1" (src), "2" (len)); +#endif + /* * compressed format * @@ -104,11 +111,10 @@ lzf_compress (const void *const in_data, unsigned int in_len, int lit = 0; #if INIT_HTAB -# if USE_MEMCPY - memset (htab, 0, sizeof (htab)); -# else - for (hslot = htab; hslot < htab + HSIZE; hslot++) - *hslot++ = ip; + memset (htab, 0, sizeof (htab)); +# if 0 + for (hslot = htab; hslot < htab + 
HSIZE; hslot++) + *hslot++ = ip; # endif #endif @@ -214,10 +220,10 @@ lzf_compress (const void *const in_data, unsigned int in_len, return 0; *op++ = MAX_LIT - 1; -#if USE_MEMCPY - memcpy (op, ip - MAX_LIT, MAX_LIT); - op += MAX_LIT; - lit = 0; + +#ifdef lzf_movsb + ip -= lit; + lzf_movsb (op, ip, lit); #else lit = -lit; do @@ -233,11 +239,17 @@ lzf_compress (const void *const in_data, unsigned int in_len, return 0; *op++ = lit - 1; +#ifdef lzf_movsb + ip -= lit; + lzf_movsb (op, ip, lit); +#else lit = -lit; do *op++ = ip[lit]; while (++lit); +#endif } return op - (u8 *) out_data; } + diff --git a/lzf_d.c b/lzf_d.c index 73a1a80..61df183 100644 --- a/lzf_d.c +++ b/lzf_d.c @@ -45,6 +45,13 @@ # define SET_ERRNO(n) errno = (n) #endif +#if (__i386 || __amd64) && __GNUC__ >= 3 +# define lzf_movsb(dst, src, len) \ + asm ("rep movsb" \ + : "=D" (dst), "=S" (src), "=c" (len) \ + : "0" (dst), "1" (src), "2" (len)); +#endif + unsigned int lzf_decompress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len) @@ -76,10 +83,8 @@ lzf_decompress (const void *const in_data, unsigned int in_len, } #endif -#if USE_MEMCPY - memcpy (op, ip, ctrl); - op += ctrl; - ip += ctrl; +#ifdef lzf_movsb + lzf_movsb (op, ip, ctrl); #else do *op++ = *ip++; @@ -125,12 +130,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len, return 0; } +#ifdef lzf_movsb + len += 2; + lzf_movsb (op, ref, len); +#else *op++ = *ref++; *op++ = *ref++; do *op++ = *ref++; while (--len); +#endif } } while (ip < in_end); -- cgit v1.2.3