summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  Changes   4
-rw-r--r--  lzfP.h   10
-rw-r--r--  lzf_c.c  30
-rw-r--r--  lzf_d.c  18
4 files changed, 40 insertions, 22 deletions
diff --git a/Changes b/Changes
index cbded04..ac650cd 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+2.1
+ - get rid of memcpy.
+ - tentatively use rep movsb on x86 and x86_64 (gcc only) for a
+ moderate speed improvement.
2.0 Fri Feb 16 23:11:18 CET 2007
- replaced lzf demo by industrial-strength lzf utility with behaviour
diff --git a/lzfP.h b/lzfP.h
index 988ea14..ce775a2 100644
--- a/lzfP.h
+++ b/lzfP.h
@@ -86,14 +86,6 @@
#endif
/*
- * Use string functions to copy memory.
- * this is usually a loss, even with glibc's optimized memcpy
- */
-#ifndef USE_MEMCPY
-# define USE_MEMCPY 0
-#endif
-
-/*
* You may choose to pre-set the hash table (might be faster on some
* modern cpus and large (>>64k) blocks, and also makes compression
* deterministic/repeatable when the configuration otherwise is the same).
@@ -158,7 +150,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)];
# endif
#endif
-#if USE_MEMCPY || INIT_HTAB
+#if INIT_HTAB
# ifdef __cplusplus
# include <cstring>
# else
diff --git a/lzf_c.c b/lzf_c.c
index f65ad72..216a4d9 100644
--- a/lzf_c.c
+++ b/lzf_c.c
@@ -72,6 +72,13 @@
#define MAX_OFF (1 << 13)
#define MAX_REF ((1 << 8) + (1 << 3))
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len) \
+ asm ("rep movsb" \
+ : "=D" (dst), "=S" (src), "=c" (len) \
+ : "0" (dst), "1" (src), "2" (len));
+#endif
+
/*
* compressed format
*
@@ -104,11 +111,10 @@ lzf_compress (const void *const in_data, unsigned int in_len,
int lit = 0;
#if INIT_HTAB
-# if USE_MEMCPY
- memset (htab, 0, sizeof (htab));
-# else
- for (hslot = htab; hslot < htab + HSIZE; hslot++)
- *hslot++ = ip;
+ memset (htab, 0, sizeof (htab));
+# if 0
+ for (hslot = htab; hslot < htab + HSIZE; hslot++)
+ *hslot++ = ip;
# endif
#endif
@@ -214,10 +220,10 @@ lzf_compress (const void *const in_data, unsigned int in_len,
return 0;
*op++ = MAX_LIT - 1;
-#if USE_MEMCPY
- memcpy (op, ip - MAX_LIT, MAX_LIT);
- op += MAX_LIT;
- lit = 0;
+
+#ifdef lzf_movsb
+ ip -= lit;
+ lzf_movsb (op, ip, lit);
#else
lit = -lit;
do
@@ -233,11 +239,17 @@ lzf_compress (const void *const in_data, unsigned int in_len,
return 0;
*op++ = lit - 1;
+#ifdef lzf_movsb
+ ip -= lit;
+ lzf_movsb (op, ip, lit);
+#else
lit = -lit;
do
*op++ = ip[lit];
while (++lit);
+#endif
}
return op - (u8 *) out_data;
}
+
diff --git a/lzf_d.c b/lzf_d.c
index 73a1a80..61df183 100644
--- a/lzf_d.c
+++ b/lzf_d.c
@@ -45,6 +45,13 @@
# define SET_ERRNO(n) errno = (n)
#endif
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len) \
+ asm ("rep movsb" \
+ : "=D" (dst), "=S" (src), "=c" (len) \
+ : "0" (dst), "1" (src), "2" (len));
+#endif
+
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len)
@@ -76,10 +83,8 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
}
#endif
-#if USE_MEMCPY
- memcpy (op, ip, ctrl);
- op += ctrl;
- ip += ctrl;
+#ifdef lzf_movsb
+ lzf_movsb (op, ip, ctrl);
#else
do
*op++ = *ip++;
@@ -125,12 +130,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
return 0;
}
+#ifdef lzf_movsb
+ len += 2;
+ lzf_movsb (op, ref, len);
+#else
*op++ = *ref++;
*op++ = *ref++;
do
*op++ = *ref++;
while (--len);
+#endif
}
}
while (ip < in_end);