summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Changes 4
-rw-r--r-- bench.c 14
-rw-r--r-- lzf_c.c 6
-rw-r--r-- lzf_d.c 57
4 files changed, 64 insertions, 17 deletions
diff --git a/Changes b/Changes
index faa6acd..46da03e 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,9 @@
- fixed hash calculation in C♯ version (Tiago Freitas Leal).
+ - unroll copy for small sizes, use memcpy for larger sizes,
+ greatly speeding up decompression in most cases.
+ - finally disable rep movsb - it's a big loss on modern intel cpus,
+ and only a small win on amd cpus.
3.5 Fri May 1 02:28:42 CEST 2009
- lzf_compress did sometimes write one octet past the given output
diff --git a/bench.c b/bench.c
index c919eb6..b108503 100644
--- a/bench.c
+++ b/bench.c
@@ -6,6 +6,7 @@
#include <sys/times.h>
#include <sys/types.h>
#include <sys/socket.h>
+#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/resource.h>
@@ -70,7 +71,7 @@ int main(void)
for (lp = 0; lp < 1000000; lp++) {
s=stamp();
- struct timespec ts; clock_gettime (CLOCK_THREAD_CPUTIME_ID, &ts);
+ //struct timespec ts; clock_gettime (CLOCK_THREAD_CPUTIME_ID, &ts);
//printf ("%9ld\n", ts.tv_nsec);//D
//struct rusage usage; getrusage (RUSAGE_SELF, &usage);
//struct tms tms; times (&tms);
@@ -82,14 +83,17 @@ int main(void)
//read (p[0], &buf, 4);
//stat ("/etc/passwd", &sbuf);
//struct timeval tv; gettimeofday (&tv, 0);
+ //void *x = mmap (0, 16384, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,-1,0);
- //l = lzf_compress (data, DSIZE, data2, DSIZE*2);
- //assert(l);
+ l = lzf_compress (data, DSIZE, data2, DSIZE*2);
+ assert(l);
+
+ j = lzf_decompress (data2, l, data3, DSIZE*2);
+ assert (j == DSIZE);
si[0]=measure(s);
- //j = lzf_decompress (data2, l, data3, DSIZE*2);
- //assert (j == DSIZE);
+ assert (!memcmp (data, data3, DSIZE));
printf ("\r%10d (%d) ", si[0], l);
if (si[0] < min && si[0] > 0)
diff --git a/lzf_c.c b/lzf_c.c
index fc03a2b..9f881cd 100644
--- a/lzf_c.c
+++ b/lzf_c.c
@@ -89,9 +89,9 @@
/*
* compressed format
*
- * 000LLLLL <L+1> ; literal
- * LLLooooo oooooooo ; backref L
- * 111ooooo LLLLLLLL oooooooo ; backref L+7
+ * 000LLLLL <L+1> ; literal, L+1=1..33 octets
+ * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
+ * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
*
*/
diff --git a/lzf_d.c b/lzf_d.c
index 9e2cd82..076d1d9 100644
--- a/lzf_d.c
+++ b/lzf_d.c
@@ -36,6 +36,8 @@
#include "lzfP.h"
+#include <string.h> /* for memcpy/memset */
+
#if AVOID_ERRNO
# define SET_ERRNO(n)
#else
@@ -43,12 +45,14 @@
# define SET_ERRNO(n) errno = (n)
#endif
+#if USE_REP_MOVSB /* small win on amd, big loss on intel */
#if (__i386 || __amd64) && __GNUC__ >= 3
# define lzf_movsb(dst, src, len) \
asm ("rep movsb" \
: "=D" (dst), "=S" (src), "=c" (len) \
: "0" (dst), "1" (src), "2" (len));
#endif
+#endif
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
@@ -84,9 +88,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
- do
- *op++ = *ip++;
- while (--ctrl);
+ switch (ctrl)
+ {
+ case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
+ case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;
+ case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;
+ case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++;
+ case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;
+ case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++;
+ case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
+ case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
+ }
#endif
}
else /* back reference */
@@ -132,12 +144,39 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
len += 2;
lzf_movsb (op, ref, len);
#else
- *op++ = *ref++;
- *op++ = *ref++;
-
- do
- *op++ = *ref++;
- while (--len);
+ switch (len)
+ {
+ default:
+ len += 2;
+
+ if (op >= ref + len)
+ {
+ /* disjunct areas */
+ memcpy (op, ref, len);
+ op += len;
+ }
+ else
+ {
+ /* overlapping, use octet by octet copying */
+ do
+ *op++ = *ref++;
+ while (--len);
+ }
+
+ break;
+
+ case 9: *op++ = *ref++;
+ case 8: *op++ = *ref++;
+ case 7: *op++ = *ref++;
+ case 6: *op++ = *ref++;
+ case 5: *op++ = *ref++;
+ case 4: *op++ = *ref++;
+ case 3: *op++ = *ref++;
+ case 2: *op++ = *ref++;
+ case 1: *op++ = *ref++;
+ case 0: *op++ = *ref++; /* two octets more */
+ *op++ = *ref++;
+ }
#endif
}
}