author     root <root>  2007-11-13 10:41:52 +0000
committer  root <root>  2007-11-13 10:41:52 +0000
commit     84e5a8dc51fb7e6e9e6044d9fd100cb91c451c9a (patch)
tree       96057f002bf240afeed7d7b0d4696f8db3b34c53
parent     16682590a57fb99f2c80e42302659924db3137b2 (diff)
*** empty log message ***
-rw-r--r--  Changes  |  10
-rw-r--r--  lzf.h    |   2
-rw-r--r--  lzfP.h   |   3
-rw-r--r--  lzf_c.c  | 242
4 files changed, 125 insertions(+), 132 deletions(-)
diff --git a/Changes b/Changes
index 44f79f1..07c835e 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,14 @@
- switched to GPL v2 or any later version.
+ - speed up compression by ~10-15% in common cases
+ by some manual unrolling.
+ - import some compiler tricks from JSON::XS.
+ - tune hash functions depending on ULTRA_FAST or VERY_FAST settings.
+ - for typical binary data (e.g. /bin/bash, memory dumps,
+ canterbury corpus etc.), speed is now comparable to fastlz, but
+ with better compression ratio. with ULTRA_FAST, it's typically
+ 3-8% faster than fastlz while still maintaining a similar ratio.
+ (amd64, ymmv). thanks a lot for the competition :)
+ - undo inline assembly, it is no longer helpful.
2.1 Fri Nov 2 13:34:42 CET 2007
- switched to a 2-clause bsd license with GPL exception.
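The speed and ratio figures claimed above are workload-dependent (hence "ymmv"). A quick way to sanity-check them on your own data is a small timing harness around lzf_compress; the sketch below is not part of this patch, and the file name, repeat count and buffer handling are arbitrary choices for illustration.

    /* bench_lzf.c -- rough single-file timing of lzf_compress.
     * Minimal sketch, not part of this patch; link against liblzf. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    #include "lzf.h"

    int
    main (int argc, char *argv[])
    {
      if (argc < 2)
        return fprintf (stderr, "usage: %s <file>\n", argv[0]), 1;

      FILE *f = fopen (argv[1], "rb");
      if (!f)
        return perror ("fopen"), 1;

      fseek (f, 0, SEEK_END);
      long len = ftell (f);
      fseek (f, 0, SEEK_SET);

      unsigned char *in  = malloc (len);
      unsigned char *out = malloc (len);   /* too small for incompressible data:
                                              lzf_compress then returns 0       */
      if (!in || !out || fread (in, 1, len, f) != (size_t)len)
        return 1;
      fclose (f);

      clock_t t0 = clock ();
      unsigned int clen = 0;
      for (int i = 0; i < 100; i++)        /* arbitrary repeat count */
        clen = lzf_compress (in, len, out, len);
      double dt = (double)(clock () - t0) / CLOCKS_PER_SEC;

      printf ("%ld -> %u bytes, %.3f s for 100 runs\n", len, clen, dt);
      return 0;
    }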
diff --git a/lzf.h b/lzf.h
index 866be77..1b6da21 100644
--- a/lzf.h
+++ b/lzf.h
@@ -60,7 +60,7 @@
* the data uncompressed otherwise.
*
* lzf_compress might use different algorithms on different systems and
- * even diferent runs, thus might result in different compressed strings
+ * even different runs, thus might result in different compressed strings
* depending on the phase of the moon or similar factors. However, all
* these strings are architecture-independent and will result in the
* original data when decompressed using lzf_decompress.
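In other words, only the round trip is guaranteed to be stable, not the byte-for-byte compressed form. A minimal round-trip check against that guarantee (sketch only; the helper name and buffer sizes are arbitrary, and the input is assumed to fit the buffers):

    #include <string.h>
    #include "lzf.h"

    /* Returns 1 if data survives a compress/decompress round trip. */
    static int
    lzf_roundtrip_ok (const unsigned char *data, unsigned int len)
    {
      unsigned char packed[65536], restored[65536];   /* arbitrary sizes */
      unsigned int clen = lzf_compress (data, len, packed, sizeof (packed));

      if (!clen)        /* 0 means it did not fit; caller stores the data uncompressed */
        return 1;

      unsigned int dlen = lzf_decompress (packed, clen, restored, sizeof (restored));
      return dlen == len && memcmp (restored, data, len) == 0;
    }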
diff --git a/lzfP.h b/lzfP.h
index 81f1f41..5c3ddc5 100644
--- a/lzfP.h
+++ b/lzfP.h
@@ -58,9 +58,8 @@
/*
* Sacrifice very little compression quality in favour of compression speed.
* This gives almost the same compression as the default code, and is
- * (very roughly) 15% faster. This is the preferable mode of operation.
+ * (very roughly) 15% faster. This is the preferred mode of operation.
*/
-
#ifndef VERY_FAST
# define VERY_FAST 1
#endif
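Because the definition sits behind an #ifndef guard, the speed/ratio trade-off can also be selected per build (e.g. cc -DVERY_FAST=0 or cc -DULTRA_FAST=1) instead of editing lzfP.h. A tiny sketch of how the knobs resolve, assuming ULTRA_FAST carries a similarly guarded default elsewhere in this header:

    #include "lzfP.h"

    #if ULTRA_FAST
    # define LZF_MODE "ultra fast"      /* fastest, slightly worse ratio */
    #elif VERY_FAST
    # define LZF_MODE "very fast"       /* the default trade-off         */
    #else
    # define LZF_MODE "best ratio"      /* slowest                       */
    #endif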
diff --git a/lzf_c.c b/lzf_c.c
index bd44027..95a245d 100644
--- a/lzf_c.c
+++ b/lzf_c.c
@@ -47,7 +47,13 @@
#ifndef FRST
# define FRST(p) (((p[0]) << 8) | p[1])
# define NEXT(v,p) (((v) << 8) | p[2])
-# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# if ULTRA_FAST
+# define IDX(h) (((h >> (3*8 - HLOG)) - h ) & (HSIZE - 1))
+# elif VERY_FAST
+# define IDX(h) (((h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# else
+# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# endif
/*# define IDX(h) ((ip[0] * 121 ^ ip[1] * 33 ^ ip[2] * 1) & (HSIZE-1))*/
#endif
/*
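The three IDX variants trade hash quality for speed: the ULTRA_FAST form drops the xor/shift mixing entirely, VERY_FAST keeps the multiply-by-5 term, and the default form mixes the bits most thoroughly, which tends to mean fewer collisions and a slightly better ratio. A standalone illustration of the macros above (HLOG is assumed to be 16 here purely for the example; the real value and HSIZE == 1 << HLOG come from lzfP.h, and h is the 24-bit rolling value built by FRST/NEXT from the next three input bytes):

    #define HLOG  16
    #define HSIZE (1 << HLOG)

    /* index into the hash table of recent input positions */
    static unsigned int idx_ultra_fast (unsigned int h) { return ((h >> (3*8 - HLOG)) - h  ) & (HSIZE - 1); }
    static unsigned int idx_very_fast  (unsigned int h) { return ((h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1); }
    static unsigned int idx_default    (unsigned int h) { return (((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1); }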
@@ -70,13 +76,6 @@
#define MAX_OFF (1 << 13)
#define MAX_REF ((1 << 8) + (1 << 3))
-#if (__i386 || __amd64) && __GNUC__ >= 3
-# define lzf_movsb(dst, src, len) \
- asm ("rep movsb" \
- : "=D" (dst), "=S" (src), "=c" (len) \
- : "0" (dst), "1" (src), "2" (len));
-#endif
-
#if __GNUC__ >= 3
# define expect(expr,value) __builtin_expect ((expr),(value))
# define inline inline
@@ -115,9 +114,12 @@ lzf_compress (const void *const in_data, unsigned int in_len,
u8 *out_end = op + out_len;
const u8 *ref;
- unsigned int hval = FRST (ip);
+ unsigned int hval;
unsigned long off;
- int lit = 0;
+ int lit;
+
+ if (!in_len || !out_len)
+ return 0;
#if INIT_HTAB
memset (htab, 0, sizeof (htab));
@@ -127,163 +129,145 @@ lzf_compress (const void *const in_data, unsigned int in_len,
# endif
#endif
- for (;;)
+ lit = 0; op++; /* start run */
+
+ hval = FRST (ip);
+ while (ip < in_end - 2)
{
- if (expect_true (ip < in_end - 2))
- {
- hval = NEXT (hval, ip);
- hslot = htab + IDX (hval);
- ref = *hslot; *hslot = ip;
+ hval = NEXT (hval, ip);
+ hslot = htab + IDX (hval);
+ ref = *hslot; *hslot = ip;
- if (1
+ if (1
#if INIT_HTAB && !USE_MEMCPY
- && ref < ip /* the next test will actually take care of this, but this is faster */
+ && ref < ip /* the next test will actually take care of this, but this is faster */
#endif
- && (off = ip - ref - 1) < MAX_OFF
- && ip + 4 < in_end
- && ref > (u8 *)in_data
+ && (off = ip - ref - 1) < MAX_OFF
+ && ip + 4 < in_end
+ && ref > (u8 *)in_data
#if STRICT_ALIGN
- && ref[0] == ip[0]
- && ref[1] == ip[1]
- && ref[2] == ip[2]
+ && ref[0] == ip[0]
+ && ref[1] == ip[1]
+ && ref[2] == ip[2]
#else
- && *(u16 *)ref == *(u16 *)ip
- && ref[2] == ip[2]
+ && *(u16 *)ref == *(u16 *)ip
+ && ref[2] == ip[2]
#endif
- )
- {
- /* match found at *ref++ */
- unsigned int len = 2;
- unsigned int maxlen = in_end - ip - len;
- maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+ )
+ {
+ /* match found at *ref++ */
+ unsigned int len = 2;
+ unsigned int maxlen = in_end - ip - len;
+ maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
- if (expect_false (op + lit + 1 + 3 >= out_end))
- return 0;
+ if (expect_false (op + 1 + 3 >= out_end))
+ return 0;
- if (expect_false (lit))
- {
- *op++ = lit - 1;
- lit = -lit;
- do
- *op++ = ip[lit];
- while (expect_false (++lit));
- }
+ op [- lit - 1] = lit - 1; /* stop run */
+ op -= !lit; /* undo run if length is zero */
- for (;;)
+ for (;;)
+ {
+ if (expect_true (maxlen > 16))
{
- if (expect_true (maxlen > 16))
- {
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- len++; if (ref [len] != ip [len]) break;
- }
-
- do
- len++;
- while (len < maxlen && ref[len] == ip[len]);
-
- break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
}
- len -= 2;
- ip++;
+ do
+ len++;
+ while (len < maxlen && ref[len] == ip[len]);
- if (len < 7)
- {
- *op++ = (off >> 8) + (len << 5);
- }
- else
- {
- *op++ = (off >> 8) + ( 7 << 5);
- *op++ = len - 7;
- }
+ break;
+ }
+
+ len -= 2;
+ ip++;
- *op++ = off;
+ if (len < 7)
+ {
+ *op++ = (off >> 8) + (len << 5);
+ }
+ else
+ {
+ *op++ = (off >> 8) + ( 7 << 5);
+ *op++ = len - 7;
+ }
+
+ *op++ = off;
#if ULTRA_FAST || VERY_FAST
- ip += len;
+ ip += len;
#if VERY_FAST && !ULTRA_FAST
- --ip;
+ --ip;
#endif
- hval = FRST (ip);
+ hval = FRST (ip);
- hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
- ip++;
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
#if VERY_FAST && !ULTRA_FAST
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
+#endif
+#else
+ do
+ {
hval = NEXT (hval, ip);
htab[IDX (hval)] = ip;
ip++;
-#endif
-#else
- do
- {
- hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
- ip++;
- }
- while (len--);
-#endif
- continue;
}
+ while (len--);
+#endif
+ lit = 0; op++; /* start run */
+ continue;
}
- else if (expect_false (ip == in_end))
- break;
/* one more literal byte we must copy */
+
+ if (expect_false (op >= out_end))
+ return 0;
+
lit++;
- ip++;
+ *op++ = *ip++;
if (expect_false (lit == MAX_LIT))
{
- if (op + 1 + MAX_LIT >= out_end)
- return 0;
-
- *op++ = MAX_LIT - 1;
-
-#ifdef lzf_movsb
- ip -= MAX_LIT;
- lzf_movsb (op, ip, lit);
-#else
- lit = -lit;
- do
- *op++ = ip[lit];
- while (++lit);
-#endif
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
}
}
- if (lit)
- {
- if (op + lit + 1 >= out_end)
- return 0;
+ if (op + lit + 2 >= out_end)
+ return 0;
- *op++ = lit - 1;
-#ifdef lzf_movsb
- ip -= lit;
- lzf_movsb (op, ip, lit);
-#else
- lit = -lit;
- do
- *op++ = ip[lit];
- while (++lit);
-#endif
+ while (ip < in_end)
+ {
+ lit++;
+ *op++ = *ip++;
}
- return op - (u8 *) out_data;
+ op [- lit - 1] = lit - 1;
+
+ return op - (u8 *)out_data;
}
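The biggest structural change in this hunk is how literal runs are emitted: instead of counting literals and copying them in a burst once a match interrupts the run (the old *op++ = ip[lit] loop, optionally via lzf_movsb), the rewritten loop copies each literal to the output immediately, reserves one control byte when a run starts, and backpatches it with the run length via op [- lit - 1] = lit - 1 when the run stops. The sketch below isolates that technique with hypothetical names; a stream built this way is the degenerate all-literals case of the LZF format, where each control byte 0..31 announces that many plus one literal bytes.

    #include <stddef.h>

    #define MAX_LIT (1 << 5)   /* as in lzfP.h: a run holds at most 32 literals */

    /* Copy in[0..in_len) to out as LZF literal runs, returning the output size. */
    static size_t
    copy_literals (const unsigned char *in, size_t in_len, unsigned char *out)
    {
      unsigned char *op = out;
      int lit = 0;

      op++;                               /* start run: reserve the control byte */

      for (size_t i = 0; i < in_len; i++)
        {
          lit++;
          *op++ = in[i];                  /* literals go straight to the output  */

          if (lit == MAX_LIT)
            {
              op[- lit - 1] = lit - 1;    /* stop run: backpatch its length      */
              lit = 0; op++;              /* start the next run                  */
            }
        }

      if (lit)
        op[- lit - 1] = lit - 1;          /* close the final run                 */
      else
        op--;                             /* drop the unused reserved byte       */

      return op - out;
    }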