From 84e5a8dc51fb7e6e9e6044d9fd100cb91c451c9a Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 13 Nov 2007 10:41:52 +0000
Subject: *** empty log message ***

---
 Changes |  10 +++
 lzf.h   |   2 +-
 lzfP.h  |   3 +-
 lzf_c.c | 242 ++++++++++++++++++++++++++++++----------------------------------
 4 files changed, 125 insertions(+), 132 deletions(-)

diff --git a/Changes b/Changes
index 44f79f1..07c835e 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,14 @@
 	- switched to GPL v2 or any later version.
+	- speed up compression by ~10-15% in common cases
+	  by some manual unrolling.
+	- import some compiler tricks from JSON::XS.
+	- tune hash functions depending on ULTRA_FAST or VERY_FAST settings.
+	- for typical binary data (e.g. /bin/bash, memory dumps,
+	  canterbury corpus etc.), speed is now comparable to fastlz, but
+	  with better compression ratio. with ULTRA_FAST, it's typically
+	  3-8% faster than fastlz while still maintaining a similar ratio.
+	  (amd64, ymmv). thanks a lot for the competition :)
+	- undo inline assembly, it is no longer helpful.
 
 2.1  Fri Nov  2 13:34:42 CET 2007
 	- switched to a 2-clause bsd license with GPL exception.

diff --git a/lzf.h b/lzf.h
index 866be77..1b6da21 100644
--- a/lzf.h
+++ b/lzf.h
@@ -60,7 +60,7 @@
  * the data uncompressed otherwise.
  *
  * lzf_compress might use different algorithms on different systems and
- * even diferent runs, thus might result in different compressed strings
+ * even different runs, thus might result in different compressed strings
  * depending on the phase of the moon or similar factors. However, all
  * these strings are architecture-independent and will result in the
  * original data when decompressed using lzf_decompress.

diff --git a/lzfP.h b/lzfP.h
index 81f1f41..5c3ddc5 100644
--- a/lzfP.h
+++ b/lzfP.h
@@ -58,9 +58,8 @@
 /*
  * Sacrifice very little compression quality in favour of compression speed.
  * This gives almost the same compression as the default code, and is
- * (very roughly) 15% faster. This is the preferable mode of operation.
+ * (very roughly) 15% faster. This is the preferred mode of operation.
  */
-
 #ifndef VERY_FAST
 # define VERY_FAST 1
 #endif
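The lzfP.h knob above pairs with the first lzf_c.c hunk below, which now selects one of three hash mixes: the cheaper the mix, the faster the compressor runs and the more hash collisions (missed matches) it tolerates. Here is a minimal standalone sketch of how FRST/NEXT build a rolling 24-bit window over the next three input bytes and how each mode folds that window into a table slot; HLOG = 16 and the function names are illustrative, not part of the patch:

    #include <stdint.h>

    #define HLOG  16                /* example value; lzfP.h sets the real one */
    #define HSIZE (1 << HLOG)

    /* rolling 24-bit window over the next three input bytes */
    static unsigned int frst (const uint8_t *p)                 { return (p[0] << 8) | p[1]; }
    static unsigned int next (unsigned int v, const uint8_t *p) { return (v << 8) | p[2]; }

    /* ULTRA_FAST: cheapest mix, one shift and one subtract */
    static unsigned int idx_ultra (unsigned int h)   { return ((h >> (3*8 - HLOG)) - h) & (HSIZE - 1); }

    /* VERY_FAST: h*5 spreads the low bits slightly better */
    static unsigned int idx_very (unsigned int h)    { return ((h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1); }

    /* default: an extra xor/shift pass for the best distribution */
    static unsigned int idx_default (unsigned int h) { return (((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1); }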
diff --git a/lzf_c.c b/lzf_c.c
index bd44027..95a245d 100644
--- a/lzf_c.c
+++ b/lzf_c.c
@@ -47,7 +47,13 @@
 #ifndef FRST
 # define FRST(p) (((p[0]) << 8) | p[1])
 # define NEXT(v,p) (((v) << 8) | p[2])
-# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# if ULTRA_FAST
+# define IDX(h) (((h >> (3*8 - HLOG)) - h  ) & (HSIZE - 1))
+# elif VERY_FAST
+# define IDX(h) (((h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# else
+# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# endif
 /*# define IDX(h) ((ip[0] * 121 ^ ip[1] * 33 ^ ip[2] * 1) & (HSIZE-1))*/
 #endif
 /*
@@ -70,13 +76,6 @@
 #define MAX_OFF (1 << 13)
 #define MAX_REF ((1 << 8) + (1 << 3))
 
-#if (__i386 || __amd64) && __GNUC__ >= 3
-# define lzf_movsb(dst, src, len)             \
-   asm ("rep movsb"                           \
-        : "=D" (dst), "=S" (src), "=c" (len)  \
-        :  "0" (dst),  "1" (src),  "2" (len));
-#endif
-
 #if __GNUC__ >= 3
 # define expect(expr,value) __builtin_expect ((expr),(value))
 # define inline inline
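The surviving context above ends at the expect() macro, one of the "compiler tricks from JSON::XS" mentioned in Changes: wrapping hot-path tests in __builtin_expect lets gcc lay out the common case as straight-line code. A sketch of the definitions involved; the non-gcc fallbacks shown here are an assumption, not quoted from the patch:

    #if __GNUC__ >= 3
    # define expect(expr,value) __builtin_expect ((expr), (value))
    #else
    # define expect(expr,value) (expr)              /* assumed no-op fallback */
    #endif

    #define expect_false(expr) expect ((expr) != 0, 0)  /* branch is unlikely */
    #define expect_true(expr)  expect ((expr) != 0, 1)  /* branch is likely   */

This is also why the rewritten main loop below wraps its tests in expect_true/expect_false: the hints tell the compiler which side of each branch to optimise for.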
@@ -115,9 +114,12 @@ lzf_compress (const void *const in_data, unsigned int in_len,
   u8 *out_end = op + out_len;
   const u8 *ref;
 
-  unsigned int hval = FRST (ip);
+  unsigned int hval;
   unsigned long off;
-  int lit = 0;
+  int lit;
+
+  if (!in_len || !out_len)
+    return 0;
 
 #if INIT_HTAB
   memset (htab, 0, sizeof (htab));
@@ -127,163 +129,145 @@ lzf_compress (const void *const in_data, unsigned int in_len,
 # endif
 #endif
 
-  for (;;)
+  lit = 0; op++; /* start run */
+
+  hval = FRST (ip);
+  while (ip < in_end - 2)
     {
-      if (expect_true (ip < in_end - 2))
-        {
-          hval = NEXT (hval, ip);
-          hslot = htab + IDX (hval);
-          ref = *hslot; *hslot = ip;
+      hval = NEXT (hval, ip);
+      hslot = htab + IDX (hval);
+      ref = *hslot; *hslot = ip;
 
-          if (1
+      if (1
 #if INIT_HTAB && !USE_MEMCPY
-              && ref < ip /* the next test will actually take care of this, but this is faster */
+          && ref < ip /* the next test will actually take care of this, but this is faster */
 #endif
-              && (off = ip - ref - 1) < MAX_OFF
-              && ip + 4 < in_end
-              && ref > (u8 *)in_data
+          && (off = ip - ref - 1) < MAX_OFF
+          && ip + 4 < in_end
+          && ref > (u8 *)in_data
 #if STRICT_ALIGN
-              && ref[0] == ip[0]
-              && ref[1] == ip[1]
-              && ref[2] == ip[2]
+          && ref[0] == ip[0]
+          && ref[1] == ip[1]
+          && ref[2] == ip[2]
 #else
-              && *(u16 *)ref == *(u16 *)ip
-              && ref[2] == ip[2]
+          && *(u16 *)ref == *(u16 *)ip
+          && ref[2] == ip[2]
 #endif
-            )
-            {
-              /* match found at *ref++ */
-              unsigned int len = 2;
-              unsigned int maxlen = in_end - ip - len;
-              maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+        )
+        {
+          /* match found at *ref++ */
+          unsigned int len = 2;
+          unsigned int maxlen = in_end - ip - len;
+          maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
 
-              if (expect_false (op + lit + 1 + 3 >= out_end))
-                return 0;
+          if (expect_false (op + 1 + 3 >= out_end))
+            return 0;
 
-              if (expect_false (lit))
-                {
-                  *op++ = lit - 1;
-                  lit = -lit;
-                  do
-                    *op++ = ip[lit];
-                  while (expect_false (++lit));
-                }
+          op [- lit - 1] = lit - 1; /* stop run */
+          op -= !lit; /* undo run if length is zero */
 
-              for (;;)
+          for (;;)
+            {
+              if (expect_true (maxlen > 16))
                 {
-                  if (expect_true (maxlen > 16))
-                    {
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                      len++; if (ref [len] != ip [len]) break;
-                    }
-
-                  do
-                    len++;
-                  while (len < maxlen && ref[len] == ip[len]);
-
-                  break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
                 }
 
-              len -= 2;
-              ip++;
+              do
+                len++;
+              while (len < maxlen && ref[len] == ip[len]);
 
-              if (len < 7)
-                {
-                  *op++ = (off >> 8) + (len << 5);
-                }
-              else
-                {
-                  *op++ = (off >> 8) + ( 7 << 5);
-                  *op++ = len - 7;
-                }
+              break;
+            }
+
+          len -= 2;
+          ip++;
 
-              *op++ = off;
+          if (len < 7)
+            {
+              *op++ = (off >> 8) + (len << 5);
+            }
+          else
+            {
+              *op++ = (off >> 8) + ( 7 << 5);
+              *op++ = len - 7;
+            }
+
+          *op++ = off;
 
 #if ULTRA_FAST || VERY_FAST
-              ip += len;
+          ip += len;
 #if VERY_FAST && !ULTRA_FAST
-              --ip;
+          --ip;
 #endif
-              hval = FRST (ip);
+          hval = FRST (ip);
 
-              hval = NEXT (hval, ip);
-              htab[IDX (hval)] = ip;
-              ip++;
+          hval = NEXT (hval, ip);
+          htab[IDX (hval)] = ip;
+          ip++;
 
 #if VERY_FAST && !ULTRA_FAST
+          hval = NEXT (hval, ip);
+          htab[IDX (hval)] = ip;
+          ip++;
+#endif
+#else
+          do
+            {
               hval = NEXT (hval, ip);
               htab[IDX (hval)] = ip;
               ip++;
-#endif
-#else
-              do
-                {
-                  hval = NEXT (hval, ip);
-                  htab[IDX (hval)] = ip;
-                  ip++;
-                }
-              while (len--);
-#endif
-              continue;
             }
+          while (len--);
+#endif
+
+          lit = 0; op++; /* start run */
+          continue;
         }
-      else if (expect_false (ip == in_end))
-        break;
 
       /* one more literal byte we must copy */
+      if (expect_false (op >= out_end))
+        return 0;
+
       lit++;
-      ip++;
+      *op++ = *ip++;
 
       if (expect_false (lit == MAX_LIT))
         {
-          if (op + 1 + MAX_LIT >= out_end)
-            return 0;
-
-          *op++ = MAX_LIT - 1;
-
-#ifdef lzf_movsb
-          ip -= MAX_LIT;
-          lzf_movsb (op, ip, lit);
-#else
-          lit = -lit;
-          do
-            *op++ = ip[lit];
-          while (++lit);
-#endif
+          op [- lit - 1] = lit - 1; /* stop run */
+          lit = 0; op++; /* start run */
         }
     }
 
-  if (lit)
-    {
-      if (op + lit + 1 >= out_end)
-        return 0;
+  if (op + lit + 2 >= out_end)
+    return 0;
 
-      *op++ = lit - 1;
-#ifdef lzf_movsb
-      ip -= lit;
-      lzf_movsb (op, ip, lit);
-#else
-      lit = -lit;
-      do
-        *op++ = ip[lit];
-      while (++lit);
-#endif
+  while (ip < in_end)
+    {
+      lit++;
+      *op++ = *ip++;
     }
 
-  return op - (u8 *) out_data;
+  op [- lit - 1] = lit - 1;
+
+  return op - (u8 *)out_data;
 }
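The largest structural change in the hunk above is the literal-run handling. The old code counted literals in lit and copied them backwards (the ip[lit] loop) only when a run ended; the new code keeps a run open at all times: op++ reserves the run's count byte, each literal is copied to the output immediately, op [- lit - 1] = lit - 1 backpatches the count when the run stops, and op -= !lit returns the reserved byte when a match arrives after zero literals. A runnable toy of the backpatching idea, with made-up data:

    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
      uint8_t out[8], *op = out;
      const uint8_t in[] = "abc";
      int lit = 0, i;

      op++;                         /* start run: reserve the count byte */

      for (i = 0; i < 3; i++)
        {
          lit++;
          *op++ = in[i];            /* literals are copied as they arrive */
        }

      op [- lit - 1] = lit - 1;     /* stop run: backpatch count = length - 1 */

      /* out now holds { 2, 'a', 'b', 'c' }: a three-byte literal run */
      printf ("%d %c%c%c\n", out[0], out[1], out[2], out[3]);
      return 0;
    }

Copying literals as they are scanned is also what lets the old lzf_movsb rep-movsb assembly go: there is no bulk copy left to accelerate.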
-- 
cgit v1.2.3
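For reference, the match emitter in the patch produces the usual LZF back-reference layout: a control byte holding (len << 5) | (off >> 8), an optional extension byte when the 3-bit length field saturates at 7, and the low eight offset bits last. A sketch of the corresponding decode step, assuming the control byte is already known not to begin a literal run; decode_match is an illustrative helper, not the actual lzf_d.c code:

    #include <stdint.h>

    /* Returns the number of header bytes consumed; fills in the match
       length and backwards distance encoded by the compressor above. */
    static int decode_match (const uint8_t *ip, unsigned int *len, unsigned int *off)
    {
      const uint8_t *p = ip;
      unsigned int ctrl = *p++;          /* (len << 5) | (off >> 8) */
      unsigned int l = ctrl >> 5;        /* 1..7; 7 selects the long form */
      unsigned int o = (ctrl & 0x1f) << 8;

      if (l == 7)
        l += *p++;                       /* long form: extension byte holds len - 7 */

      o |= *p++;                         /* low eight bits of the offset */

      *len = l + 2;                      /* the encoder stored len - 2 */
      *off = o + 1;                      /* the encoder stored ip - ref - 1 */
      return p - ip;
    }

The minimum encodable match is therefore three bytes (l = 1), which matches the three-byte comparison guarding the match path in the compressor's main loop.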