6 files changed, 58 insertions, 23 deletions
diff --git a/Changes b/Changes
index ad23d8b..a77fb59 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,8 @@
+1.5  Tue Mar  8 20:23:23 CET 2005
+	- incorporated improvements by Adam D. Moss,
+          which include a new VERY_FAST mode that is
+          a bit slower than ULTRA_FAST but compresses much
+          better, and enabled it as the default.
 
 1.401 Thu Mar  3 18:00:52 CET 2005
 	- use cstring in c++, not string.h.
diff --git a/Makefile.in b/Makefile.in
index f94e0cc..cec120a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,4 +1,4 @@
-VERSION = 1.401
+VERSION = 1.5
 
 prefix = @prefix@
 exec_prefix = @exec_prefix@
diff --git a/bench.c b/bench.c
index 41ca037..9e0f3c3 100644
--- a/bench.c
+++ b/bench.c
@@ -40,7 +40,7 @@ int main(void)
    fread (data, DSIZE, 1, f);
    fclose (f);
    
-   for (lp = 0; lp < 100; lp++) {
+   for (lp = 0; lp < 1000; lp++) {
       s=stamp();
       l = lzf_compress (data, DSIZE, data2, DSIZE*2);
       j = lzf_decompress (data2, l, data3, DSIZE*2);
diff --git a/lzf.h b/lzf.h
index 5e7ff74..8538609 100644
--- a/lzf.h
+++ b/lzf.h
@@ -48,6 +48,8 @@
 **
 ***********************************************************************/
 
+#define LZF_VERSION 0x0105 /* 1.5 */
+
 /*
  * Compress in_len bytes stored at the memory block starting at
  * in_data and write the result to out_data, up to a maximum length
@@ -60,15 +62,16 @@
  * the data uncompressed otherwise.
  *
  * lzf_compress might use different algorithms on different systems and
- * thus might result in different compressed strings depending on the
- * phase of the moon or similar factors. However, all these strings are
- * architecture-independent and will result in the original data when
- * decompressed using lzf_decompress.
+ * even different runs, thus might result in different compressed strings
+ * depending on the phase of the moon or similar factors. However, all
+ * these strings are architecture-independent and will result in the
+ * original data when decompressed using lzf_decompress.
  *
  * The buffers must not be overlapping.
  *
  * If the option LZF_STATE_ARG is enabled, an extra argument must be
- * supplied which is not reflected in this header file. Refer to lzf_c.c.
+ * supplied which is not reflected in this header file. Refer to lzfP.h
+ * and lzf_c.c.
  *
  */
 unsigned int 
diff --git a/lzfP.h b/lzfP.h
index 33b9037..48963b2 100644
--- a/lzfP.h
+++ b/lzfP.h
@@ -39,7 +39,7 @@
 #ifndef LZFP_h
 #define LZFP_h
 
-#define STANDALONE /* at the moment, this is ok. */
+#define STANDALONE 1 /* at the moment, this is ok. */
 
 #ifndef STANDALONE
 # include "lzf.h"
@@ -49,29 +49,40 @@
  * size of hashtable is (1 << HLOG) * sizeof (char *)
  * decompression is independent of the hash table size
  * the difference between 15 and 14 is very small
- * for small blocks (and 14 is also faster).
- * For a low-memory configuration, use HLOG == 13;
- * For best compression, use 15 or 16.
+ * for small blocks (and 14 is usually a bit faster).
+ * For a low-memory/faster configuration, use HLOG == 13;
+ * For best compression, use 15 or 16 (or more).
  */
 #ifndef HLOG
 # define HLOG 14
 #endif
 
 /*
- * sacrifice some compression quality in favour of compression speed.
+ * sacrifice very little compression quality in favour of compression speed.
+ * This gives almost the same compression as the default code, and is
+ * (very roughly) 15% faster. This is the preferable mode of operation.
+ */
+
+#ifndef VERY_FAST
+# define VERY_FAST 1
+#endif
+
+/*
+ * sacrifice some more compression quality in favour of compression speed.
  * (roughly 1-2% worse compression for large blocks and
  * 9-10% for small, redundant, blocks and >>20% better speed in both cases)
- * In short: enable this for binary data, disable this for text data.
+ * In short: when in need of speed, enable this for binary data,
+ * possibly disable this for text data.
  */
 #ifndef ULTRA_FAST
-# define ULTRA_FAST 1
+# define ULTRA_FAST 0
 #endif
 
 /*
  * unconditionally aligning does not cost very much, so do it if unsure
  */
 #ifndef STRICT_ALIGN
-# define STRICT_ALIGN !defined(__i386)
+# define STRICT_ALIGN !(defined(__i386) || defined (__amd64))
 #endif
 
 /*
@@ -83,8 +94,8 @@
 #endif
 
 /*
- * you may choose to pre-set the hash table (might be faster on modern cpus
- * and large (>>64k) blocks)
+ * you may choose to pre-set the hash table (might be faster on some
+ * modern cpus and large (>>64k) blocks)
  */
 #ifndef INIT_HTAB
 # define INIT_HTAB 0
@@ -128,6 +139,12 @@ typedef const u8 *LZF_STATE[1 << (HLOG)];
 # endif
 #endif
 
+#if ULTRA_FAST
+# if defined(VERY_FAST)
+#  undef VERY_FAST
+# endif
+#endif
+
 #if USE_MEMCPY || INIT_HTAB
 # ifdef __cplusplus
 #  include <cstring>
diff --git a/lzf_c.c b/lzf_c.c
index eeb114e..43f8fb5 100644
--- a/lzf_c.c
+++ b/lzf_c.c
@@ -46,8 +46,8 @@
  * the hashing function might seem strange, just believe me
  * it works ;)
  */
-#define FRST(p) (((p[0]) << 8) + p[1])
-#define NEXT(v,p) (((v) << 8) + p[2])
+#define FRST(p) (((p[0]) << 8) | p[1])
+#define NEXT(v,p) (((v) << 8) | p[2])
 #define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
 /*
  * IDX works because it is very similar to a multiplicative hash, e.g.
@@ -139,13 +139,13 @@ lzf_compress (const void *const in_data, unsigned int in_len,
               unsigned int maxlen = in_end - ip - len;
               maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
 
+              if (op + lit + 1 + 3 >= out_end)
+                return 0;
+
               do
                 len++;
               while (len < maxlen && ref[len] == ip[len]);
 
-              if (op + lit + 1 + 3 >= out_end)
-                return 0;
-
               if (lit)
                 {
                   *op++ = lit - 1;
@@ -170,12 +170,22 @@ lzf_compress (const void *const in_data, unsigned int in_len,
 
               *op++ = off;
 
-#if ULTRA_FAST
+#if ULTRA_FAST || VERY_FAST
               ip += len;
+#if VERY_FAST && !ULTRA_FAST
+              --ip;
+#endif
               hval = FRST (ip);
+
+              hval = NEXT (hval, ip);
+              htab[IDX (hval)] = ip;
+              ip++;
+
+#if VERY_FAST && !ULTRA_FAST
               hval = NEXT (hval, ip);
               htab[IDX (hval)] = ip;
               ip++;
+#endif
 #else
               do
                 {