summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Changes2
-rw-r--r--lzfP.h37
-rw-r--r--lzf_c.c15
3 files changed, 39 insertions, 15 deletions
diff --git a/Changes b/Changes
index db70df9..09cd72b 100644
--- a/Changes
+++ b/Changes
@@ -6,6 +6,8 @@
and only a small win on amd cpus.
- improve c++ compatibility of the code.
- slightly improve compressor speed.
+ - halved memory requirements for compressor on 64 bit architectures,
+ which can improve the speed quite a bit on older cpus.
3.5 Fri May 1 02:28:42 CEST 2009
- lzf_compress did sometimes write one octet past the given output
diff --git a/lzfP.h b/lzfP.h
index 7f0e02b..c54d087 100644
--- a/lzfP.h
+++ b/lzfP.h
@@ -121,27 +121,52 @@
# define CHECK_INPUT 1
#endif
+/*
+ * Whether to store pointers or offsets inside the hash table. On
+ * 64 bit architectures, pointers take up twice as much space,
+ * and might also be slower. Default is to autodetect.
+ */
+/*#define LZF_USE_OFFSETS autodetect */
+
/*****************************************************************************/
/* nothing should be changed below */
#ifdef __cplusplus
# include <cstring>
+# include <climits>
using namespace std;
#else
# include <string.h>
+# include <limits.h>
+#endif
+
+#ifndef LZF_USE_OFFSETS
+# if defined (WIN32)
+# define LZF_USE_OFFSETS defined(_M_X64)
+# else
+# ifdef __cplusplus
+# include <cstdint>
+# else
+# include <stdint.h>
+# endif
+# define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
+# endif
#endif
typedef unsigned char u8;
-typedef const u8 *LZF_STATE[1 << (HLOG)];
+#if LZF_USE_OFFSETS
+# define LZF_HSLOT_BIAS ((const u8 *)in_data)
+ typedef unsigned int LZF_HSLOT;
+#else
+# define LZF_HSLOT_BIAS 0
+ typedef const u8 *LZF_HSLOT;
+#endif
+
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype. */
-# ifdef __cplusplus
-# include <climits>
-# else
-# include <limits.h>
-# endif
# if USHRT_MAX == 65535
typedef unsigned short u16;
# elif UINT_MAX == 65535
diff --git a/lzf_c.c b/lzf_c.c
index 06afa63..e3b1108 100644
--- a/lzf_c.c
+++ b/lzf_c.c
@@ -106,7 +106,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if !LZF_STATE_ARG
LZF_STATE htab;
#endif
- const u8 **hslot;
const u8 *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
const u8 *in_end = ip + in_len;
@@ -133,10 +132,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if INIT_HTAB
memset (htab, 0, sizeof (htab));
-# if 0
- for (hslot = htab; hslot < htab + HSIZE; hslot++)
- *hslot++ = ip;
-# endif
#endif
lit = 0; op++; /* start run */
@@ -144,9 +139,11 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
while (ip < in_end - 2)
{
+ LZF_HSLOT *hslot;
+
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
- ref = *hslot; *hslot = ip;
+ ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
if (1
#if INIT_HTAB
@@ -236,12 +233,12 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# if VERY_FAST && !ULTRA_FAST
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# endif
#else
@@ -250,7 +247,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
do
{
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
}
while (len--);