From db8367fa234d20d6e8e07901398fd45e23058d7b Mon Sep 17 00:00:00 2001 From: root Date: Sun, 9 Jun 2002 22:41:34 +0000 Subject: *** empty log message *** --- Changes | 15 ++++ LICENSE | 26 +++++++ Makefile.in | 63 ++++++++++++++++ README | 24 +++++++ bench.c | 63 ++++++++++++++++ configure.in | 22 ++++++ crc32.h | 65 +++++++++++++++++ lzf.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lzf.h | 88 +++++++++++++++++++++++ lzfP.h | 107 +++++++++++++++++++++++++++ lzf_c.c | 209 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lzf_d.c | 101 ++++++++++++++++++++++++++ 12 files changed, 1015 insertions(+) create mode 100644 Changes create mode 100644 LICENSE create mode 100644 Makefile.in create mode 100644 README create mode 100644 bench.c create mode 100644 configure.in create mode 100644 crc32.h create mode 100644 lzf.c create mode 100644 lzf.h create mode 100644 lzfP.h create mode 100644 lzf_c.c create mode 100644 lzf_d.c diff --git a/Changes b/Changes new file mode 100644 index 0000000..28e4318 --- /dev/null +++ b/Changes @@ -0,0 +1,15 @@ +0.4 + - typoe fix. + - lzf demo program now properly decompresses small files. + - fix another 64 bit issue, found by Laurent Deniel. + +0.3 Tue Jan 16 13:21:14 CET 2001 + - fix silly beginners 32/64 bit mistake. + +0.2 Thu Jan 4 05:56:42 CET 2001 + - now totally indepdendent of autoconfig, for + easy inclusion into other programs. + - much better fine-tuning, faster and better than 0.1. + +0.1 + - initial release. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5d64fb4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2000,2001 Marc Alexander Lehmann + +Redistribution and use in source and binary forms, with or without modifica- +tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- +CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- +CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- +ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..9644125 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,63 @@ +VERSION = 0.4 + +prefix = @prefix@ +exec_prefix = @exec_prefix@ +libdir = @libdir@ +bindir = @bindir@ +includedir = @includedir@ + +CC = @CC@ +CPPFLAGS = @CPPFLAGS@ +CFLAGS = @CFLAGS@ +LDFLAGS = @LDFLAGS@ +RANLIB = @RANLIB@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ + +all: Makefile lzf + +clean: + -rm -f *.o *.a lzf bench + +lzf_c.o: lzf_c.c lzfP.h + +lzf_d.o: lzf_d.c lzfP.h + +lzf.o: lzf.c + +lzf: lzf.o liblzf.a + +lzfP.h: lzf.h config.h + +liblzf.a: lzf_c.o lzf_d.o + rm -f $@ + $(AR) rc $@ $^ + $(RANLIB) $@ + +install: all + $(INSTALL) -d $(bindir) + $(INSTALL) -m 755 lzf $(bindir) + $(INSTALL) -d $(includedir) + $(INSTALL_DATA) lzf.h $(includedir) + $(INSTALL) -d $(libdir) + $(INSTALL_DATA) liblzf.a $(libdir) + +dist: + mkdir liblzf-$(VERSION) + cp -Rp LICENSE README Makefile.in config.h.in \ + configure configure.in install-sh \ + lzf.h lzfP.h lzf_c.c lzf_d.c \ + crc32.h lzf.c Changes \ + liblzf-$(VERSION) + -chown -R root.root liblzf-$(VERSION) + chmod -R u=rwX,go=rX liblzf-$(VERSION) + tar cvf - liblzf-$(VERSION) | gzip -9 >liblzf-$(VERSION).tar.gz + rm -rf liblzf-$(VERSION) + ls -l liblzf-$(VERSION).tar.gz + +Makefile: Makefile.in + ./config.status + +bench: Makefile liblzf.a bench.c + $(CC) $(CFLAGS) -g -o bench bench.c -L. -llzf + diff --git a/README b/README new file mode 100644 index 0000000..dd55f72 --- /dev/null +++ b/README @@ -0,0 +1,24 @@ +DESCRIPTION + LZF is an extremely fast (not that much slower than a pure memcpy) + compression algorithm. It is ideal for applications where you want to + save *some* space but not at the cost of speed. It is ideal for + repetitive data as well. The module is self-contained and very small (no + large library to be pulled in). + + I have no idea wether any patents in any countries apply to this + algorithm, but at the moment it is believed that it is free from any + patents. More importantly, it is also free to use in every software + package (see COPYING). + + See the lzf.h file for details on how the functions in this + mini-library are to be used. + + NOTE: This package contains a very bare-bones command-line utility + which is neither optimized for speed nor for compression. This library + is really intented to be used inside larger programs. + +AUTHOR + This library was written by Marc Lehmann (See also + http://liblzf.plan9.de/). + + diff --git a/bench.c b/bench.c new file mode 100644 index 0000000..2700c46 --- /dev/null +++ b/bench.c @@ -0,0 +1,63 @@ +#include +#include +#include + +#include "lzf.h" + +typedef unsigned long tval; +typedef unsigned long long stamp64; + +extern inline tval stamp(void) +{ + tval tsc; + asm volatile("rdtsc" : "=a" (tsc) : : "edx"); + return tsc; +} + +extern inline tval measure(tval t) +{ + tval tsc; + asm volatile("rdtsc" : "=a" (tsc) : : "edx"); + if (tsc>t) + return tsc-t; + else + return t-tsc; +} + +#define DSIZE 1000000 + +unsigned char data[DSIZE], data2[DSIZE*2], data3[DSIZE*2]; + +int main(void) +{ + tval s; + tval si[1000]; + int i, l, j; + int min = 1<<30; + + FILE *f = fopen ("data", "r"); + fread (data, DSIZE, 1, f); + fclose (f); + + for(;;) { + s=stamp(); + l = lzf_compress (data, DSIZE, data2, DSIZE*2); + j = lzf_decompress (data2, l, data3, DSIZE*2); + si[0]=measure(s); + + printf ("\r%10d (%d) ", si[0], l); + if (si[0] < min && si[0] > 0) + { + printf ("\n"); + min = si[0]; + } + + fflush (stdout); + + assert (memcmp (data, data3, DSIZE) == 0); + } + return 0; +} + + + diff --git a/configure.in b/configure.in new file mode 100644 index 0000000..33b5921 --- /dev/null +++ b/configure.in @@ -0,0 +1,22 @@ +AC_INIT(lzfP.h) + +AC_CONFIG_HEADER(config.h) + +AC_PROG_CC +AC_PROG_RANLIB +AC_PROG_INSTALL +AC_HEADER_STDC + +AC_CHECK_SIZEOF(short, 2) +AC_CHECK_SIZEOF(int, 4) +AC_CHECK_SIZEOF(long, 4) + +AC_C_CONST + +if test "$GCC" = yes; then + CFLAGS="$CFLAGS -O2 -funroll-all-loops" +else + AC_MSG_RESULT(no gcc) +fi + +AC_OUTPUT(Makefile) diff --git a/crc32.h b/crc32.h new file mode 100644 index 0000000..cf8f6d4 --- /dev/null +++ b/crc32.h @@ -0,0 +1,65 @@ +#ifndef CRC32_H +#define CRC32_H + +/* crc32 0xdebb20e3 table and supplementary functions. */ + +static const u32 crc_32_tab[] = +{ + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dL +}; + +#define crc32(crc,byte) (crc_32_tab[(u8)(crc) ^ (u8)(byte)] ^ ((crc) >> 8)) + +#endif + diff --git a/lzf.c b/lzf.c new file mode 100644 index 0000000..1582d3c --- /dev/null +++ b/lzf.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2000 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include +#include +#include + +#include +#include + +#include "lzf.h" + +static void +usage (int ec) +{ + fprintf (stderr, "\n" + "lzf, a very leightweight compression/decompression filter\n" + "written by Marc Lehmann You can find more info at\n" + "http://liblzv.plan9.de/\n" + "\n" + "USAGE: lzf -c [-b blocksize] | -d\n" + " -c compress\n" + " -d decompress\n" + " -b specify the blocksize (default 64k-1)\n" + "\n" + ); + + exit (ec); +} + +/* + * Anatomy: an lzf file consists of any number of blocks in the following format: + * + * "ZV\0" 2-byte-usize + * "ZV\1" 2-byte-csize 2-byte-usize + * "ZV\2" 4-byte-crc32-0xdebb20e3 (NYI) + * + */ + +static void compress (unsigned int blocksize) +{ + ssize_t us; + unsigned int cs; + u8 buff1[64*1024]; + u8 buff2[64*1024]; + u8 header[3+2+2]; + + header[0] = 'Z'; + header[1] = 'V'; + + for(;;) { + us = fread (buff1, 1, blocksize, stdin); + + if (us < blocksize) + { + if (us == 0) + break; + else if (!feof (stdin)) + { + perror ("compress"); + exit (1); + } + } + + cs = lzf_compress (buff1, us, buff2, us - 4); + + if (cs) + { + header[2] = 1; + header[3] = cs >> 8; + header[4] = cs & 0xff; + header[5] = us >> 8; + header[6] = us & 0xff; + + fwrite (header, 3+2+2, 1, stdout); + fwrite (buff2, cs, 1, stdout); + } + else + { + header[2] = 0; + header[3] = us >> 8; + header[4] = us & 0xff; + + fwrite (header, 3+2, 1, stdout); + fwrite (buff1, us, 1, stdout); + } + } while (!feof (stdin)); +} + +static void decompress (void) +{ + ssize_t us; + unsigned int cs; + u8 buff1[64*1024]; + u8 buff2[64*1024]; + u8 header[3+2+2]; + + for(;;) { + if (fread (header, 3+2, 1, stdin) != 1) + { + if (feof (stdin)) + break; + else + { + perror ("decompress"); + exit (1); + } + } + + if (header[0] != 'Z' || header[1] != 'V') + { + fprintf (stderr, "decompress: invalid stream - no magic number found\n"); + exit (1); + } + + cs = (header[3] << 8) | header[4]; + + if (header[2] == 1) + { + if (fread (header+3+2, 2, 1, stdin) != 1) + { + perror ("decompress"); + exit (1); + } + + us = (header[5] << 8) | header[6]; + + if (fread (buff1, cs, 1, stdin) != 1) + { + perror ("decompress"); + exit (1); + } + + if (lzf_decompress (buff1, cs, buff2, us) != us) + { + fprintf (stderr, "decompress: invalid stream - data corrupted\n"); + exit (1); + } + + fwrite (buff2, us, 1, stdout); + } + else if (header[2] == 0) + { + if (fread (buff2, cs, 1, stdin) != 1) + { + perror ("decompress"); + exit (1); + } + + fwrite (buff2, cs, 1, stdout); + } + else + { + fprintf (stderr, "decompress: invalid stream - unknown block type\n"); + exit (1); + } + } +} + +int +main (int argc, char *argv[]) +{ + int c; + unsigned int blocksize = 64*1024-1; + enum { m_compress, m_decompress } mode = m_compress; + + while ((c = getopt (argc, argv, "cdb:h")) != -1) + switch (c) + { + case 'c': + mode = m_compress; + break; + + case 'd': + mode = m_decompress; + break; + + case 'b': + blocksize = atol (optarg); + break; + + case 'h': + usage (0); + + case ':': + fprintf (stderr, "required argument missing, use -h\n"); + exit (1); + + case '?': + fprintf (stderr, "unknown option, use -h\n"); + exit (1); + + default: + usage (1); + } + + if (mode == m_compress) + compress (blocksize); + else if (mode == m_decompress) + decompress (); + else + abort (); + + return 0; +} diff --git a/lzf.h b/lzf.h new file mode 100644 index 0000000..d6655a2 --- /dev/null +++ b/lzf.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2000 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LZF_H +#define LZF_H + +/*********************************************************************** +** +** lzf -- an extremely fast/free compression/decompression-method +** http://liblzf.plan9.de/ +** +** Based on ideas by Hermann Vogt, but liblzf is a total +** re-implementation of LZV that is not compatible to the +** original lzv code. +** +** This algorithm is believed to be patent-free. +** +***********************************************************************/ + +/* + * Compress in_len bytes stored at the memory block starting at + * in_data and write the result to out_data, up to a maximum length + * of out_len bytes. + * + * If the output buffer is not large enough or any error occurs + * return 0, otherwise return the number of bytes used (which might + * be considerably larger than in_len, so it makes sense to always + * use out_len == in_len). + * + * lzf_compress might use different algorithms on different systems and + * thus might result in different compressed strings depending on the + * phase of the moon or similar factors. However, all these strings are + * architecture-independent and will result in the original data when + * decompressed using lzf_decompress. + * + * The buffers must not be overlapping. + * + */ +unsigned int +lzf_compress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len); + +/* + * Decompress data compressed with some version of the lzf_compress + * function and stored at location in_data and length in_len. The result + * will be stored at out_data up to a maximum of out_len characters. + * + * If * the output buffer is not large enough to hold the decompressed + * data, a 0 is returned and errno is set to E2BIG. Otherwise the number + * of decompressed bytes (i.e. the original length of the data) is + * returned. + * + * If an error in the compressed data is detected, a zero is returned and + * errno is set to EINVAL. + * + * This function is very fast, about as fast as a copying loop. + */ +unsigned int +lzf_decompress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len); + +#endif + diff --git a/lzfP.h b/lzfP.h new file mode 100644 index 0000000..43acbe2 --- /dev/null +++ b/lzfP.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2000 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LZFP_h +#define LZFP_h + +#define STANDALONE /* at the moment, this is ok. */ + +#ifndef STANDALONE +# include "lzf.h" +#endif + +/* + * size of hashtable is (1 << HLOG) * sizeof (char *) + * decompression is independent of the hash table size + * the difference between 15 and 14 is very small + * for small blocks (and 14 is also faster). + * For a low-memory configuration, use HLOG == 13; + * For best compression, use 15 or 16. + */ +#ifndef HLOG +# define HLOG 14 +#endif + +/* + * sacrifice some compression quality in favour of compression speed. + * (roughly 1-2% worse compression for large blocks and + * 9-10% for small, redundant, blocks and 20% better speed in both cases) + * In short: enable this for binary data, disable this for text data. + */ +#ifndef ULTRA_FAST +# define ULTRA_FAST 1 +#endif + +/* + * unconditionally aligning does not cost very much, so do it if unsure + */ +#ifndef STRICT_ALIGN +# define STRICT_ALIGN !defined(__i386) +#endif + +/* + * use string functions to copy memory. + * this is usually a loss, even with glibc's optimized memcpy + */ +#ifndef USE_MEMCPY +# define USE_MEMCPY 0 +#endif + +/* + * you may choose to pre-set the hash table (might be faster on modern cpus + * and large (>>64k) blocks) + */ +#ifndef INIT_HTAB +# define INIT_HTAB 0 +#endif + +/*****************************************************************************/ +/* nothing should be changed below */ + +typedef unsigned char u8; + +#if !STRICT_ALIGN +/* for unaligned accesses we need a 16 bit datatype. */ +# include +# if USHRT_MAX == 65535 + typedef unsigned short u16; +# elif UINT_MAX == 65535 + typedef unsigned int u16; +# else +# warn need 16 bit datatype when STRICT_ALIGN == 0, this is non-fatal +# undef STRICT_ALIGN +# define STRICT_ALIGN 1 +# endif +#endif + +#if USE_MEMCPY || INIT_HTAB +# include +#endif + +#endif + diff --git a/lzf_c.c b/lzf_c.c new file mode 100644 index 0000000..97ac814 --- /dev/null +++ b/lzf_c.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2000 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "lzfP.h" + +#define HSIZE (1 << (HLOG)) + +/* + * don't play with this unless you benchmark! + * decompression is not dependent on the hash function + * the hashing function might seem strange, just believe me + * it works ;) + */ +#define FRST(p) (((p[0]) << 8) + p[1]) +#define NEXT(v,p) (((v) << 8) + p[2]) +#define IDX(h) ((((h ^ (h << 4)) >> (3*8 - HLOG)) + h*3) & (HSIZE - 1)) +/* + * IDX works because it is very similar to a multiplicative hash, e.g. + * (h * 57321 >> (3*8 - HLOG)) + * the next one is also quite good, albeit slow ;) + * (int)(cos(h & 0xffffff) * 1e6) + */ + +#if 0 +/* original lzv-like hash function */ +# define FRST(p) (p[0] << 5) ^ p[1] +# define NEXT(v,p) (v << 5) ^ p[2] +# define IDX(h) (h) & (HSIZE - 1) +#endif + +#define MAX_LIT (1 << 5) +#define MAX_OFF (1 << 13) +#define MAX_REF ((1 << 8) + (1 << 3)) + +/* + * compressed format + * + * 000LLLLL ; literal + * LLLOOOOO oooooooo ; backref L + * 111OOOOO LLLLLLLL oooooooo ; backref L+7 + * + */ + +unsigned int +lzf_compress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len) +{ + const u8 *htab[HSIZE]; + const u8 **hslot; + const u8 *ip = in_data; + u8 *op = out_data; + const u8 *in_end = ip + in_len; + u8 *out_end = op + out_len; + const u8 *ref; + + unsigned int hval = FRST (ip); + unsigned long off; + int lit = 0; + +#if INIT_HTAB +# if USE_MEMCPY + memset (htab, 0, sizeof (htab)); +# else + for (hslot = htab; hslot < htab + HSIZE; hslot++) + *hslot++ = ip; +# endif +#endif + + do + { + hval = NEXT (hval, ip); + hslot = htab + IDX (hval); + ref = *hslot; *hslot = ip; + + if (1 +#if INIT_HTAB && !USE_MEMCPY + && ref < ip /* the next test will actually take care of this, but it is faster */ +#endif + && (off = ip - ref - 1) < MAX_OFF + && ip + 4 < in_end + && ref > (u8 *)in_data +#if STRICT_ALIGN + && ref[0] == ip[0] + && ref[1] == ip[1] + && ref[2] == ip[2] +#else + && *(u16 *)ref == *(u16 *)ip + && ref[2] == ip[2] +#endif + ) + { + /* match found at *ref++ */ + unsigned int len = 2; + unsigned int maxlen = in_end - ip - len; + maxlen = maxlen > MAX_REF ? MAX_REF : maxlen; + + do + len++; + while (len < maxlen && ref[len] == ip[len]); + + if (op + lit + 1 + 3 >= out_end) + return 0; + + if (lit) + { + *op++ = lit - 1; + lit = -lit; + do + *op++ = ip[lit]; + while (++lit); + } + + len -= 2; + ip++; + + if (len < 7) + { + *op++ = (off >> 8) + (len << 5); + } + else + { + *op++ = (off >> 8) + ( 7 << 5); + *op++ = len - 7; + } + + *op++ = off; + +#if ULTRA_FAST + ip += len; + hval = FRST (ip); + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; +#else + do + { + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; + } + while (len--); +#endif + } + else + { + /* one more literal byte we must copy */ + lit++; + ip++; + + if (lit == MAX_LIT) + { + if (op + 1 + MAX_LIT >= out_end) + return 0; + + *op++ = MAX_LIT - 1; +#if USE_MEMCPY + memcpy (op, ip - MAX_LIT, MAX_LIT); + op += MAX_LIT; + lit = 0; +#else + lit = -lit; + do + *op++ = ip[lit]; + while (++lit); +#endif + } + } + } + while (ip < in_end); + + if (lit) + { + if (op + lit + 1 >= out_end) + return 0; + + *op++ = lit - 1; + lit = -lit; + do + *op++ = ip[lit]; + while (++lit); + } + + return op - (u8 *) out_data; +} diff --git a/lzf_d.c b/lzf_d.c new file mode 100644 index 0000000..5b1fe8f --- /dev/null +++ b/lzf_d.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2000 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "lzfP.h" + +unsigned int +lzf_decompress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len) +{ + u8 const *ip = in_data; + u8 *op = out_data; + u8 const *const in_end = ip + in_len; + u8 *const out_end = op + out_len; + + do + { + unsigned int ctrl = *ip++; + + if (ctrl < (1 << 5)) /* literal run */ + { + ctrl++; + + if (op + ctrl > out_end) + { + errno = E2BIG; + return 0; + } + +#if USE_MEMCPY + memcpy (op, ip, ctrl); + op += ctrl; + ip += ctrl; +#else + do + *op++ = *ip++; + while (--ctrl); +#endif + } + else /* back reference */ + { + unsigned int len = ctrl >> 5; + + u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; + + if (len == 7) + len += *ip++; + + ref -= *ip++; + + if (op + len + 2 > out_end) + { + errno = E2BIG; + return 0; + } + + if (ref < (u8 *)out_data) + { + errno = EINVAL; + return 0; + } + + *op++ = *ref++; + *op++ = *ref++; + + do + *op++ = *ref++; + while (--len); + } + } + while (op < out_end && ip < in_end); + + return op - (u8 *)out_data; +} + -- cgit v1.2.3