diff options
| author | Nicolas "Pixel" Noble <pixel@nobis-crew.org> | 2013-12-18 22:07:51 -0800 | 
|---|---|---|
| committer | Nicolas "Pixel" Noble <pixel@nobis-crew.org> | 2013-12-18 22:07:51 -0800 | 
| commit | b2f3f5217a0e9833479367bc3ebbb7926819b71b (patch) | |
| tree | 1a9cd4b581cd6435fc12de5538d205216d312e9c /win32/zlib/examples | |
| parent | 838eca1ee266100da43656ddaa817fc1db604294 (diff) | |
Adding zlib for Visual Studio.
Diffstat (limited to 'win32/zlib/examples')
| -rw-r--r-- | win32/zlib/examples/README.examples | 49 | ||||
| -rw-r--r-- | win32/zlib/examples/enough.c | 572 | ||||
| -rw-r--r-- | win32/zlib/examples/fitblk.c | 233 | ||||
| -rw-r--r-- | win32/zlib/examples/gun.c | 702 | ||||
| -rw-r--r-- | win32/zlib/examples/gzappend.c | 504 | ||||
| -rw-r--r-- | win32/zlib/examples/gzjoin.c | 449 | ||||
| -rw-r--r-- | win32/zlib/examples/gzlog.c | 1059 | ||||
| -rw-r--r-- | win32/zlib/examples/gzlog.h | 91 | ||||
| -rw-r--r-- | win32/zlib/examples/zlib_how.html | 545 | ||||
| -rw-r--r-- | win32/zlib/examples/zpipe.c | 205 | ||||
| -rw-r--r-- | win32/zlib/examples/zran.c | 409 | 
11 files changed, 4818 insertions, 0 deletions
| diff --git a/win32/zlib/examples/README.examples b/win32/zlib/examples/README.examples new file mode 100644 index 0000000..56a3171 --- /dev/null +++ b/win32/zlib/examples/README.examples @@ -0,0 +1,49 @@ +This directory contains examples of the use of zlib and other relevant +programs and documentation. + +enough.c +    calculation and justification of ENOUGH parameter in inftrees.h +    - calculates the maximum table space used in inflate tree +      construction over all possible Huffman codes + +fitblk.c +    compress just enough input to nearly fill a requested output size +    - zlib isn't designed to do this, but fitblk does it anyway + +gun.c +    uncompress a gzip file +    - illustrates the use of inflateBack() for high speed file-to-file +      decompression using call-back functions +    - is approximately twice as fast as gzip -d +    - also provides Unix uncompress functionality, again twice as fast + +gzappend.c +    append to a gzip file +    - illustrates the use of the Z_BLOCK flush parameter for inflate() +    - illustrates the use of deflatePrime() to start at any bit + +gzjoin.c +    join gzip files without recalculating the crc or recompressing +    - illustrates the use of the Z_BLOCK flush parameter for inflate() +    - illustrates the use of crc32_combine() + +gzlog.c +gzlog.h +    efficiently and robustly maintain a message log file in gzip format +    - illustrates use of raw deflate, Z_PARTIAL_FLUSH, deflatePrime(), +      and deflateSetDictionary() +    - illustrates use of a gzip header extra field + +zlib_how.html +    painfully comprehensive description of zpipe.c (see below) +    - describes in excruciating detail the use of deflate() and inflate() + +zpipe.c +    reads and writes zlib streams from stdin to stdout +    - illustrates the proper use of deflate() and inflate() +    - deeply commented in zlib_how.html (see above) + +zran.c +    index a zlib or gzip stream and randomly access it +    - illustrates the use of Z_BLOCK, 
inflatePrime(), and +      inflateSetDictionary() to provide random access diff --git a/win32/zlib/examples/enough.c b/win32/zlib/examples/enough.c new file mode 100644 index 0000000..b991144 --- /dev/null +++ b/win32/zlib/examples/enough.c @@ -0,0 +1,572 @@ +/* enough.c -- determine the maximum size of inflate's Huffman code tables over + * all possible valid and complete Huffman codes, subject to a length limit. + * Copyright (C) 2007, 2008, 2012 Mark Adler + * Version 1.4  18 August 2012  Mark Adler + */ + +/* Version history: +   1.0   3 Jan 2007  First version (derived from codecount.c version 1.4) +   1.1   4 Jan 2007  Use faster incremental table usage computation +                     Prune examine() search on previously visited states +   1.2   5 Jan 2007  Comments clean up +                     As inflate does, decrease root for short codes +                     Refuse cases where inflate would increase root +   1.3  17 Feb 2008  Add argument for initial root table size +                     Fix bug for initial root table size == max - 1 +                     Use a macro to compute the history index +   1.4  18 Aug 2012  Avoid shifts more than bits in type (caused endless loop!) +                     Clean up comparisons of different types +                     Clean up code indentation + */ + +/* +   Examine all possible Huffman codes for a given number of symbols and a +   maximum code length in bits to determine the maximum table size for zlib's +   inflate.  Only complete Huffman codes are counted. + +   Two codes are considered distinct if the vectors of the number of codes per +   length are not identical.  So permutations of the symbol assignments result +   in the same code for the counting, as do permutations of the assignments of +   the bit values to the codes (i.e. only canonical codes are counted). + +   We build a code from shorter to longer lengths, determining how many symbols +   are coded at each length.  
At each step, we have how many symbols remain to +   be coded, what the last code length used was, and how many bit patterns of +   that length remain unused. Then we add one to the code length and double the +   number of unused patterns to graduate to the next code length.  We then +   assign all portions of the remaining symbols to that code length that +   preserve the properties of a correct and eventually complete code.  Those +   properties are: we cannot use more bit patterns than are available; and when +   all the symbols are used, there are exactly zero possible bit patterns +   remaining. + +   The inflate Huffman decoding algorithm uses two-level lookup tables for +   speed.  There is a single first-level table to decode codes up to root bits +   in length (root == 9 in the current inflate implementation).  The table +   has 1 << root entries and is indexed by the next root bits of input.  Codes +   shorter than root bits have replicated table entries, so that the correct +   entry is pointed to regardless of the bits that follow the short code.  If +   the code is longer than root bits, then the table entry points to a second- +   level table.  The size of that table is determined by the longest code with +   that root-bit prefix.  If that longest code has length len, then the table +   has size 1 << (len - root), to index the remaining bits in that set of +   codes.  Each subsequent root-bit prefix then has its own sub-table.  The +   total number of table entries required by the code is calculated +   incrementally as the number of codes at each bit length is populated.  When +   all of the codes are shorter than root bits, then root is reduced to the +   longest code length, resulting in a single, smaller, one-level table. + +   The inflate algorithm also provides for small values of root (relative to +   the log2 of the number of symbols), where the shortest code has more bits +   than root.  
In that case, root is increased to the length of the shortest +   code.  This program, by design, does not handle that case, so it is verified +   that the number of symbols is less than 2^(root + 1). + +   In order to speed up the examination (by about ten orders of magnitude for +   the default arguments), the intermediate states in the build-up of a code +   are remembered and previously visited branches are pruned.  The memory +   required for this will increase rapidly with the total number of symbols and +   the maximum code length in bits.  However this is a very small price to pay +   for the vast speedup. + +   First, all of the possible Huffman codes are counted, and reachable +   intermediate states are noted by a non-zero count in a saved-results array. +   Second, the intermediate states that lead to (root + 1) bit or longer codes +   are used to look at all sub-codes from those junctures for their inflate +   memory usage.  (The amount of memory used is not affected by the number of +   codes of root bits or less in length.)  Third, the visited states in the +   construction of those sub-codes and the associated calculation of the table +   size is recalled in order to avoid recalculating from the same juncture. +   Beginning the code examination at (root + 1) bit codes, which is enabled by +   identifying the reachable nodes, accounts for about six of the orders of +   magnitude of improvement for the default arguments.  About another four +   orders of magnitude come from not revisiting previous states.  Out of +   approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes +   need to be examined to cover all of the possible table memory usage cases +   for the default arguments of 286 symbols limited to 15-bit codes. + +   Note that an unsigned long long type is used for counting.  
It is quite easy +   to exceed the capacity of an eight-byte integer with a large number of +   symbols and a large maximum code length, so multiple-precision arithmetic +   would need to replace the unsigned long long arithmetic in that case.  This +   program will abort if an overflow occurs.  The big_t type identifies where +   the counting takes place. + +   An unsigned long long type is also used for calculating the number of +   possible codes remaining at the maximum length.  This limits the maximum +   code length to the number of bits in a long long minus the number of bits +   needed to represent the symbols in a flat code.  The code_t type identifies +   where the bit pattern counting takes place. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#define local static + +/* special data types */ +typedef unsigned long long big_t;   /* type for code counting */ +typedef unsigned long long code_t;  /* type for bit pattern counting */ +struct tab {                        /* type for been here check */ +    size_t len;         /* length of bit vector in char's */ +    char *vec;          /* allocated bit vector */ +}; + +/* The array for saving results, num[], is indexed with this triplet: + +      syms: number of symbols remaining to code +      left: number of available bit patterns at length len +      len: number of bits in the codes currently being assigned + +   Those indices are constrained thusly when saving results: + +      syms: 3..totsym (totsym == total symbols to code) +      left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) +      len: 1..max - 1 (max == maximum code length in bits) + +   syms == 2 is not saved since that immediately leads to a single code.  left +   must be even, since it represents the number of available bit patterns at +   the current length, which is double the number at the previous length. 
+   left ends at syms-1 since left == syms immediately results in a single code. +   (left > sym is not allowed since that would result in an incomplete code.) +   len is less than max, since the code completes immediately when len == max. + +   The offset into the array is calculated for the three indices with the +   first one (syms) being outermost, and the last one (len) being innermost. +   We build the array with length max-1 lists for the len index, with syms-3 +   of those for each symbol.  There are totsym-2 of those, with each one +   varying in length as a function of sym.  See the calculation of index in +   count() for the index, and the calculation of size in main() for the size +   of the array. + +   For the deflate example of 286 symbols limited to 15-bit codes, the array +   has 284,284 entries, taking up 2.17 MB for an 8-byte big_t.  More than +   half of the space allocated for saved results is actually used -- not all +   possible triplets are reached in the generation of valid Huffman codes. + */ + +/* The array for tracking visited states, done[], is itself indexed identically +   to the num[] array as described above for the (syms, left, len) triplet. +   Each element in the array is further indexed by the (mem, rem) doublet, +   where mem is the amount of inflate table space used so far, and rem is the +   remaining unused entries in the current inflate sub-table.  Each indexed +   element is simply one bit indicating whether the state has been visited or +   not.  Since the ranges for mem and rem are not known a priori, each bit +   vector is of a variable size, and grows as needed to accommodate the visited +   states.  mem and rem are used to calculate a single index in a triangular +   array.  Since the range of mem is expected in the default case to be about +   ten times larger than the range of rem, the array is skewed to reduce the +   memory usage, with eight times the range for mem than for rem.  
See the +   calculations for offset and bit in beenhere() for the details. + +   For the deflate example of 286 symbols limited to 15-bit codes, the bit +   vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[] +   array itself. + */ + +/* Globals to avoid propagating constants or constant pointers recursively */ +local int max;          /* maximum allowed bit length for the codes */ +local int root;         /* size of base code table in bits */ +local int large;        /* largest code table so far */ +local size_t size;      /* number of elements in num and done */ +local int *code;        /* number of symbols assigned to each bit length */ +local big_t *num;       /* saved results array for code counting */ +local struct tab *done; /* states already evaluated array */ + +/* Index function for num[] and done[] */ +#define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(max-1)+k-1) + +/* Free allocated space.  Uses globals code, num, and done. */ +local void cleanup(void) +{ +    size_t n; + +    if (done != NULL) { +        for (n = 0; n < size; n++) +            if (done[n].len) +                free(done[n].vec); +        free(done); +    } +    if (num != NULL) +        free(num); +    if (code != NULL) +        free(code); +} + +/* Return the number of possible Huffman codes using bit patterns of lengths +   len through max inclusive, coding syms symbols, with left bit patterns of +   length len unused -- return -1 if there is an overflow in the counting. +   Keep a record of previous results in num to prevent repeating the same +   calculation.  Uses the globals max and num. 
*/ +local big_t count(int syms, int len, int left) +{ +    big_t sum;          /* number of possible codes from this juncture */ +    big_t got;          /* value returned from count() */ +    int least;          /* least number of syms to use at this juncture */ +    int most;           /* most number of syms to use at this juncture */ +    int use;            /* number of bit patterns to use in next call */ +    size_t index;       /* index of this case in *num */ + +    /* see if only one possible code */ +    if (syms == left) +        return 1; + +    /* note and verify the expected state */ +    assert(syms > left && left > 0 && len < max); + +    /* see if we've done this one already */ +    index = INDEX(syms, left, len); +    got = num[index]; +    if (got) +        return got;         /* we have -- return the saved result */ + +    /* we need to use at least this many bit patterns so that the code won't be +       incomplete at the next length (more bit patterns than symbols) */ +    least = (left << 1) - syms; +    if (least < 0) +        least = 0; + +    /* we can use at most this many bit patterns, lest there not be enough +       available for the remaining symbols at the maximum length (if there were +       no limit to the code length, this would become: most = left - 1) */ +    most = (((code_t)left << (max - len)) - syms) / +            (((code_t)1 << (max - len)) - 1); + +    /* count all possible codes from this juncture and add them up */ +    sum = 0; +    for (use = least; use <= most; use++) { +        got = count(syms - use, len + 1, (left - use) << 1); +        sum += got; +        if (got == (big_t)0 - 1 || sum < got)   /* overflow */ +            return (big_t)0 - 1; +    } + +    /* verify that all recursive calls are productive */ +    assert(sum != 0); + +    /* save the result and return it */ +    num[index] = sum; +    return sum; +} + +/* Return true if we've been here before, set to true if not.  
Set a bit in a +   bit vector to indicate visiting this state.  Each (syms,len,left) state +   has a variable size bit vector indexed by (mem,rem).  The bit vector is +   lengthened if needed to allow setting the (mem,rem) bit. */ +local int beenhere(int syms, int len, int left, int mem, int rem) +{ +    size_t index;       /* index for this state's bit vector */ +    size_t offset;      /* offset in this state's bit vector */ +    int bit;            /* mask for this state's bit */ +    size_t length;      /* length of the bit vector in bytes */ +    char *vector;       /* new or enlarged bit vector */ + +    /* point to vector for (syms,left,len), bit in vector for (mem,rem) */ +    index = INDEX(syms, left, len); +    mem -= 1 << root; +    offset = (mem >> 3) + rem; +    offset = ((offset * (offset + 1)) >> 1) + rem; +    bit = 1 << (mem & 7); + +    /* see if we've been here */ +    length = done[index].len; +    if (offset < length && (done[index].vec[offset] & bit) != 0) +        return 1;       /* done this! 
*/ + +    /* we haven't been here before -- set the bit to show we have now */ + +    /* see if we need to lengthen the vector in order to set the bit */ +    if (length <= offset) { +        /* if we have one already, enlarge it, zero out the appended space */ +        if (length) { +            do { +                length <<= 1; +            } while (length <= offset); +            vector = realloc(done[index].vec, length); +            if (vector != NULL) +                memset(vector + done[index].len, 0, length - done[index].len); +        } + +        /* otherwise we need to make a new vector and zero it out */ +        else { +            length = 1 << (len - root); +            while (length <= offset) +                length <<= 1; +            vector = calloc(length, sizeof(char)); +        } + +        /* in either case, bail if we can't get the memory */ +        if (vector == NULL) { +            fputs("abort: unable to allocate enough memory\n", stderr); +            cleanup(); +            exit(1); +        } + +        /* install the new vector */ +        done[index].len = length; +        done[index].vec = vector; +    } + +    /* set the bit */ +    done[index].vec[offset] |= bit; +    return 0; +} + +/* Examine all possible codes from the given node (syms, len, left).  Compute +   the amount of memory required to build inflate's decoding tables, where the +   number of code structures used so far is mem, and the number remaining in +   the current sub-table is rem.  Uses the globals max, code, root, large, and +   done. 
*/ +local void examine(int syms, int len, int left, int mem, int rem) +{ +    int least;          /* least number of syms to use at this juncture */ +    int most;           /* most number of syms to use at this juncture */ +    int use;            /* number of bit patterns to use in next call */ + +    /* see if we have a complete code */ +    if (syms == left) { +        /* set the last code entry */ +        code[len] = left; + +        /* complete computation of memory used by this code */ +        while (rem < left) { +            left -= rem; +            rem = 1 << (len - root); +            mem += rem; +        } +        assert(rem == left); + +        /* if this is a new maximum, show the entries used and the sub-code */ +        if (mem > large) { +            large = mem; +            printf("max %d: ", mem); +            for (use = root + 1; use <= max; use++) +                if (code[use]) +                    printf("%d[%d] ", code[use], use); +            putchar('\n'); +            fflush(stdout); +        } + +        /* remove entries as we drop back down in the recursion */ +        code[len] = 0; +        return; +    } + +    /* prune the tree if we can */ +    if (beenhere(syms, len, left, mem, rem)) +        return; + +    /* we need to use at least this many bit patterns so that the code won't be +       incomplete at the next length (more bit patterns than symbols) */ +    least = (left << 1) - syms; +    if (least < 0) +        least = 0; + +    /* we can use at most this many bit patterns, lest there not be enough +       available for the remaining symbols at the maximum length (if there were +       no limit to the code length, this would become: most = left - 1) */ +    most = (((code_t)left << (max - len)) - syms) / +            (((code_t)1 << (max - len)) - 1); + +    /* occupy least table spaces, creating new sub-tables as needed */ +    use = least; +    while (rem < use) { +        use -= rem; +        rem = 1 << (len - root); + 
       mem += rem; +    } +    rem -= use; + +    /* examine codes from here, updating table space as we go */ +    for (use = least; use <= most; use++) { +        code[len] = use; +        examine(syms - use, len + 1, (left - use) << 1, +                mem + (rem ? 1 << (len - root) : 0), rem << 1); +        if (rem == 0) { +            rem = 1 << (len - root); +            mem += rem; +        } +        rem--; +    } + +    /* remove entries as we drop back down in the recursion */ +    code[len] = 0; +} + +/* Look at all sub-codes starting with root + 1 bits.  Look at only the valid +   intermediate code states (syms, left, len).  For each completed code, +   calculate the amount of memory required by inflate to build the decoding +   tables. Find the maximum amount of memory required and show the code that +   requires that maximum.  Uses the globals max, root, and num. */ +local void enough(int syms) +{ +    int n;              /* number of remaining symbols for this node */ +    int left;           /* number of unused bit patterns at this length */ +    size_t index;       /* index of this case in *num */ + +    /* clear code */ +    for (n = 0; n <= max; n++) +        code[n] = 0; + +    /* look at all (root + 1) bit and longer codes */ +    large = 1 << root;              /* base table */ +    if (root < max)                 /* otherwise, there's only a base table */ +        for (n = 3; n <= syms; n++) +            for (left = 2; left < n; left += 2) +            { +                /* look at all reachable (root + 1) bit nodes, and the +                   resulting codes (complete at root + 2 or more) */ +                index = INDEX(n, left, root + 1); +                if (root + 1 < max && num[index])       /* reachable node */ +                    examine(n, root + 1, left, 1 << root, 0); + +                /* also look at root bit codes with completions at root + 1 +                   bits (not saved in num, since complete), just in case */ +         
       if (num[index - 1] && n <= left << 1) +                    examine((n - left) << 1, root + 1, (n - left) << 1, +                            1 << root, 0); +            } + +    /* done */ +    printf("done: maximum of %d table entries\n", large); +} + +/* +   Examine and show the total number of possible Huffman codes for a given +   maximum number of symbols, initial root table size, and maximum code length +   in bits -- those are the command arguments in that order.  The default +   values are 286, 9, and 15 respectively, for the deflate literal/length code. +   The possible codes are counted for each number of coded symbols from two to +   the maximum.  The counts for each of those and the total number of codes are +   shown.  The maximum number of inflate table entries is then calculated +   across all possible codes.  Each new maximum number of table entries and the +   associated sub-code (starting at root + 1 == 10 bits) is shown. + +   To count and examine Huffman codes that are not length-limited, provide a +   maximum length equal to the number of symbols minus one. + +   For the deflate literal/length code, use "enough".  For the deflate distance +   code, use "enough 30 6". + +   This uses the %llu printf format to print big_t numbers, which assumes that +   big_t is an unsigned long long.  If the big_t type is changed (for example +   to a multiple precision type), the method of printing will also need to be +   updated. 
+ */ +int main(int argc, char **argv) +{ +    int syms;           /* total number of symbols to code */ +    int n;              /* number of symbols to code for this run */ +    big_t got;          /* return value of count() */ +    big_t sum;          /* accumulated number of codes over n */ +    code_t word;        /* for counting bits in code_t */ + +    /* set up globals for cleanup() */ +    code = NULL; +    num = NULL; +    done = NULL; + +    /* get arguments -- default to the deflate literal/length code */ +    syms = 286; +    root = 9; +    max = 15; +    if (argc > 1) { +        syms = atoi(argv[1]); +        if (argc > 2) { +            root = atoi(argv[2]); +            if (argc > 3) +                max = atoi(argv[3]); +        } +    } +    if (argc > 4 || syms < 2 || root < 1 || max < 1) { +        fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", +              stderr); +        return 1; +    } + +    /* if not restricting the code length, the longest is syms - 1 */ +    if (max > syms - 1) +        max = syms - 1; + +    /* determine the number of bits in a code_t */ +    for (n = 0, word = 1; word; n++, word <<= 1) +        ; + +    /* make sure that the calculation of most will not overflow */ +    if (max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (max - 1))) { +        fputs("abort: code length too long for internal types\n", stderr); +        return 1; +    } + +    /* reject impossible code requests */ +    if ((code_t)(syms - 1) > ((code_t)1 << max) - 1) { +        fprintf(stderr, "%d symbols cannot be coded in %d bits\n", +                syms, max); +        return 1; +    } + +    /* allocate code vector */ +    code = calloc(max + 1, sizeof(int)); +    if (code == NULL) { +        fputs("abort: unable to allocate enough memory\n", stderr); +        return 1; +    } + +    /* determine size of saved results array, checking for overflows, +       allocate and clear the array (set all to zero with calloc()) */ 
+    if (syms == 2)              /* iff max == 1 */ +        num = NULL;             /* won't be saving any results */ +    else { +        size = syms >> 1; +        if (size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || +                (size *= n, size > ((size_t)0 - 1) / (n = max - 1)) || +                (size *= n, size > ((size_t)0 - 1) / sizeof(big_t)) || +                (num = calloc(size, sizeof(big_t))) == NULL) { +            fputs("abort: unable to allocate enough memory\n", stderr); +            cleanup(); +            return 1; +        } +    } + +    /* count possible codes for all numbers of symbols, add up counts */ +    sum = 0; +    for (n = 2; n <= syms; n++) { +        got = count(n, 1, 2); +        sum += got; +        if (got == (big_t)0 - 1 || sum < got) {     /* overflow */ +            fputs("abort: can't count that high!\n", stderr); +            cleanup(); +            return 1; +        } +        printf("%llu %d-codes\n", got, n); +    } +    printf("%llu total codes for 2 to %d symbols", sum, syms); +    if (max < syms - 1) +        printf(" (%d-bit length limit)\n", max); +    else +        puts(" (no length limit)"); + +    /* allocate and clear done array for beenhere() */ +    if (syms == 2) +        done = NULL; +    else if (size > ((size_t)0 - 1) / sizeof(struct tab) || +             (done = calloc(size, sizeof(struct tab))) == NULL) { +        fputs("abort: unable to allocate enough memory\n", stderr); +        cleanup(); +        return 1; +    } + +    /* find and show maximum inflate table usage */ +    if (root > max)                 /* reduce root to max length */ +        root = max; +    if ((code_t)syms < ((code_t)1 << (root + 1))) +        enough(syms); +    else +        puts("cannot handle minimum code lengths > root"); + +    /* done */ +    cleanup(); +    return 0; +} diff --git a/win32/zlib/examples/fitblk.c b/win32/zlib/examples/fitblk.c new file mode 100644 index 0000000..c61de5c --- /dev/null +++ 
b/win32/zlib/examples/fitblk.c @@ -0,0 +1,233 @@ +/* fitblk.c: example of fitting compressed output to a specified size +   Not copyrighted -- provided to the public domain +   Version 1.1  25 November 2004  Mark Adler */ + +/* Version history: +   1.0  24 Nov 2004  First version +   1.1  25 Nov 2004  Change deflateInit2() to deflateInit() +                     Use fixed-size, stack-allocated raw buffers +                     Simplify code moving compression to subroutines +                     Use assert() for internal errors +                     Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + +   fitblk performs three compression passes on a portion of the input +   data in order to determine how much of that input will compress to +   nearly the requested output block size.  The first pass generates +   enough deflate blocks to produce output to fill the requested +   output size plus a specified excess amount (see the EXCESS define +   below).  The last deflate block may go quite a bit past that, but +   is discarded.  The second pass decompresses and recompresses just +   the compressed data that fit in the requested plus excess sized +   buffer.  The deflate process is terminated after that amount of +   input, which is less than the amount consumed on the first pass. +   The last deflate block of the result will be of a comparable size +   to the final product, so that the header for that deflate block and +   the compression ratio for that block will be about the same as in +   the final product.  The third compression pass decompresses the +   result of the second step, but only the compressed data up to the +   requested size minus an amount to allow the compressed stream to +   complete (see the MARGIN define below).  That will result in a +   final compressed stream whose length is less than or equal to the +   requested size.  
Assuming sufficient input and a requested size +   greater than a few hundred bytes, the shortfall will typically be +   less than ten bytes. + +   If the input is short enough that the first compression completes +   before filling the requested output size, then that compressed +   stream is returned with no recompression. + +   EXCESS is chosen to be just greater than the shortfall seen in a +   two pass approach similar to the above.  That shortfall is due to +   the last deflate block compressing more efficiently with a smaller +   header on the second pass.  EXCESS is set to be large enough so +   that there is enough uncompressed data for the second pass to fill +   out the requested size, and small enough so that the final deflate +   block of the second pass will be close in size to the final deflate +   block of the third and final pass.  MARGIN is chosen to be just +   large enough to assure that the final compression has enough room +   to complete in all cases. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "zlib.h" + +#define local static + +/* print nastygram and leave */ +local void quit(char *why) +{ +    fprintf(stderr, "fitblk abort: %s\n", why); +    exit(1); +} + +#define RAWLEN 4096    /* intermediate uncompressed buffer size */ + +/* compress from file to def until provided buffer is full or end of +   input reached; return last deflate() return value, or Z_ERRNO if +   there was read error on the file */ +local int partcompress(FILE *in, z_streamp def) +{ +    int ret, flush; +    unsigned char raw[RAWLEN]; + +    flush = Z_NO_FLUSH; +    do { +        def->avail_in = fread(raw, 1, RAWLEN, in); +        if (ferror(in)) +            return Z_ERRNO; +        def->next_in = raw; +        if (feof(in)) +            flush = Z_FINISH; +        ret = deflate(def, flush); +        assert(ret != Z_STREAM_ERROR); +    } while (def->avail_out != 0 && flush == Z_NO_FLUSH); +    return ret; +} + +/* recompress from inf's 
input to def's output; the input for inf and +   the output for def are set in those structures before calling; +   return last deflate() return value, or Z_MEM_ERROR if inflate() +   was not able to allocate enough memory when it needed to */ +local int recompress(z_streamp inf, z_streamp def) +{ +    int ret, flush; +    unsigned char raw[RAWLEN]; + +    flush = Z_NO_FLUSH; +    do { +        /* decompress */ +        inf->avail_out = RAWLEN; +        inf->next_out = raw; +        ret = inflate(inf, Z_NO_FLUSH); +        assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && +               ret != Z_NEED_DICT); +        if (ret == Z_MEM_ERROR) +            return ret; + +        /* compress what was decompresed until done or no room */ +        def->avail_in = RAWLEN - inf->avail_out; +        def->next_in = raw; +        if (inf->avail_out != 0) +            flush = Z_FINISH; +        ret = deflate(def, flush); +        assert(ret != Z_STREAM_ERROR); +    } while (ret != Z_STREAM_END && def->avail_out != 0); +    return ret; +} + +#define EXCESS 256      /* empirically determined stream overage */ +#define MARGIN 8        /* amount to back off for completion */ + +/* compress from stdin to fixed-size block on stdout */ +int main(int argc, char **argv) +{ +    int ret;                /* return code */ +    unsigned size;          /* requested fixed output block size */ +    unsigned have;          /* bytes written by deflate() call */ +    unsigned char *blk;     /* intermediate and final stream */ +    unsigned char *tmp;     /* close to desired size stream */ +    z_stream def, inf;      /* zlib deflate and inflate states */ + +    /* get requested output size */ +    if (argc != 2) +        quit("need one argument: size of output block"); +    ret = strtol(argv[1], argv + 1, 10); +    if (argv[1][0] != 0) +        quit("argument must be a number"); +    if (ret < 8)            /* 8 is minimum zlib stream size */ +        quit("need positive size of 8 or 
greater"); +    size = (unsigned)ret; + +    /* allocate memory for buffers and compression engine */ +    blk = malloc(size + EXCESS); +    def.zalloc = Z_NULL; +    def.zfree = Z_NULL; +    def.opaque = Z_NULL; +    ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); +    if (ret != Z_OK || blk == NULL) +        quit("out of memory"); + +    /* compress from stdin until output full, or no more input */ +    def.avail_out = size + EXCESS; +    def.next_out = blk; +    ret = partcompress(stdin, &def); +    if (ret == Z_ERRNO) +        quit("error reading input"); + +    /* if it all fit, then size was undersubscribed -- done! */ +    if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { +        /* write block to stdout */ +        have = size + EXCESS - def.avail_out; +        if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) +            quit("error writing output"); + +        /* clean up and print results to stderr */ +        ret = deflateEnd(&def); +        assert(ret != Z_STREAM_ERROR); +        free(blk); +        fprintf(stderr, +                "%u bytes unused out of %u requested (all input)\n", +                size - have, size); +        return 0; +    } + +    /* it didn't all fit -- set up for recompression */ +    inf.zalloc = Z_NULL; +    inf.zfree = Z_NULL; +    inf.opaque = Z_NULL; +    inf.avail_in = 0; +    inf.next_in = Z_NULL; +    ret = inflateInit(&inf); +    tmp = malloc(size + EXCESS); +    if (ret != Z_OK || tmp == NULL) +        quit("out of memory"); +    ret = deflateReset(&def); +    assert(ret != Z_STREAM_ERROR); + +    /* do first recompression close to the right amount */ +    inf.avail_in = size + EXCESS; +    inf.next_in = blk; +    def.avail_out = size + EXCESS; +    def.next_out = tmp; +    ret = recompress(&inf, &def); +    if (ret == Z_MEM_ERROR) +        quit("out of memory"); + +    /* set up for next reocmpression */ +    ret = inflateReset(&inf); +    assert(ret != Z_STREAM_ERROR); +    ret = deflateReset(&def); + 
   assert(ret != Z_STREAM_ERROR); + +    /* do second and final recompression (third compression) */ +    inf.avail_in = size - MARGIN;   /* assure stream will complete */ +    inf.next_in = tmp; +    def.avail_out = size; +    def.next_out = blk; +    ret = recompress(&inf, &def); +    if (ret == Z_MEM_ERROR) +        quit("out of memory"); +    assert(ret == Z_STREAM_END);    /* otherwise MARGIN too small */ + +    /* done -- write block to stdout */ +    have = size - def.avail_out; +    if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) +        quit("error writing output"); + +    /* clean up and print results to stderr */ +    free(tmp); +    ret = inflateEnd(&inf); +    assert(ret != Z_STREAM_ERROR); +    ret = deflateEnd(&def); +    assert(ret != Z_STREAM_ERROR); +    free(blk); +    fprintf(stderr, +            "%u bytes unused out of %u requested (%lu input)\n", +            size - have, size, def.total_in); +    return 0; +} diff --git a/win32/zlib/examples/gun.c b/win32/zlib/examples/gun.c new file mode 100644 index 0000000..89e484f --- /dev/null +++ b/win32/zlib/examples/gun.c @@ -0,0 +1,702 @@ +/* gun.c -- simple gunzip to give an example of the use of inflateBack() + * Copyright (C) 2003, 2005, 2008, 2010, 2012 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +   Version 1.7  12 August 2012  Mark Adler */ + +/* Version history: +   1.0  16 Feb 2003  First version for testing of inflateBack() +   1.1  21 Feb 2005  Decompress concatenated gzip streams +                     Remove use of "this" variable (C++ keyword) +                     Fix return value for in() +                     Improve allocation failure checking +                     Add typecasting for void * structures +                     Add -h option for command version and usage +                     Add a bunch of comments +   1.2  20 Mar 2005  Add Unix compress (LZW) decompression +                     Copy file attributes from input file 
to output file +   1.3  12 Jun 2005  Add casts for error messages [Oberhumer] +   1.4   8 Dec 2006  LZW decompression speed improvements +   1.5   9 Feb 2008  Avoid warning in latest version of gcc +   1.6  17 Jan 2010  Avoid signed/unsigned comparison warnings +   1.7  12 Aug 2012  Update for z_const usage in zlib 1.2.8 + */ + +/* +   gun [ -t ] [ name ... ] + +   decompresses the data in the named gzip files.  If no arguments are given, +   gun will decompress from stdin to stdout.  The names must end in .gz, -gz, +   .z, -z, _z, or .Z.  The uncompressed data will be written to a file name +   with the suffix stripped.  On success, the original file is deleted.  On +   failure, the output file is deleted.  For most failures, the command will +   continue to process the remaining names on the command line.  A memory +   allocation failure will abort the command.  If -t is specified, then the +   listed files or stdin will be tested as gzip files for integrity (without +   checking for a proper suffix), no output will be written, and no files +   will be deleted. + +   Like gzip, gun allows concatenated gzip streams and will decompress them, +   writing all of the uncompressed data to the output.  Unlike gzip, gun allows +   an empty file on input, and will produce no error writing an empty output +   file. + +   gun will also decompress files made by Unix compress, which uses LZW +   compression.  These files are automatically detected by virtue of their +   magic header bytes.  Since the end of a Unix compress stream is marked by the +   end-of-file, they cannot be concatenated.  If a Unix compress stream is +   encountered in an input file, it is the last stream in that file. + +   Like gunzip and uncompress, the file attributes of the original compressed +   file are maintained in the final uncompressed file, to the extent that the +   user permissions allow it. 
+ +   On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version +   1.2.4) is on the same file, when gun is linked with zlib 1.2.2.  Also the +   LZW decompression provided by gun is about twice as fast as the standard +   Unix uncompress command. + */ + +/* external functions and related types and constants */ +#include <stdio.h>          /* fprintf() */ +#include <stdlib.h>         /* malloc(), free() */ +#include <string.h>         /* strerror(), strcmp(), strlen(), memcpy() */ +#include <errno.h>          /* errno */ +#include <fcntl.h>          /* open() */ +#include <unistd.h>         /* read(), write(), close(), chown(), unlink() */ +#include <sys/types.h> +#include <sys/stat.h>       /* stat(), chmod() */ +#include <utime.h>          /* utime() */ +#include "zlib.h"           /* inflateBackInit(), inflateBack(), */ +                            /* inflateBackEnd(), crc32() */ + +/* function declaration */ +#define local static + +/* buffer constants */ +#define SIZE 32768U         /* input and output buffer sizes */ +#define PIECE 16384         /* limits i/o chunks for 16-bit int case */ + +/* structure for infback() to pass to input function in() -- it maintains the +   input file and a buffer of size SIZE */ +struct ind { +    int infile; +    unsigned char *inbuf; +}; + +/* Load input buffer, assumed to be empty, and return bytes loaded and a +   pointer to them.  read() is called until the buffer is full, or until it +   returns end-of-file or error.  Return 0 on error. 
*/ +local unsigned in(void *in_desc, z_const unsigned char **buf) +{ +    int ret; +    unsigned len; +    unsigned char *next; +    struct ind *me = (struct ind *)in_desc; + +    next = me->inbuf; +    *buf = next; +    len = 0; +    do { +        ret = PIECE; +        if ((unsigned)ret > SIZE - len) +            ret = (int)(SIZE - len); +        ret = (int)read(me->infile, next, ret); +        if (ret == -1) { +            len = 0; +            break; +        } +        next += ret; +        len += ret; +    } while (ret != 0 && len < SIZE); +    return len; +} + +/* structure for infback() to pass to output function out() -- it maintains the +   output file, a running CRC-32 check on the output and the total number of +   bytes output, both for checking against the gzip trailer.  (The length in +   the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and +   the output is greater than 4 GB.) */ +struct outd { +    int outfile; +    int check;                  /* true if checking crc and total */ +    unsigned long crc; +    unsigned long total; +}; + +/* Write output buffer and update the CRC-32 and total bytes written.  write() +   is called until all of the output is written or an error is encountered. +   On success out() returns 0.  For a write failure, out() returns 1.  If the +   output file descriptor is -1, then nothing is written. 
+ */ +local int out(void *out_desc, unsigned char *buf, unsigned len) +{ +    int ret; +    struct outd *me = (struct outd *)out_desc; + +    if (me->check) { +        me->crc = crc32(me->crc, buf, len); +        me->total += len; +    } +    if (me->outfile != -1) +        do { +            ret = PIECE; +            if ((unsigned)ret > len) +                ret = (int)len; +            ret = (int)write(me->outfile, buf, ret); +            if (ret == -1) +                return 1; +            buf += ret; +            len -= ret; +        } while (len != 0); +    return 0; +} + +/* next input byte macro for use inside lunpipe() and gunpipe() */ +#define NEXT() (have ? 0 : (have = in(indp, &next)), \ +                last = have ? (have--, (int)(*next++)) : -1) + +/* memory for gunpipe() and lunpipe() -- +   the first 256 entries of prefix[] and suffix[] are never used, could +   have offset the index, but it's faster to waste the memory */ +unsigned char inbuf[SIZE];              /* input buffer */ +unsigned char outbuf[SIZE];             /* output buffer */ +unsigned short prefix[65536];           /* index to LZW prefix string */ +unsigned char suffix[65536];            /* one-character LZW suffix */ +unsigned char match[65280 + 2];         /* buffer for reversed match or gzip +                                           32K sliding window */ + +/* throw out what's left in the current bits byte buffer (this is a vestigial +   aspect of the compressed data format derived from an implementation that +   made use of a special VAX machine instruction!) 
/* throw out what's left in the current bits byte buffer (this is a vestigial
   aspect of the compressed data format derived from an implementation that
   made use of a special VAX machine instruction!)

   For use inside lunpipe() only: operates directly on its locals left, rem,
   chunk, have and next (and, through NEXT(), on last and indp).  The leftover
   bits are dropped (left = rem = 0) and the chunk bytes still belonging to
   the current group of codes are skipped.  If more bytes must be skipped
   than are buffered, the buffer is discarded and refilled once with NEXT();
   when the input ends, or the refill still holds too few bytes, the macro
   gives up early (in the latter case with chunk and have both zero). */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > have) { \
            chunk -= have; \
            have = 0; \
            if (NEXT() == -1) \
                break; \
            chunk--; \
            if (chunk > have) { \
                chunk = have = 0; \
                break; \
            } \
        } \
        have -= chunk; \
        next += chunk; \
        chunk = 0; \
    } while (0)

/* Decompress a compress (LZW) file from indp to outfile.  The compress magic
   header (two bytes) has already been read and verified.  There are have bytes
   of buffered input at next.  strm is used for passing error information back
   to gunpipe().

   lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
   file, read error, or write error (a write error indicated by strm->next_in
   not equal to Z_NULL), or Z_DATA_ERROR for invalid input.

   On Z_DATA_ERROR strm->msg points to a short description of the problem.
   Decoded bytes are collected in the global outbuf[] and handed to out() a
   full buffer at a time; prefix[], suffix[] and match[] hold the LZW table
   and the reversed match string.
 */
local int lunpipe(unsigned have, z_const unsigned char *next, struct ind *indp,
                  int outfile, z_stream *strm)
{
    int last;                   /* last byte read by NEXT(), or -1 if EOF */
    unsigned chunk;             /* bytes left in current chunk */
    int left;                   /* bits left in rem */
    unsigned rem;               /* unused bits from input */
    int bits;                   /* current bits per code */
    unsigned code;              /* code, table traversal index */
    unsigned mask;              /* mask for current bits codes */
    int max;                    /* maximum bits per code for this stream */
    unsigned flags;             /* compress flags, then block compress flag */
    unsigned end;               /* last valid entry in prefix/suffix tables */
    unsigned temp;              /* current code */
    unsigned prev;              /* previous code */
    unsigned final;             /* last character written for previous code */
    unsigned stack;             /* next position for reversed string */
    unsigned outcnt;            /* bytes in output buffer */
    struct outd outd;           /* output structure */
    unsigned char *p;

    /* set up output -- check is off, so out() only writes and never touches
       the crc and total members, which are left unset here */
    outd.outfile = outfile;
    outd.check = 0;

    /* process remainder of compress header -- a flags byte */
    flags = NEXT();
    if (last == -1)
        return Z_BUF_ERROR;
    if (flags & 0x60) {
        strm->msg = (char *)"unknown lzw flags set";
        return Z_DATA_ERROR;
    }
    max = flags & 0x1f;
    if (max < 9 || max > 16) {
        strm->msg = (char *)"lzw bits out of range";
        return Z_DATA_ERROR;
    }
    if (max == 9)                           /* 9 doesn't really mean 9 */
        max = 10;
    flags &= 0x80;                          /* true if block compress */

    /* clear table -- codes start out 9 bits wide (mask has nine one bits) */
    bits = 9;
    mask = 0x1ff;
    end = flags ? 256 : 255;

    /* set up: get first 9-bit code, which is the first decompressed byte, but
       don't create a table entry until the next code */
    if (NEXT() == -1)                       /* no compressed data is ok */
        return Z_OK;
    final = prev = (unsigned)last;          /* low 8 bits of code */
    if (NEXT() == -1)                       /* missing a bit */
        return Z_BUF_ERROR;
    if (last & 1) {                         /* code must be < 256 */
        strm->msg = (char *)"invalid lzw code";
        return Z_DATA_ERROR;
    }
    rem = (unsigned)last >> 1;              /* remaining 7 bits */
    left = 7;
    chunk = bits - 2;                       /* 7 bytes left in this chunk */
    outbuf[0] = (unsigned char)final;       /* write first decompressed byte */
    outcnt = 1;

    /* decode codes */
    stack = 0;
    for (;;) {
        /* if the table will be full after this, increment the code size */
        if (end >= mask && bits < max) {
            FLUSHCODE();
            bits++;
            mask <<= 1;                     /* widen the mask by one bit */
            mask++;
        }

        /* get a code of length bits */
        if (chunk == 0)                     /* decrement chunk modulo bits */
            chunk = bits;
        code = rem;                         /* low bits of code */
        if (NEXT() == -1) {                 /* EOF is end of compressed data */
            /* write remaining buffered output */
            if (outcnt && out(&outd, outbuf, outcnt)) {
                strm->next_in = outbuf;     /* signal write error */
                return Z_BUF_ERROR;
            }
            return Z_OK;
        }
        code += (unsigned)last << left;     /* middle (or high) bits of code */
        left += 8;
        chunk--;
        if (bits > left) {                  /* need more bits */
            if (NEXT() == -1)               /* can't end in middle of code */
                return Z_BUF_ERROR;
            code += (unsigned)last << left; /* high bits of code */
            left += 8;
            chunk--;
        }
        code &= mask;                       /* mask to current code length */
        left -= bits;                       /* number of unused bits */
        rem = (unsigned)last >> (8 - left); /* unused bits from last byte */

        /* process clear code (256) */
        if (code == 256 && flags) {
            FLUSHCODE();
            bits = 9;                       /* initialize bits and mask */
            mask = 0x1ff;
            end = 255;                      /* empty table */
            continue;                       /* get next code */
        }

        /* special code to reuse last match */
        temp = code;                        /* save the current code */
        if (code > end) {
            /* Be picky on the allowed code here, and make sure that the code
               we drop through (prev) will be a valid index so that random
               input does not cause an exception.  The code != end + 1 check is
               empirically derived, and not checked in the original uncompress
               code.  If this ever causes a problem, that check could be safely
               removed.  Leaving this check in greatly improves gun's ability
               to detect random or corrupted input after a compress header.
               In any case, the prev > end check must be retained. */
            if (code != end + 1 || prev > end) {
                strm->msg = (char *)"invalid lzw code";
                return Z_DATA_ERROR;
            }
            match[stack++] = (unsigned char)final;
            code = prev;
        }

        /* walk through linked list to generate output in reverse order --
           each table entry contributes its suffix byte and links to its
           prefix code, until a literal code (< 256) is reached */
        p = match + stack;
        while (code >= 256) {
            *p++ = suffix[code];
            code = prefix[code];
        }
        stack = p - match;
        match[stack++] = (unsigned char)code;
        final = code;

        /* link new table entry */
        if (end < mask) {
            end++;
            prefix[end] = (unsigned short)prev;
            suffix[end] = (unsigned char)final;
        }

        /* set previous code for next iteration */
        prev = temp;

        /* write output in forward order -- while the match does not fit in
           what is left of outbuf, fill outbuf completely and flush it */
        while (stack > SIZE - outcnt) {
            while (outcnt < SIZE)
                outbuf[outcnt++] = match[--stack];
            if (out(&outd, outbuf, outcnt)) {
                strm->next_in = outbuf; /* signal write error */
                return Z_BUF_ERROR;
            }
            outcnt = 0;
        }
        /* at least one byte is still on the stack here (the loop above only
           ever removes fewer bytes than the stack holds), so the do-while
           below never reads before match[0] */
        p = match + stack;
        do {
            outbuf[outcnt++] = *--p;
        } while (p > match);
        stack = 0;

        /* loop for next code with final and prev as the last match, rem and
           left provide the first 0..7 bits of the next code, end is the last
           valid table entry */
    }
}
/* Decompress a gzip file from infile to outfile.  strm is assumed to have been
   successfully initialized with inflateBackInit().  The input file may consist
   of a series of gzip streams, in which case all of them will be decompressed
   to the output file.  If outfile is -1, then the gzip stream(s) integrity is
   checked and nothing is written.

   The return value is a zlib error code: Z_MEM_ERROR if out of memory,
   Z_DATA_ERROR if the header or the compressed data is invalid, or if the
   trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
   prematurely or a write error occurs, or Z_ERRNO if junk (not another gzip
   stream) follows a valid gzip stream.

   Input that ends before any header byte is read is not an error: Z_OK is
   returned.  A Unix compress stream is handed to lunpipe(), whose return
   value is returned as is.  The NEXT() macro used throughout reads through
   the locals have, next, last and indp.
 */
local int gunpipe(z_stream *strm, int infile, int outfile)
{
    int ret, first, last;
    unsigned have, flags, len;
    z_const unsigned char *next = NULL;
    struct ind ind, *indp;
    struct outd outd;

    /* setup input buffer */
    ind.infile = infile;
    ind.inbuf = inbuf;
    indp = &ind;

    /* decompress concatenated gzip streams */
    have = 0;                               /* no input data read in yet */
    first = 1;                              /* looking for first gzip header */
    strm->next_in = Z_NULL;                 /* so Z_BUF_ERROR means EOF */
    for (;;) {
        /* look for the two magic header bytes for a gzip stream */
        if (NEXT() == -1) {
            ret = Z_OK;
            break;                          /* empty gzip stream is ok */
        }
        /* 31 then 139 (0x1f 0x8b) starts a gzip stream, 31 then 157
           (0x1f 0x9d) a compress stream; the second byte is only read when
           the first one matched */
        if (last != 31 || (NEXT() != 139 && last != 157)) {
            strm->msg = (char *)"incorrect header check";
            ret = first ? Z_DATA_ERROR : Z_ERRNO;
            break;                          /* not a gzip or compress header */
        }
        first = 0;                          /* next non-header is junk */

        /* process a compress (LZW) file -- can't be concatenated after this */
        if (last == 157) {
            ret = lunpipe(have, next, indp, outfile, strm);
            break;
        }

        /* process remainder of gzip header -- from here on, running out of
           input leaves the loop with ret == Z_BUF_ERROR */
        ret = Z_BUF_ERROR;
        if (NEXT() != 8) {                  /* only deflate method allowed */
            if (last == -1) break;
            strm->msg = (char *)"unknown compression method";
            ret = Z_DATA_ERROR;
            break;
        }
        flags = NEXT();                     /* header flags */
        NEXT();                             /* discard mod time, xflgs, os */
        NEXT();
        NEXT();
        NEXT();
        NEXT();
        NEXT();
        if (last == -1) break;
        if (flags & 0xe0) {
            strm->msg = (char *)"unknown header flags set";
            ret = Z_DATA_ERROR;
            break;
        }
        if (flags & 4) {                    /* extra field */
            /* two-byte little-endian length, then that many bytes to skip */
            len = NEXT();
            len += (unsigned)(NEXT()) << 8;
            if (last == -1) break;
            while (len > have) {
                len -= have;
                have = 0;
                if (NEXT() == -1) break;    /* leaves only the while loop */
                len--;
            }
            if (last == -1) break;          /* ... so test for EOF again */
            have -= len;
            next += len;
        }
        if (flags & 8)                      /* file name */
            while (NEXT() != 0 && last != -1)
                ;
        if (flags & 16)                     /* comment */
            while (NEXT() != 0 && last != -1)
                ;
        if (flags & 2) {                    /* header crc */
            NEXT();
            NEXT();
        }
        if (last == -1) break;

        /* set up output */
        outd.outfile = outfile;
        outd.check = 1;
        outd.crc = crc32(0L, Z_NULL, 0);
        outd.total = 0;

        /* decompress data to output -- inflateBack() starts with the input
           already buffered here and gets more through in() */
        strm->next_in = next;
        strm->avail_in = have;
        ret = inflateBack(strm, in, indp, out, &outd);
        if (ret != Z_STREAM_END) break;
        next = strm->next_in;
        have = strm->avail_in;
        strm->next_in = Z_NULL;             /* so Z_BUF_ERROR means EOF */

        /* check trailer -- CRC-32 and then length, each four bytes with the
           least significant byte first; a mismatch caused by running out of
           input keeps ret at Z_BUF_ERROR */
        ret = Z_BUF_ERROR;
        if (NEXT() != (int)(outd.crc & 0xff) ||
            NEXT() != (int)((outd.crc >> 8) & 0xff) ||
            NEXT() != (int)((outd.crc >> 16) & 0xff) ||
            NEXT() != (int)((outd.crc >> 24) & 0xff)) {
            /* crc error */
            if (last != -1) {
                strm->msg = (char *)"incorrect data check";
                ret = Z_DATA_ERROR;
            }
            break;
        }
        if (NEXT() != (int)(outd.total & 0xff) ||
            NEXT() != (int)((outd.total >> 8) & 0xff) ||
            NEXT() != (int)((outd.total >> 16) & 0xff) ||
            NEXT() != (int)((outd.total >> 24) & 0xff)) {
            /* length error */
            if (last != -1) {
                strm->msg = (char *)"incorrect length check";
                ret = Z_DATA_ERROR;
            }
            break;
        }

        /* go back and look for another gzip stream */
    }

    /* clean up and return */
    return ret;
}
*/ +local void copymeta(char *from, char *to) +{ +    struct stat was; +    struct utimbuf when; + +    /* get all of from's Unix meta data, return if not a regular file */ +    if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG) +        return; + +    /* set to's mode bits, ignore errors */ +    (void)chmod(to, was.st_mode & 07777); + +    /* copy owner's user and group, ignore errors */ +    (void)chown(to, was.st_uid, was.st_gid); + +    /* copy access and modify times, ignore errors */ +    when.actime = was.st_atime; +    when.modtime = was.st_mtime; +    (void)utime(to, &when); +} + +/* Decompress the file inname to the file outnname, of if test is true, just +   decompress without writing and check the gzip trailer for integrity.  If +   inname is NULL or an empty string, read from stdin.  If outname is NULL or +   an empty string, write to stdout.  strm is a pre-initialized inflateBack +   structure.  When appropriate, copy the file attributes from inname to +   outname. + +   gunzip() returns 1 if there is an out-of-memory error or an unexpected +   return code from gunpipe().  Otherwise it returns 0. 
+ */ +local int gunzip(z_stream *strm, char *inname, char *outname, int test) +{ +    int ret; +    int infile, outfile; + +    /* open files */ +    if (inname == NULL || *inname == 0) { +        inname = "-"; +        infile = 0;     /* stdin */ +    } +    else { +        infile = open(inname, O_RDONLY, 0); +        if (infile == -1) { +            fprintf(stderr, "gun cannot open %s\n", inname); +            return 0; +        } +    } +    if (test) +        outfile = -1; +    else if (outname == NULL || *outname == 0) { +        outname = "-"; +        outfile = 1;    /* stdout */ +    } +    else { +        outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); +        if (outfile == -1) { +            close(infile); +            fprintf(stderr, "gun cannot create %s\n", outname); +            return 0; +        } +    } +    errno = 0; + +    /* decompress */ +    ret = gunpipe(strm, infile, outfile); +    if (outfile > 2) close(outfile); +    if (infile > 2) close(infile); + +    /* interpret result */ +    switch (ret) { +    case Z_OK: +    case Z_ERRNO: +        if (infile > 2 && outfile > 2) { +            copymeta(inname, outname);          /* copy attributes */ +            unlink(inname); +        } +        if (ret == Z_ERRNO) +            fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", +                    inname); +        break; +    case Z_DATA_ERROR: +        if (outfile > 2) unlink(outname); +        fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); +        break; +    case Z_MEM_ERROR: +        if (outfile > 2) unlink(outname); +        fprintf(stderr, "gun out of memory error--aborting\n"); +        return 1; +    case Z_BUF_ERROR: +        if (outfile > 2) unlink(outname); +        if (strm->next_in != Z_NULL) { +            fprintf(stderr, "gun write error on %s: %s\n", +                    outname, strerror(errno)); +        } +        else if (errno) { +            fprintf(stderr, "gun read error 
on %s: %s\n", +                    inname, strerror(errno)); +        } +        else { +            fprintf(stderr, "gun unexpected end of file on %s\n", +                    inname); +        } +        break; +    default: +        if (outfile > 2) unlink(outname); +        fprintf(stderr, "gun internal error--aborting\n"); +        return 1; +    } +    return 0; +} + +/* Process the gun command line arguments.  See the command syntax near the +   beginning of this source file. */ +int main(int argc, char **argv) +{ +    int ret, len, test; +    char *outname; +    unsigned char *window; +    z_stream strm; + +    /* initialize inflateBack state for repeated use */ +    window = match;                         /* reuse LZW match buffer */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    ret = inflateBackInit(&strm, 15, window); +    if (ret != Z_OK) { +        fprintf(stderr, "gun out of memory error--aborting\n"); +        return 1; +    } + +    /* decompress each file to the same name with the suffix removed */ +    argc--; +    argv++; +    test = 0; +    if (argc && strcmp(*argv, "-h") == 0) { +        fprintf(stderr, "gun 1.6 (17 Jan 2010)\n"); +        fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n"); +        fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); +        return 0; +    } +    if (argc && strcmp(*argv, "-t") == 0) { +        test = 1; +        argc--; +        argv++; +    } +    if (argc) +        do { +            if (test) +                outname = NULL; +            else { +                len = (int)strlen(*argv); +                if (strcmp(*argv + len - 3, ".gz") == 0 || +                    strcmp(*argv + len - 3, "-gz") == 0) +                    len -= 3; +                else if (strcmp(*argv + len - 2, ".z") == 0 || +                    strcmp(*argv + len - 2, "-z") == 0 || +                    strcmp(*argv + len - 2, "_z") == 0 || +                    strcmp(*argv + len - 
2, ".Z") == 0) +                    len -= 2; +                else { +                    fprintf(stderr, "gun error: no gz type on %s--skipping\n", +                            *argv); +                    continue; +                } +                outname = malloc(len + 1); +                if (outname == NULL) { +                    fprintf(stderr, "gun out of memory error--aborting\n"); +                    ret = 1; +                    break; +                } +                memcpy(outname, *argv, len); +                outname[len] = 0; +            } +            ret = gunzip(&strm, *argv, outname, test); +            if (outname != NULL) free(outname); +            if (ret) break; +        } while (argv++, --argc); +    else +        ret = gunzip(&strm, NULL, NULL, test); + +    /* clean up */ +    inflateBackEnd(&strm); +    return ret; +} diff --git a/win32/zlib/examples/gzappend.c b/win32/zlib/examples/gzappend.c new file mode 100644 index 0000000..662dec3 --- /dev/null +++ b/win32/zlib/examples/gzappend.c @@ -0,0 +1,504 @@ +/* gzappend -- command to append to a gzip file + +  Copyright (C) 2003, 2012 Mark Adler, all rights reserved +  version 1.2, 11 Oct 2012 + +  This software is provided 'as-is', without any express or implied +  warranty.  In no event will the author be held liable for any damages +  arising from the use of this software. + +  Permission is granted to anyone to use this software for any purpose, +  including commercial applications, and to alter it and redistribute it +  freely, subject to the following restrictions: + +  1. The origin of this software must not be misrepresented; you must not +     claim that you wrote the original software. If you use this software +     in a product, an acknowledgment in the product documentation would be +     appreciated but is not required. +  2. Altered source versions must be plainly marked as such, and must not be +     misrepresented as being the original software. +  3. 
This notice may not be removed or altered from any source distribution. + +  Mark Adler    madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0  19 Oct 2003     - First version + * 1.1   4 Nov 2003     - Expand and clarify some comments and notes + *                      - Add version and copyright to help + *                      - Send help to stdout instead of stderr + *                      - Add some preemptive typecasts + *                      - Add L to constants in lseek() calls + *                      - Remove some debugging information in error messages + *                      - Use new data_type definition for zlib 1.2.1 + *                      - Simplify and unify file operations + *                      - Finish off gzip file in gztack() + *                      - Use deflatePrime() instead of adding empty blocks + *                      - Keep gzip file clean on appended file read errors + *                      - Use in-place rotate instead of auxiliary buffer + *                        (Why you ask?  Because it was fun to write!) + * 1.2  11 Oct 2012     - Fix for proper z_const usage + *                      - Check for input buffer malloc failure + */ + +/* +   gzappend takes a gzip file and appends to it, compressing files from the +   command line or data from stdin.  The gzip file is written to directly, to +   avoid copying that file, in case it's large.  Note that this results in the +   unfriendly behavior that if gzappend fails, the gzip file is corrupted. + +   This program was written to illustrate the use of the new Z_BLOCK option of +   zlib 1.2.x's inflate() function.  This option returns from inflate() at each +   block boundary to facilitate locating and modifying the last block bit at +   the start of the final deflate block.  Also whether using Z_BLOCK or not, +   another required feature of zlib 1.2.x is that inflate() now provides the +   number of unused bits in the last input byte used.  
gzappend will not work +   with versions of zlib earlier than 1.2.1. + +   gzappend first decompresses the gzip file internally, discarding all but +   the last 32K of uncompressed data, and noting the location of the last block +   bit and the number of unused bits in the last byte of the compressed data. +   The gzip trailer containing the CRC-32 and length of the uncompressed data +   is verified.  This trailer will be later overwritten. + +   Then the last block bit is cleared by seeking back in the file and rewriting +   the byte that contains it.  Seeking forward, the last byte of the compressed +   data is saved along with the number of unused bits to initialize deflate. + +   A deflate process is initialized, using the last 32K of the uncompressed +   data from the gzip file to initialize the dictionary.  If the total +   uncompressed data was less than 32K, then all of it is used to initialize +   the dictionary.  The deflate output bit buffer is also initialized with the +   last bits from the original deflate stream.  From here on, the data to +   append is simply compressed using deflate, and written to the gzip file. +   When that is complete, the new CRC-32 and uncompressed length are written +   as the trailer of the gzip file. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include "zlib.h" + +#define local static +#define LGCHUNK 14 +#define CHUNK (1U << LGCHUNK) +#define DSIZE 32768U + +/* print an error message and terminate with extreme prejudice */ +local void bye(char *msg1, char *msg2) +{ +    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2); +    exit(1); +} + +/* return the greatest common divisor of a and b using Euclid's algorithm, +   modified to be fast when one argument much greater than the other, and +   coded to avoid unnecessary swapping */ +local unsigned gcd(unsigned a, unsigned b) +{ +    unsigned c; + +    while (a && b) +        if (a > b) { +            c = b; +            while (a - c >= c) +                c <<= 1; +            a -= c; +        } +        else { +            c = a; +            while (b - c >= c) +                c <<= 1; +            b -= c; +        } +    return a + b; +} + +/* rotate list[0..len-1] left by rot positions, in place */ +local void rotate(unsigned char *list, unsigned len, unsigned rot) +{ +    unsigned char tmp; +    unsigned cycles; +    unsigned char *start, *last, *to, *from; + +    /* normalize rot and handle degenerate cases */ +    if (len < 2) return; +    if (rot >= len) rot %= len; +    if (rot == 0) return; + +    /* pointer to last entry in list */ +    last = list + (len - 1); + +    /* do simple left shift by one */ +    if (rot == 1) { +        tmp = *list; +        memcpy(list, list + 1, len - 1); +        *last = tmp; +        return; +    } + +    /* do simple right shift by one */ +    if (rot == len - 1) { +        tmp = *last; +        memmove(list + 1, list, len - 1); +        *list = tmp; +        return; +    } + +    /* otherwise do rotate as a set of cycles in place */ +    cycles = gcd(len, rot);             /* number of cycles */ +    do { +        start = from = list + cycles;   /* start index is arbitrary */ +        tmp = *from;   
                 /* save entry to be overwritten */ +        for (;;) { +            to = from;                  /* next step in cycle */ +            from += rot;                /* go right rot positions */ +            if (from > last) from -= len;   /* (pointer better not wrap) */ +            if (from == start) break;   /* all but one shifted */ +            *to = *from;                /* shift left */ +        } +        *to = tmp;                      /* complete the circle */ +    } while (--cycles); +} + +/* structure for gzip file read operations */ +typedef struct { +    int fd;                     /* file descriptor */ +    int size;                   /* 1 << size is bytes in buf */ +    unsigned left;              /* bytes available at next */ +    unsigned char *buf;         /* buffer */ +    z_const unsigned char *next;    /* next byte in buffer */ +    char *name;                 /* file name for error messages */ +} file; + +/* reload buffer */ +local int readin(file *in) +{ +    int len; + +    len = read(in->fd, in->buf, 1 << in->size); +    if (len == -1) bye("error reading ", in->name); +    in->left = (unsigned)len; +    in->next = in->buf; +    return len; +} + +/* read from file in, exit if end-of-file */ +local int readmore(file *in) +{ +    if (readin(in) == 0) bye("unexpected end of ", in->name); +    return 0; +} + +#define read1(in) (in->left == 0 ? 
readmore(in) : 0, \ +                   in->left--, *(in->next)++) + +/* skip over n bytes of in */ +local void skip(file *in, unsigned n) +{ +    unsigned bypass; + +    if (n > in->left) { +        n -= in->left; +        bypass = n & ~((1U << in->size) - 1); +        if (bypass) { +            if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) +                bye("seeking ", in->name); +            n -= bypass; +        } +        readmore(in); +        if (n > in->left) +            bye("unexpected end of ", in->name); +    } +    in->left -= n; +    in->next += n; +} + +/* read a four-byte unsigned integer, little-endian, from in */ +unsigned long read4(file *in) +{ +    unsigned long val; + +    val = read1(in); +    val += (unsigned)read1(in) << 8; +    val += (unsigned long)read1(in) << 16; +    val += (unsigned long)read1(in) << 24; +    return val; +} + +/* skip over gzip header */ +local void gzheader(file *in) +{ +    int flags; +    unsigned n; + +    if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); +    if (read1(in) != 8) bye("unknown compression method in", in->name); +    flags = read1(in); +    if (flags & 0xe0) bye("unknown header flags set in", in->name); +    skip(in, 6); +    if (flags & 4) { +        n = read1(in); +        n += (unsigned)(read1(in)) << 8; +        skip(in, n); +    } +    if (flags & 8) while (read1(in) != 0) ; +    if (flags & 16) while (read1(in) != 0) ; +    if (flags & 2) skip(in, 2); +} + +/* decompress gzip file "name", return strm with a deflate stream ready to +   continue compression of the data in the gzip file, and return a file +   descriptor pointing to where to write the compressed data -- the deflate +   stream is initialized to compress using level "level" */ +local int gzscan(char *name, z_stream *strm, int level) +{ +    int ret, lastbit, left, full; +    unsigned have; +    unsigned long crc, tot; +    unsigned char *window; +    off_t lastoff, end; +    file gz; + +    /* open 
gzip file */ +    gz.name = name; +    gz.fd = open(name, O_RDWR, 0); +    if (gz.fd == -1) bye("cannot open ", name); +    gz.buf = malloc(CHUNK); +    if (gz.buf == NULL) bye("out of memory", ""); +    gz.size = LGCHUNK; +    gz.left = 0; + +    /* skip gzip header */ +    gzheader(&gz); + +    /* prepare to decompress */ +    window = malloc(DSIZE); +    if (window == NULL) bye("out of memory", ""); +    strm->zalloc = Z_NULL; +    strm->zfree = Z_NULL; +    strm->opaque = Z_NULL; +    ret = inflateInit2(strm, -15); +    if (ret != Z_OK) bye("out of memory", " or library mismatch"); + +    /* decompress the deflate stream, saving append information */ +    lastbit = 0; +    lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; +    left = 0; +    strm->avail_in = gz.left; +    strm->next_in = gz.next; +    crc = crc32(0L, Z_NULL, 0); +    have = full = 0; +    do { +        /* if needed, get more input */ +        if (strm->avail_in == 0) { +            readmore(&gz); +            strm->avail_in = gz.left; +            strm->next_in = gz.next; +        } + +        /* set up output to next available section of sliding window */ +        strm->avail_out = DSIZE - have; +        strm->next_out = window + have; + +        /* inflate and check for errors */ +        ret = inflate(strm, Z_BLOCK); +        if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); +        if (ret == Z_MEM_ERROR) bye("out of memory", ""); +        if (ret == Z_DATA_ERROR) +            bye("invalid compressed data--format violated in", name); + +        /* update crc and sliding window pointer */ +        crc = crc32(crc, window + have, DSIZE - have - strm->avail_out); +        if (strm->avail_out) +            have = DSIZE - strm->avail_out; +        else { +            have = 0; +            full = 1; +        } + +        /* process end of block */ +        if (strm->data_type & 128) { +            if (strm->data_type & 64) +                left = strm->data_type & 0x1f; +            
else { +                lastbit = strm->data_type & 0x1f; +                lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; +            } +        } +    } while (ret != Z_STREAM_END); +    inflateEnd(strm); +    gz.left = strm->avail_in; +    gz.next = strm->next_in; + +    /* save the location of the end of the compressed data */ +    end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + +    /* check gzip trailer and save total for deflate */ +    if (crc != read4(&gz)) +        bye("invalid compressed data--crc mismatch in ", name); +    tot = strm->total_out; +    if ((tot & 0xffffffffUL) != read4(&gz)) +        bye("invalid compressed data--length mismatch in", name); + +    /* if not at end of file, warn */ +    if (gz.left || readin(&gz)) +        fprintf(stderr, +            "gzappend warning: junk at end of gzip file overwritten\n"); + +    /* clear last block bit */ +    lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); +    if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); +    *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); +    lseek(gz.fd, -1L, SEEK_CUR); +    if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); + +    /* if window wrapped, build dictionary from window by rotating */ +    if (full) { +        rotate(window, DSIZE, have); +        have = DSIZE; +    } + +    /* set up deflate stream with window, crc, total_in, and leftover bits */ +    ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); +    if (ret != Z_OK) bye("out of memory", ""); +    deflateSetDictionary(strm, window, have); +    strm->adler = crc; +    strm->total_in = tot; +    if (left) { +        lseek(gz.fd, --end, SEEK_SET); +        if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); +        deflatePrime(strm, 8 - left, *gz.buf); +    } +    lseek(gz.fd, end, SEEK_SET); + +    /* clean up and return */ +    free(window); +    free(gz.buf); +    return gz.fd; +} + +/* append file 
"name" to gzip file gd using deflate stream strm -- if last +   is true, then finish off the deflate stream at the end */ +local void gztack(char *name, int gd, z_stream *strm, int last) +{ +    int fd, len, ret, put; +    unsigned left; +    unsigned char *in, *out; + +    /* open file to compress and append */ +    fd = 0; +    if (name != NULL) { +        fd = open(name, O_RDONLY, 0); +        if (fd == -1) +            fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", +                    name); +    } + +    /* allocate buffers */ +    in = malloc(CHUNK); +    out = malloc(CHUNK); +    if (in == NULL || out == NULL) bye("out of memory", ""); + +    /* compress input file and append to gzip file */ +    do { +        /* get more input */ +        len = read(fd, in, CHUNK); +        if (len == -1) { +            fprintf(stderr, +                    "gzappend warning: error reading %s, skipping rest ...\n", +                    name); +            len = 0; +        } +        strm->avail_in = (unsigned)len; +        strm->next_in = in; +        if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); + +        /* compress and write all available output (the write() count goes in put so len keeps the read() count that both loops test) */ +        do { +            strm->avail_out = CHUNK; +            strm->next_out = out; +            ret = deflate(strm, last && len == 0 ? 
Z_FINISH : Z_NO_FLUSH); +            left = CHUNK - strm->avail_out; +            while (left) { +                put = write(gd, out + CHUNK - strm->avail_out - left, left); +                if (put == -1) bye("writing gzip file", ""); +                left -= (unsigned)put; +            } +        } while (strm->avail_out == 0 && ret != Z_STREAM_END); +    } while (len != 0); + +    /* write trailer after last entry */ +    if (last) { +        deflateEnd(strm); +        out[0] = (unsigned char)(strm->adler); +        out[1] = (unsigned char)(strm->adler >> 8); +        out[2] = (unsigned char)(strm->adler >> 16); +        out[3] = (unsigned char)(strm->adler >> 24); +        out[4] = (unsigned char)(strm->total_in); +        out[5] = (unsigned char)(strm->total_in >> 8); +        out[6] = (unsigned char)(strm->total_in >> 16); +        out[7] = (unsigned char)(strm->total_in >> 24); +        len = 8; +        do { +            ret = write(gd, out + 8 - len, len); +            if (ret == -1) bye("writing gzip file", ""); +            len -= ret; +        } while (len); +        close(gd); +    } + +    /* clean up and return */ +    free(out); +    free(in); +    if (fd > 0) close(fd); +} + +/* process the compression level option if present, scan the gzip file, and +   append the specified files, or append the data from stdin if no other file +   names are provided on the command line -- the gzip file must be writable +   and seekable */ +int main(int argc, char **argv) +{ +    int gd, level; +    z_stream strm; + +    /* ignore command name */ +    argc--; argv++; + +    /* provide usage if no arguments */ +    if (*argv == NULL) { +        printf( +            "gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n" +               ); +        printf( +            "usage: gzappend [-level] file.gz [ addthis [ andthis ... 
]]\n"); +        return 0; +    } + +    /* set compression level */ +    level = Z_DEFAULT_COMPRESSION; +    if (argv[0][0] == '-') { +        if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) +            bye("invalid compression level", ""); +        level = argv[0][1] - '0'; +        if (*++argv == NULL) bye("no gzip file name after options", ""); +    } + +    /* prepare to append to gzip file */ +    gd = gzscan(*argv++, &strm, level); + +    /* append files on command line, or from stdin if none */ +    if (*argv == NULL) +        gztack(NULL, gd, &strm, 1); +    else +        do { +            gztack(*argv, gd, &strm, argv[1] == NULL); +        } while (*++argv != NULL); +    return 0; +} diff --git a/win32/zlib/examples/gzjoin.c b/win32/zlib/examples/gzjoin.c new file mode 100644 index 0000000..89e8098 --- /dev/null +++ b/win32/zlib/examples/gzjoin.c @@ -0,0 +1,449 @@ +/* gzjoin -- command to join gzip files into one gzip file + +  Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved +  version 1.2, 14 Aug 2012 + +  This software is provided 'as-is', without any express or implied +  warranty.  In no event will the author be held liable for any damages +  arising from the use of this software. + +  Permission is granted to anyone to use this software for any purpose, +  including commercial applications, and to alter it and redistribute it +  freely, subject to the following restrictions: + +  1. The origin of this software must not be misrepresented; you must not +     claim that you wrote the original software. If you use this software +     in a product, an acknowledgment in the product documentation would be +     appreciated but is not required. +  2. Altered source versions must be plainly marked as such, and must not be +     misrepresented as being the original software. +  3. This notice may not be removed or altered from any source distribution. 
+ +  Mark Adler    madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0  11 Dec 2004     - First version + * 1.1  12 Jun 2005     - Changed ssize_t to long for portability + * 1.2  14 Aug 2012     - Clean up for z_const usage + */ + +/* +   gzjoin takes one or more gzip files on the command line and writes out a +   single gzip file that will uncompress to the concatenation of the +   uncompressed data from the individual gzip files.  gzjoin does this without +   having to recompress any of the data and without having to calculate a new +   crc32 for the concatenated uncompressed data.  gzjoin does however have to +   decompress all of the input data in order to find the bits in the compressed +   data that need to be modified to concatenate the streams. + +   gzjoin does not do an integrity check on the input gzip files other than +   checking the gzip header and decompressing the compressed data.  They are +   otherwise assumed to be complete and correct. + +   Each joint between gzip files removes at least 18 bytes of previous trailer +   and subsequent header, and inserts an average of about three bytes to the +   compressed data in order to connect the streams.  The output gzip file +   has a minimal ten-byte gzip header with no file name or modification time. + +   This program was written to illustrate the use of the Z_BLOCK option of +   inflate() and the crc32_combine() function.  gzjoin will not compile with +   versions of zlib earlier than 1.2.3. 
+ */ + +#include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */ +#include <stdlib.h>     /* exit(), malloc(), free() */ +#include <fcntl.h>      /* open() */ +#include <unistd.h>     /* close(), read(), lseek() */ +#include "zlib.h" +    /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ + +#define local static + +/* exit with an error (return a value to allow use in an expression) */ +local int bail(char *why1, char *why2) +{ +    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); +    exit(1); +    return 0; +} + +/* -- simple buffered file input with access to the buffer -- */ + +#define CHUNK 32768         /* must be a power of two and fit in unsigned */ + +/* bin buffered input file type */ +typedef struct { +    char *name;             /* name of file for error messages */ +    int fd;                 /* file descriptor */ +    unsigned left;          /* bytes remaining at next */ +    unsigned char *next;    /* next byte to read */ +    unsigned char *buf;     /* allocated buffer of length CHUNK */ +} bin; + +/* close a buffered file and free allocated memory */ +local void bclose(bin *in) +{ +    if (in != NULL) { +        if (in->fd != -1) +            close(in->fd); +        if (in->buf != NULL) +            free(in->buf); +        free(in); +    } +} + +/* open a buffered file for input, return a pointer to type bin, or NULL on +   failure */ +local bin *bopen(char *name) +{ +    bin *in; + +    in = malloc(sizeof(bin)); +    if (in == NULL) +        return NULL; +    in->buf = malloc(CHUNK); +    in->fd = open(name, O_RDONLY, 0); +    if (in->buf == NULL || in->fd == -1) { +        bclose(in); +        return NULL; +    } +    in->left = 0; +    in->next = in->buf; +    in->name = name; +    return in; +} + +/* load buffer from file, return -1 on read error, 0 or 1 on success, with +   1 indicating that end-of-file was reached */ +local int bload(bin *in) +{ +    long len; + +    if (in == NULL) +     
   return -1; +    if (in->left != 0) +        return 0; +    in->next = in->buf; +    do { +        len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); +        if (len < 0) +            return -1; +        in->left += (unsigned)len; +    } while (len != 0 && in->left < CHUNK); +    return len == 0 ? 1 : 0; +} + +/* get a byte from the file, bail if end of file */ +#define bget(in) (in->left ? 0 : bload(in), \ +                  in->left ? (in->left--, *(in->next)++) : \ +                    bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ +    unsigned long val; + +    val = bget(in); +    val += (unsigned long)(bget(in)) << 8; +    val += (unsigned long)(bget(in)) << 16; +    val += (unsigned long)(bget(in)) << 24; +    return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ +    /* check pointer */ +    if (in == NULL) +        return; + +    /* easy case -- skip bytes in buffer */ +    if (skip <= in->left) { +        in->left -= skip; +        in->next += skip; +        return; +    } + +    /* skip what's in buffer, discard buffer contents */ +    skip -= in->left; +    in->left = 0; + +    /* seek past multiples of CHUNK bytes */ +    if (skip > CHUNK) { +        unsigned left; + +        left = skip & (CHUNK - 1); +        if (left == 0) { +            /* exact number of chunks: seek all the way minus one byte to check +               for end-of-file with a read */ +            lseek(in->fd, skip - 1, SEEK_CUR); +            if (read(in->fd, in->buf, 1) != 1) +                bail("unexpected end of file on ", in->name); +            return; +        } + +        /* skip the integral chunks, update skip with remainder */ +        lseek(in->fd, skip - left, SEEK_CUR); +        skip = left; +    } + +    /* read more input and skip remainder */ +    bload(in); +    if (skip > in->left) +        bail("unexpected end 
of file on ", in->name); +    in->left -= skip; +    in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ +    int flags; + +    /* verify gzip magic header and compression method */ +    if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) +        bail(in->name, " is not a valid gzip file"); + +    /* get and verify flags */ +    flags = bget(in); +    if ((flags & 0xe0) != 0) +        bail("unknown reserved bits set in ", in->name); + +    /* skip modification time, extra flags, and os */ +    bskip(in, 6); + +    /* skip extra field if present */ +    if (flags & 4) { +        unsigned len; + +        len = bget(in); +        len += (unsigned)(bget(in)) << 8; +        bskip(in, len); +    } + +    /* skip file name if present */ +    if (flags & 8) +        while (bget(in) != 0) +            ; + +    /* skip comment if present */ +    if (flags & 16) +        while (bget(in) != 0) +            ; + +    /* skip header crc if present */ +    if (flags & 2) +        bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ +    putc(val & 0xff, out); +    putc((val >> 8) & 0xff, out); +    putc((val >> 16) & 0xff, out); +    putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ +    if (in->left == 0) +        bload(in); +    if (in->left == 0) +        bail("unexpected end of file on ", in->name); +    strm->avail_in = in->left; +    strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. 
*/ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ +    fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); +    *crc = crc32(0L, Z_NULL, 0); +    *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last +   block if clr is true, and adding empty blocks as needed to get to a byte +   boundary.  If clr is false, then the last block becomes the last block of +   the output, and the gzip trailer is written.  crc and tot maintains the +   crc and length (modulo 2^32) of the output for the trailer.  The resulting +   gzip file is written to out.  gzinit() must be called before the first call +   of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, +                  FILE *out) +{ +    int ret;                /* return value from zlib functions */ +    int pos;                /* where the "last block" bit is in byte */ +    int last;               /* true if processing the last block */ +    bin *in;                /* buffered input file */ +    unsigned char *start;   /* start of compressed data in buffer */ +    unsigned char *junk;    /* buffer for uncompressed data -- discarded */ +    z_off_t len;            /* length of uncompressed data (support > 4 GB) */ +    z_stream strm;          /* zlib inflate stream */ + +    /* open gzip file and skip header */ +    in = bopen(name); +    if (in == NULL) +        bail("could not open ", name); +    gzhead(in); + +    /* allocate buffer for uncompressed data and initialize raw inflate +       stream */ +    junk = malloc(CHUNK); +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit2(&strm, -15); +    if (junk == NULL || ret != Z_OK) +        bail("out of memory", ""); + +    /* inflate and copy compressed data, clear last-block bit if requested */ +    len = 0; +    
zpull(&strm, in); +    start = in->next; +    last = start[0] & 1; +    if (last && clr) +        start[0] &= ~1; +    strm.avail_out = 0; +    for (;;) { +        /* if input used and output done, write used input and get more */ +        if (strm.avail_in == 0 && strm.avail_out != 0) { +            fwrite(start, 1, strm.next_in - start, out); +            start = in->buf; +            in->left = 0; +            zpull(&strm, in); +        } + +        /* decompress -- return early when end-of-block reached */ +        strm.avail_out = CHUNK; +        strm.next_out = junk; +        ret = inflate(&strm, Z_BLOCK); +        switch (ret) { +        case Z_MEM_ERROR: +            bail("out of memory", ""); +        case Z_DATA_ERROR: +            bail("invalid compressed data in ", in->name); +        } + +        /* update length of uncompressed data */ +        len += CHUNK - strm.avail_out; + +        /* check for block boundary (only get this when block copied out) */ +        if (strm.data_type & 128) { +            /* if that was the last block, then done */ +            if (last) +                break; + +            /* number of unused bits in last byte */ +            pos = strm.data_type & 7; + +            /* find the next last-block bit */ +            if (pos != 0) { +                /* next last-block bit is in last used byte */ +                pos = 0x100 >> pos; +                last = strm.next_in[-1] & pos; +                if (last && clr) +                    in->buf[strm.next_in - in->buf - 1] &= ~pos; +            } +            else { +                /* next last-block bit is in next unused byte */ +                if (strm.avail_in == 0) { +                    /* don't have that byte yet -- get it */ +                    fwrite(start, 1, strm.next_in - start, out); +                    start = in->buf; +                    in->left = 0; +                    zpull(&strm, in); +                } +                last = strm.next_in[0] & 1; +     
           if (last && clr) +                    in->buf[strm.next_in - in->buf] &= ~1; +            } +        } +    } + +    /* update buffer with unused input */ +    in->left = strm.avail_in; +    in->next = in->buf + (strm.next_in - in->buf); + +    /* copy used input, write empty blocks to get to byte boundary */ +    pos = strm.data_type & 7; +    fwrite(start, 1, in->next - start - 1, out); +    last = in->next[-1]; +    if (pos == 0 || !clr) +        /* already at byte boundary, or last file: write last byte */ +        putc(last, out); +    else { +        /* append empty blocks to last byte */ +        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */ +        if (pos & 1) { +            /* odd -- append an empty stored block */ +            putc(last, out); +            if (pos == 1) +                putc(0, out);               /* two more bits in block header */ +            fwrite("\0\0\xff\xff", 1, 4, out); +        } +        else { +            /* even -- append 1, 2, or 3 empty fixed blocks */ +            switch (pos) { +            case 6: +                putc(last | 8, out); +                last = 0; +            case 4: +                putc(last | 0x20, out); +                last = 0; +            case 2: +                putc(last | 0x80, out); +                putc(0, out); +            } +        } +    } + +    /* update crc and tot */ +    *crc = crc32_combine(*crc, bget4(in), len); +    *tot += (unsigned long)len; + +    /* clean up */ +    inflateEnd(&strm); +    free(junk); +    bclose(in); + +    /* write trailer if this is the last gzip file */ +    if (!clr) { +        put4(*crc, out); +        put4(*tot, out); +    } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ +    unsigned long crc, tot;     /* running crc and total uncompressed length */ + +    /* skip command name */ +    argc--; +    argv++; + +    /* show usage if no arguments */ +    
if (argc == 0) { +        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", +              stderr); +        return 0; +    } + +    /* join gzip files on command line and write to stdout */ +    gzinit(&crc, &tot, stdout); +    while (argc--) +        gzcopy(*argv++, argc, &crc, &tot, stdout); + +    /* done */ +    return 0; +} diff --git a/win32/zlib/examples/gzlog.c b/win32/zlib/examples/gzlog.c new file mode 100644 index 0000000..922f878 --- /dev/null +++ b/win32/zlib/examples/gzlog.c @@ -0,0 +1,1059 @@ +/* + * gzlog.c + * Copyright (C) 2004, 2008, 2012 Mark Adler, all rights reserved + * For conditions of distribution and use, see copyright notice in gzlog.h + * version 2.2, 14 Aug 2012 + */ + +/* +   gzlog provides a mechanism for frequently appending short strings to a gzip +   file that is efficient both in execution time and compression ratio.  The +   strategy is to write the short strings in an uncompressed form to the end of +   the gzip file, only compressing when the amount of uncompressed data has +   reached a given threshold. + +   gzlog also provides protection against interruptions in the process due to +   system crashes.  The status of the operation is recorded in an extra field +   in the gzip file, and is only updated once the gzip file is brought to a +   valid state.  The last data to be appended or compressed is saved in an +   auxiliary file, so that if the operation is interrupted, it can be completed +   the next time an append operation is attempted. + +   gzlog maintains another auxiliary file with the last 32K of data from the +   compressed portion, which is preloaded for the compression of the subsequent +   data.  This minimizes the impact to the compression ratio of appending. 
+ */ + +/* +   Operations Concept: + +   Files (log name "foo"): +   foo.gz -- gzip file with the complete log +   foo.add -- last message to append or last data to compress +   foo.dict -- dictionary of the last 32K of data for next compression +   foo.temp -- temporary dictionary file for compression after this one +   foo.lock -- lock file for reading and writing the other files +   foo.repairs -- log file for log file recovery operations (not compressed) + +   gzip file structure: +   - fixed-length (no file name) header with extra field (see below) +   - compressed data ending initially with empty stored block +   - uncompressed data filling out originally empty stored block and +     subsequent stored blocks as needed (16K max each) +   - gzip trailer +   - no junk at end (no other gzip streams) + +   When appending data, the information in the first three items above plus the +   foo.add file are sufficient to recover an interrupted append operation.  The +   extra field has the necessary information to restore the start of the last +   stored block and determine where to append the data in the foo.add file, as +   well as the crc and length of the gzip data before the append operation. + +   The foo.add file is created before the gzip file is marked for append, and +   deleted after the gzip file is marked as complete.  So if the append +   operation is interrupted, the data to add will still be there.  If due to +   some external force, the foo.add file gets deleted between when the append +   operation was interrupted and when recovery is attempted, the gzip file will +   still be restored, but without the appended data. + +   When compressing data, the information in the first two items above plus the +   foo.add file are sufficient to recover an interrupted compress operation. 
+   The extra field has the necessary information to find the end of the +   compressed data, and contains both the crc and length of just the compressed +   data and of the complete set of data including the contents of the foo.add +   file. + +   Again, the foo.add file is maintained during the compress operation in case +   of an interruption.  If in the unlikely event the foo.add file with the data +   to be compressed is missing due to some external force, a gzip file with +   just the previous compressed data will be reconstructed.  In this case, all +   of the data that was to be compressed is lost (approximately one megabyte). +   This will not occur if all that happened was an interruption of the compress +   operation. + +   The third state that is marked is the replacement of the old dictionary with +   the new dictionary after a compress operation.  Once compression is +   complete, the gzip file is marked as being in the replace state.  This +   completes the gzip file, so an interrupt after being so marked does not +   result in recompression.  Then the dictionary file is replaced, and the gzip +   file is marked as completed.  This state prevents the possibility of +   restarting compression with the wrong dictionary file. + +   All three operations are wrapped by a lock/unlock procedure.  In order to +   gain exclusive access to the log files, first a foo.lock file must be +   exclusively created.  When all operations are complete, the lock is +   released by deleting the foo.lock file.  If when attempting to create the +   lock file, it already exists and the modify time of the lock file is more +   than five minutes old (set by the PATIENCE define below), then the old +   lock file is considered stale and deleted, and the exclusive creation of +   the lock file is retried.  To assure that there are no false assessments +   of the staleness of the lock file, the operations periodically touch the +   lock file to update the modified date. 
+ +   Following is the definition of the extra field with all of the information +   required to enable the above append and compress operations and their +   recovery if interrupted.  Multi-byte values are stored little endian +   (consistent with the gzip format).  File pointers are eight bytes long. +   The crc's and lengths for the gzip trailer are four bytes long.  (Note that +   the length at the end of a gzip file is used for error checking only, and +   for large files is actually the length modulo 2^32.)  The stored block +   length is two bytes long.  The gzip extra field two-byte identification is +   "ap" for append.  It is assumed that writing the extra field to the file is +   an "atomic" operation.  That is, either all of the extra field is written +   to the file, or none of it is, if the operation is interrupted right at the +   point of updating the extra field.  This is a reasonable assumption, since +   the extra field is within the first 52 bytes of the file, which is smaller +   than any expected block size for a mass storage device (usually 512 bytes or +   larger). + +   Extra field (35 bytes): +   - Pointer to first stored block length -- this points to the two-byte length +     of the first stored block, which is followed by the two-byte, one's +     complement of that length.  The stored block length is preceded by the +     three-bit header of the stored block, which is the actual start of the +     stored block in the deflate format.  See the bit offset field below. +   - Pointer to the last stored block length.  This is the same as above, but +     for the last stored block of the uncompressed data in the gzip file. +     Initially this is the same as the first stored block length pointer. +     When the stored block gets to 16K (see the MAX_STORE define), then a new +     stored block is added, at which point the last stored block length pointer +     is different from the first stored block length pointer.  
When they are +     different, the first bit of the last stored block header is eight bits, or +     one byte back from the block length. +   - Compressed data crc and length.  This is the crc and length of the data +     that is in the compressed portion of the deflate stream.  These are used +     only in the event that the foo.add file containing the data to compress is +     lost after a compress operation is interrupted. +   - Total data crc and length.  This is the crc and length of all of the data +     stored in the gzip file, compressed and uncompressed.  It is used to +     reconstruct the gzip trailer when compressing, as well as when recovering +     interrupted operations. +   - Final stored block length.  This is used to quickly find where to append, +     and allows the restoration of the original final stored block state when +     an append operation is interrupted. +   - First stored block start as the number of bits back from the final stored +     block first length byte.  This value is in the range of 3..10, and is +     stored as the low three bits of the final byte of the extra field after +     subtracting three (0..7).  This allows the last-block bit of the stored +     block header to be updated when a new stored block is added, for the case +     when the first stored block and the last stored block are the same.  (When +     they are different, the numbers of bits back is known to be eight.)  This +     also allows for new compressed data to be appended to the old compressed +     data in the compress operation, overwriting the previous first stored +     block, or for the compressed data to be terminated and a valid gzip file +     reconstructed on the off chance that a compression operation was +     interrupted and the data to compress in the foo.add file was deleted. +   - The operation in process.  This is the next two bits in the last byte (the +     bits under the mask 0x18).  
They are interpreted as 0: nothing in process, +     1: append in process, 2: compress in process, 3: replace in process. +   - The top three bits of the last byte in the extra field are reserved and +     are currently set to zero. + +   Main procedure: +   - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of +     the system open() call.  If the modify time of an existing lock file is +     more than PATIENCE seconds old, then the lock file is deleted and the +     exclusive create is retried. +   - Load the extra field from the foo.gz file, and see if an operation was in +     progress but not completed.  If so, apply the recovery procedure below. +   - Perform the append procedure with the provided data. +   - If the uncompressed data in the foo.gz file is 1MB or more, apply the +     compress procedure. +   - Delete the foo.lock file. + +   Append procedure: +   - Put what to append in the foo.add file so that the operation can be +     restarted if this procedure is interrupted. +   - Mark the foo.gz extra field with the append operation in progress. +   + Restore the original last-block bit and stored block length of the last +     stored block from the information in the extra field, in case a previous +     append operation was interrupted. +   - Append the provided data to the last stored block, creating new stored +     blocks as needed and updating the stored blocks last-block bits and +     lengths. +   - Update the crc and length with the new data, and write the gzip trailer. +   - Write over the extra field (with a single write operation) with the new +     pointers, lengths, and crc's, and mark the gzip file as not in process. +     Though there is still a foo.add file, it will be ignored since nothing +     is in process.  If a foo.add file is leftover from a previously +     completed operation, it is truncated when writing new data to it. +   - Delete the foo.add file. 
+ +   Compress and replace procedures: +   - Read all of the uncompressed data in the stored blocks in foo.gz and write +     it to foo.add.  Also write foo.temp with the last 32K of that data to +     provide a dictionary for the next invocation of this procedure. +   - Rewrite the extra field marking foo.gz with a compression in process. +   * If there is no data provided to compress (due to a missing foo.add file +     when recovering), reconstruct and truncate the foo.gz file to contain +     only the previous compressed data and proceed to the step after the next +     one.  Otherwise ... +   - Compress the data with the dictionary in foo.dict, and write to the +     foo.gz file starting at the bit immediately following the last previously +     compressed block.  If there is no foo.dict, proceed anyway with the +     compression at slightly reduced efficiency.  (For the foo.dict file to be +     missing requires some external failure beyond simply the interruption of +     a compress operation.)  During this process, the foo.lock file is +     periodically touched to assure that that file is not considered stale by +     another process before we're done.  The deflation is terminated with a +     non-last empty static block (10 bits long), that is then located and +     written over by a last-bit-set empty stored block. +   - Append the crc and length of the data in the gzip file (previously +     calculated during the append operations). +   - Write over the extra field with the updated stored block offsets, bits +     back, crc's, and lengths, and mark foo.gz as in process for a replacement +     of the dictionary. +   @ Delete the foo.add file. +   - Replace foo.dict with foo.temp. +   - Write over the extra field, marking foo.gz as complete. + +   Recovery procedure: +   - If not a replace recovery, read in the foo.add file, and provide that data +     to the appropriate recovery below.  
If there is no foo.add file, provide +     a zero data length to the recovery.  In that case, the append recovery +     restores the foo.gz to the previous compressed + uncompressed data state. +     For the compress recovery, a missing foo.add file results in foo.gz +     being restored to the previous compressed-only data state. +   - Append recovery: +     - Pick up append at + step above +   - Compress recovery: +     - Pick up compress at * step above +   - Replace recovery: +     - Pick up compress at @ step above +   - Log the repair with a date stamp in foo.repairs + */ + +#include <sys/types.h> +#include <stdio.h>      /* rename, fopen, fprintf, fclose */ +#include <stdlib.h>     /* malloc, free */ +#include <string.h>     /* strlen, strrchr, strcpy, strncpy, strcmp */ +#include <fcntl.h>      /* open */ +#include <unistd.h>     /* lseek, read, write, close, unlink, sleep, */ +                        /* ftruncate, fsync */ +#include <errno.h>      /* errno */ +#include <time.h>       /* time, ctime */ +#include <sys/stat.h>   /* stat */ +#include <sys/time.h>   /* utimes */ +#include "zlib.h"       /* crc32 */ + +#include "gzlog.h"      /* header for external access */ + +#define local static +typedef unsigned int uint; +typedef unsigned long ulong; + +/* Macro for debugging to deterministically force recovery operations */ +#ifdef DEBUG +    #include <setjmp.h>         /* longjmp */ +    jmp_buf gzlog_jump;         /* where to go back to */ +    int gzlog_bail = 0;         /* which point to bail at (1..8) */ +    int gzlog_count = -1;       /* number of times through to wait */ +#   define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \ +                            longjmp(gzlog_jump, gzlog_bail); } while (0) +#else +#   define BAIL(n) +#endif + +/* how old the lock file can be in seconds before considering it stale */ +#define PATIENCE 300 + +/* maximum stored block size in Kbytes -- must be in 1..63 */ +#define MAX_STORE 16 + +/* number of 
stored Kbytes to trigger compression (must be >= 32 to allow +   dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to +   discard the stored block headers contribution of five bytes each) */ +#define TRIGGER 1024 + +/* size of a deflate dictionary (this cannot be changed) */ +#define DICT 32768U + +/* values for the operation (2 bits) */ +#define NO_OP 0 +#define APPEND_OP 1 +#define COMPRESS_OP 2 +#define REPLACE_OP 3 + +/* macros to extract little-endian integers from an unsigned byte buffer */ +#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8)) +#define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16)) +#define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32)) + +/* macros to store integers into a byte buffer in little-endian order */ +#define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0) +#define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0) +#define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0) + +/* internal structure for log information */ +#define LOGID "\106\035\172"    /* should be three non-zero characters */ +struct log { +    char id[4];     /* contains LOGID to detect inadvertent overwrites */ +    int fd;         /* file descriptor for .gz file, opened read/write */ +    char *path;     /* allocated path, e.g. 
"/var/log/foo" or "foo" */ +    char *end;      /* end of path, for appending suffices such as ".gz" */ +    off_t first;    /* offset of first stored block first length byte */ +    int back;       /* location of first block id in bits back from first */ +    uint stored;    /* bytes currently in last stored block */ +    off_t last;     /* offset of last stored block first length byte */ +    ulong ccrc;     /* crc of compressed data */ +    ulong clen;     /* length (modulo 2^32) of compressed data */ +    ulong tcrc;     /* crc of total data */ +    ulong tlen;     /* length (modulo 2^32) of total data */ +    time_t lock;    /* last modify time of our lock file */ +}; + +/* gzip header for gzlog */ +local unsigned char log_gzhead[] = { +    0x1f, 0x8b,                 /* magic gzip id */ +    8,                          /* compression method is deflate */ +    4,                          /* there is an extra field (no file name) */ +    0, 0, 0, 0,                 /* no modification time provided */ +    0, 0xff,                    /* no extra flags, no OS specified */ +    39, 0, 'a', 'p', 35, 0      /* extra field with "ap" subfield */ +                                /* 35 is EXTRA, 39 is EXTRA + 4 */ +}; + +#define HEAD sizeof(log_gzhead)     /* should be 16 */ + +/* initial gzip extra field content (52 == HEAD + EXTRA + 1) */ +local unsigned char log_gzext[] = { +    52, 0, 0, 0, 0, 0, 0, 0,    /* offset of first stored block length */ +    52, 0, 0, 0, 0, 0, 0, 0,    /* offset of last stored block length */ +    0, 0, 0, 0, 0, 0, 0, 0,     /* compressed data crc and length */ +    0, 0, 0, 0, 0, 0, 0, 0,     /* total data crc and length */ +    0, 0,                       /* final stored block data length */ +    5                           /* op is NO_OP, last bit 8 bits back */ +}; + +#define EXTRA sizeof(log_gzext)     /* should be 35 */ + +/* initial gzip data and trailer */ +local unsigned char log_gzbody[] = { +    1, 0, 0, 0xff, 0xff,        /* 
empty stored block (last) */ +    0, 0, 0, 0,                 /* crc */ +    0, 0, 0, 0                  /* uncompressed length */ +}; + +#define BODY sizeof(log_gzbody) + +/* Exclusively create foo.lock in order to negotiate exclusive access to the +   foo.* files.  If the modify time of an existing lock file is greater than +   PATIENCE seconds in the past, then consider the lock file to have been +   abandoned, delete it, and try the exclusive create again.  Save the lock +   file modify time for verification of ownership.  Return 0 on success, or -1 +   on failure, usually due to an access restriction or invalid path.  Note that +   if stat() or unlink() fails, it may be due to another process noticing the +   abandoned lock file a smidge sooner and deleting it, so those are not +   flagged as an error. */ +local int log_lock(struct log *log) +{ +    int fd; +    struct stat st; + +    strcpy(log->end, ".lock"); +    while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) { +        if (errno != EEXIST) +            return -1; +        if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) { +            unlink(log->path); +            continue; +        } +        sleep(2);       /* relinquish the CPU for two seconds while waiting */ +    } +    close(fd); +    if (stat(log->path, &st) == 0) +        log->lock = st.st_mtime; +    return 0; +} + +/* Update the modify time of the lock file to now, in order to prevent another +   task from thinking that the lock is stale.  Save the lock file modify time +   for verification of ownership. */ +local void log_touch(struct log *log) +{ +    struct stat st; + +    strcpy(log->end, ".lock"); +    utimes(log->path, NULL); +    if (stat(log->path, &st) == 0) +        log->lock = st.st_mtime; +} + +/* Check the log file modify time against what is expected.  Return true if +   this is not our lock.  If it is our lock, touch it to keep it. 
*/ +local int log_check(struct log *log) +{ +    struct stat st; + +    strcpy(log->end, ".lock"); +    if (stat(log->path, &st) || st.st_mtime != log->lock) +        return 1; +    log_touch(log); +    return 0; +} + +/* Unlock a previously acquired lock, but only if it's ours. */ +local void log_unlock(struct log *log) +{ +    if (log_check(log)) +        return; +    strcpy(log->end, ".lock"); +    unlink(log->path); +    log->lock = 0; +} + +/* Check the gzip header and read in the extra field, filling in the values in +   the log structure.  Return op on success or -1 if the gzip header was not as +   expected.  op is the current operation in progress last written to the extra +   field.  This assumes that the gzip file has already been opened, with the +   file descriptor log->fd. */ +local int log_head(struct log *log) +{ +    int op; +    unsigned char buf[HEAD + EXTRA]; + +    if (lseek(log->fd, 0, SEEK_SET) < 0 || +        read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA || +        memcmp(buf, log_gzhead, HEAD)) { +        return -1; +    } +    log->first = PULL8(buf + HEAD); +    log->last = PULL8(buf + HEAD + 8); +    log->ccrc = PULL4(buf + HEAD + 16); +    log->clen = PULL4(buf + HEAD + 20); +    log->tcrc = PULL4(buf + HEAD + 24); +    log->tlen = PULL4(buf + HEAD + 28); +    log->stored = PULL2(buf + HEAD + 32); +    log->back = 3 + (buf[HEAD + 34] & 7); +    op = (buf[HEAD + 34] >> 3) & 3; +    return op; +} + +/* Write over the extra field contents, marking the operation as op.  Use fsync +   to assure that the device is written to, and in the requested order.  This +   operation, and only this operation, is assumed to be atomic in order to +   assure that the log is recoverable in the event of an interruption at any +   point in the process.  Return -1 if the write to foo.gz failed. 
*/ +local int log_mark(struct log *log, int op) +{ +    int ret; +    unsigned char ext[EXTRA]; + +    PUT8(ext, log->first); +    PUT8(ext + 8, log->last); +    PUT4(ext + 16, log->ccrc); +    PUT4(ext + 20, log->clen); +    PUT4(ext + 24, log->tcrc); +    PUT4(ext + 28, log->tlen); +    PUT2(ext + 32, log->stored); +    ext[34] = log->back - 3 + (op << 3); +    fsync(log->fd); +    ret = lseek(log->fd, HEAD, SEEK_SET) < 0 || +          write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0; +    fsync(log->fd); +    return ret; +} + +/* Rewrite the last block header bits and subsequent zero bits to get to a byte +   boundary, setting the last block bit if last is true, and then write the +   remainder of the stored block header (length and one's complement).  Leave +   the file pointer after the end of the last stored block data.  Return -1 if +   there is a read or write failure on the foo.gz file */ +local int log_last(struct log *log, int last) +{ +    int back, len, mask; +    unsigned char buf[6]; + +    /* determine the locations of the bytes and bits to modify */ +    back = log->last == log->first ? log->back : 8; +    len = back > 8 ? 
2 : 1;                 /* bytes back from log->last */ +    mask = 0x80 >> ((back - 1) & 7);        /* mask for block last-bit */ + +    /* get the byte to modify (one or two back) into buf[0] -- don't need to +       read the byte if the last-bit is eight bits back, since in that case +       the entire byte will be modified */ +    buf[0] = 0; +    if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 || +                      read(log->fd, buf, 1) != 1)) +        return -1; + +    /* change the last-bit of the last stored block as requested -- note +       that all bits above the last-bit are set to zero, per the type bits +       of a stored block being 00 and per the convention that the bits to +       bring the stream to a byte boundary are also zeros */ +    buf[1] = 0; +    buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0); + +    /* write the modified stored block header and lengths, move the file +       pointer to after the last stored block data */ +    PUT2(buf + 2, log->stored); +    PUT2(buf + 4, log->stored ^ 0xffff); +    return lseek(log->fd, log->last - len, SEEK_SET) < 0 || +           write(log->fd, buf + 2 - len, len + 4) != len + 4 || +           lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0; +} + +/* Append len bytes from data to the locked and open log file.  len may be zero +   if recovering and no .add file was found.  In that case, the previous state +   of the foo.gz file is restored.  The data is appended uncompressed in +   deflate stored blocks.  Return -1 if there was an error reading or writing +   the foo.gz file. 
*/ +local int log_append(struct log *log, unsigned char *data, size_t len) +{ +    uint put; +    off_t end; +    unsigned char buf[8]; + +    /* set the last block last-bit and length, in case recovering an +       interrupted append, then position the file pointer to append to the +       block */ +    if (log_last(log, 1)) +        return -1; + +    /* append, adding stored blocks and updating the offset of the last stored +       block as needed, and update the total crc and length */ +    while (len) { +        /* append as much as we can to the last block */ +        put = (MAX_STORE << 10) - log->stored; +        if (put > len) +            put = (uint)len; +        if (put) { +            if (write(log->fd, data, put) != put) +                return -1; +            BAIL(1); +            log->tcrc = crc32(log->tcrc, data, put); +            log->tlen += put; +            log->stored += put; +            data += put; +            len -= put; +        } + +        /* if we need to, add a new empty stored block */ +        if (len) { +            /* mark current block as not last */ +            if (log_last(log, 0)) +                return -1; + +            /* point to new, empty stored block */ +            log->last += 4 + log->stored + 1; +            log->stored = 0; +        } + +        /* mark last block as last, update its length */ +        if (log_last(log, 1)) +            return -1; +        BAIL(2); +    } + +    /* write the new crc and length trailer, and truncate just in case (could +       be recovering from partial append with a missing foo.add file) */ +    PUT4(buf, log->tcrc); +    PUT4(buf + 4, log->tlen); +    if (write(log->fd, buf, 8) != 8 || +        (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) +        return -1; + +    /* write the extra field, marking the log file as done, delete .add file */ +    if (log_mark(log, NO_OP)) +        return -1; +    strcpy(log->end, ".add"); +    unlink(log->path);          
/* ignore error, since may not exist */ +    return 0; +} + +/* Replace the foo.dict file with the foo.temp file.  Also delete the foo.add +   file, since the compress operation may have been interrupted before that was +   done.  Returns -2 if memory could not be allocated, or -1 if reading or +   writing foo.gz fails, or if the rename fails for some reason other than +   foo.temp not existing.  foo.temp not existing is a permitted error, since +   the replace operation may have been interrupted after the rename is done, +   but before foo.gz is marked as complete.  Returns 0 on success. */ +local int log_replace(struct log *log) +{ +    int ret; +    char *dest; + +    /* delete foo.add file */ +    strcpy(log->end, ".add"); +    unlink(log->path);         /* ignore error, since may not exist */ +    BAIL(3); + +    /* rename foo.temp to foo.dict, replacing foo.dict if it exists -- the +       destination name is copied to dest, since log->path is then reused to +       build the source name */ +    strcpy(log->end, ".dict"); +    dest = malloc(strlen(log->path) + 1); +    if (dest == NULL) +        return -2; +    strcpy(dest, log->path); +    strcpy(log->end, ".temp"); +    ret = rename(log->path, dest); +    free(dest); +    if (ret && errno != ENOENT)     /* a missing foo.temp is permitted */ +        return -1; +    BAIL(4); + +    /* mark the foo.gz file as done */ +    return log_mark(log, NO_OP); +} + +/* Compress the len bytes at data and append the compressed data to the +   foo.gz deflate data immediately after the previous compressed data.  This +   overwrites the previous uncompressed data, which was stored in foo.add +   and is the data provided in data[0..len-1].  If this operation is +   interrupted, it picks up at the start of this routine, with the foo.add +   file read in again.  If there is no data to compress (len == 0), then we +   simply terminate the foo.gz file after the previously compressed data, +   appending a final empty stored block and the gzip trailer.  Return -1 if +   reading or writing the foo.gz file failed, or -2 if there was a memory +   allocation failure. 
*/ +local int log_compress(struct log *log, unsigned char *data, size_t len) +{ +    int fd; +    uint got, max; +    ssize_t dict; +    off_t end; +    z_stream strm; +    unsigned char buf[DICT]; + +    /* compress and append compressed data */ +    if (len) { +        /* set up for deflate, allocating memory */ +        strm.zalloc = Z_NULL; +        strm.zfree = Z_NULL; +        strm.opaque = Z_NULL; +        if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8, +                         Z_DEFAULT_STRATEGY) != Z_OK) +            return -2; + +        /* read in dictionary (last 32K of data that was compressed) */ +        strcpy(log->end, ".dict"); +        fd = open(log->path, O_RDONLY, 0); +        if (fd >= 0) { +            dict = read(fd, buf, DICT); +            close(fd); +            if (dict < 0) { +                deflateEnd(&strm); +                return -1; +            } +            if (dict) +                deflateSetDictionary(&strm, buf, (uint)dict); +        } +        log_touch(log); + +        /* prime deflate with last bits of previous block, position write +           pointer to write those bits and overwrite what follows */ +        if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1), +                SEEK_SET) < 0 || +            read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) { +            deflateEnd(&strm); +            return -1; +        } +        deflatePrime(&strm, (8 - log->back) & 7, *buf); + +        /* compress, finishing with a partial non-last empty static block */ +        strm.next_in = data; +        max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */ +        do { +            strm.avail_in = len > max ? max : (uint)len; +            len -= strm.avail_in; +            do { +                strm.avail_out = DICT; +                strm.next_out = buf; +                deflate(&strm, len ? 
Z_NO_FLUSH : Z_PARTIAL_FLUSH); +                got = DICT - strm.avail_out; +                if (got && write(log->fd, buf, got) != got) { +                    deflateEnd(&strm); +                    return -1; +                } +                log_touch(log); +            } while (strm.avail_out == 0); +        } while (len); +        deflateEnd(&strm); +        BAIL(5); + +        /* find start of empty static block -- scanning backwards the first one +           bit is the second bit of the block, if the last byte is zero, then +           we know the byte before that has a one in the top bit, since an +           empty static block is ten bits long */ +        if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 || +            read(log->fd, buf, 1) != 1) +            return -1; +        log->first++; +        if (*buf) { +            log->back = 1; +            while ((*buf & ((uint)1 << (8 - log->back++))) == 0) +                ;       /* guaranteed to terminate, since *buf != 0 */ +        } +        else +            log->back = 10; + +        /* update compressed crc and length */ +        log->ccrc = log->tcrc; +        log->clen = log->tlen; +    } +    else { +        /* no data to compress -- fix up existing gzip stream */ +        log->tcrc = log->ccrc; +        log->tlen = log->clen; +    } + +    /* complete and truncate gzip stream */ +    log->last = log->first; +    log->stored = 0; +    PUT4(buf, log->tcrc); +    PUT4(buf + 4, log->tlen); +    if (log_last(log, 1) || write(log->fd, buf, 8) != 8 || +        (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) +        return -1; +    BAIL(6); + +    /* mark as being in the replace operation */ +    if (log_mark(log, REPLACE_OP)) +        return -1; + +    /* execute the replace operation and mark the file as done */ +    return log_replace(log); +} + +/* log a repair record to the .repairs file */ +local void log_log(struct log *log, int op, char *record) +{ +    time_t now; +   
 FILE *rec; + +    now = time(NULL); +    strcpy(log->end, ".repairs"); +    rec = fopen(log->path, "a"); +    if (rec == NULL) +        return; +    fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ? +            "append" : (op == COMPRESS_OP ? "compress" : "replace"), record); +    fclose(rec); +    return; +} + +/* Recover the interrupted operation op.  First read foo.add for recovering an +   append or compress operation.  Return -1 if there was an error reading or +   writing foo.gz or reading an existing foo.add, or -2 if there was a memory +   allocation failure. */ +local int log_recover(struct log *log, int op) +{ +    int fd, ret = 0; +    unsigned char *data = NULL; +    size_t len = 0; +    struct stat st; + +    /* log recovery */ +    log_log(log, op, "start"); + +    /* load foo.add file if expected and present */ +    if (op == APPEND_OP || op == COMPRESS_OP) { +        strcpy(log->end, ".add"); +        if (stat(log->path, &st) == 0 && st.st_size) { +            len = (size_t)(st.st_size); +            if ((off_t)len != st.st_size || +                    (data = malloc(st.st_size)) == NULL) { +                log_log(log, op, "allocation failure"); +                return -2; +            } +            if ((fd = open(log->path, O_RDONLY, 0)) < 0) { +                log_log(log, op, ".add file read failure"); +                return -1; +            } +            ret = (size_t)read(fd, data, len) != len; +            close(fd); +            if (ret) { +                log_log(log, op, ".add file read failure"); +                return -1; +            } +            log_log(log, op, "loaded .add file"); +        } +        else +            log_log(log, op, "missing .add file!"); +    } + +    /* recover the interrupted operation */ +    switch (op) { +    case APPEND_OP: +        ret = log_append(log, data, len); +        break; +    case COMPRESS_OP: +        ret = log_compress(log, data, len); +        break; +    case 
REPLACE_OP: +        ret = log_replace(log); +    } + +    /* log status */ +    log_log(log, op, ret ? "failure" : "complete"); + +    /* clean up */ +    if (data != NULL) +        free(data); +    return ret; +} + +/* Close the foo.gz file (if open) and release the lock. */ +local void log_close(struct log *log) +{ +    if (log->fd >= 0) +        close(log->fd); +    log->fd = -1; +    log_unlock(log); +} + +/* Open foo.gz, verify the header, and load the extra field contents, after +   first creating the foo.lock file to gain exclusive access to the foo.* +   files.  If foo.gz does not exist or is empty, then write the initial header, +   extra, and body content of an empty foo.gz log file.  If there is an error +   creating the lock file due to access restrictions, or an error reading or +   writing the foo.gz file, or if the foo.gz file is not a proper log file for +   this object (e.g. not a gzip file or does not contain the expected extra +   field), then return true.  If there is an error, the lock is released. +   Otherwise, the lock is left in place. 
*/ +local int log_open(struct log *log) +{ +    int op; + +    /* release open file resource if left over -- can occur if lock lost +       between gzlog_open() and gzlog_write() */ +    if (log->fd >= 0) +        close(log->fd); +    log->fd = -1; + +    /* negotiate exclusive access */ +    if (log_lock(log) < 0) +        return -1; + +    /* open the log file, foo.gz */ +    strcpy(log->end, ".gz"); +    log->fd = open(log->path, O_RDWR | O_CREAT, 0644); +    if (log->fd < 0) { +        log_close(log); +        return -1; +    } + +    /* if new, initialize foo.gz with an empty log, delete old dictionary */ +    if (lseek(log->fd, 0, SEEK_END) == 0) { +        if (write(log->fd, log_gzhead, HEAD) != HEAD || +            write(log->fd, log_gzext, EXTRA) != EXTRA || +            write(log->fd, log_gzbody, BODY) != BODY) { +            log_close(log); +            return -1; +        } +        strcpy(log->end, ".dict"); +        unlink(log->path); +    } + +    /* verify log file and load extra field information */ +    if ((op = log_head(log)) < 0) { +        log_close(log); +        return -1; +    } + +    /* check for interrupted process and if so, recover */ +    if (op != NO_OP && log_recover(log, op)) { +        log_close(log); +        return -1; +    } + +    /* touch the lock file to prevent another process from grabbing it */ +    log_touch(log); +    return 0; +} + +/* See gzlog.h for the description of the external methods below */ +gzlog *gzlog_open(char *path) +{ +    size_t n; +    struct log *log; + +    /* check arguments */ +    if (path == NULL || *path == 0) +        return NULL; + +    /* allocate and initialize log structure */ +    log = malloc(sizeof(struct log)); +    if (log == NULL) +        return NULL; +    strcpy(log->id, LOGID); +    log->fd = -1; + +    /* save path and end of path for name construction */ +    n = strlen(path); +    log->path = malloc(n + 9);              /* allow for ".repairs" */ +    if (log->path == NULL) { +  
      free(log); +        return NULL; +    } +    strcpy(log->path, path); +    log->end = log->path + n; + +    /* gain exclusive access and verify log file -- may perform a +       recovery operation if needed */ +    if (log_open(log)) { +        free(log->path); +        free(log); +        return NULL; +    } + +    /* return pointer to log structure */ +    return log; +} + +/* gzlog_compress() return values: +    0: all good +   -1: file i/o error (usually access issue) +   -2: memory allocation failure +   -3: invalid log pointer argument */ +int gzlog_compress(gzlog *logd) +{ +    int fd, ret; +    uint block; +    size_t len, next; +    unsigned char *data, buf[5]; +    struct log *log = logd; + +    /* check arguments */ +    if (log == NULL || strcmp(log->id, LOGID)) +        return -3; + +    /* see if we lost the lock -- if so get it again and reload the extra +       field information (it probably changed), recover last operation if +       necessary */ +    if (log_check(log) && log_open(log)) +        return -1; + +    /* create space for uncompressed data */ +    len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) + +          log->stored; +    if ((data = malloc(len)) == NULL) +        return -2; + +    /* do statement here is just a cheap trick for error handling */ +    do { +        /* read in the uncompressed data */ +        if (lseek(log->fd, log->first - 1, SEEK_SET) < 0) +            break; +        next = 0; +        while (next < len) { +            if (read(log->fd, buf, 5) != 5) +                break; +            block = PULL2(buf + 1); +            if (next + block > len || +                read(log->fd, (char *)data + next, block) != block) +                break; +            next += block; +        } +        if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored) +            break; +        log_touch(log); + +        /* write the uncompressed data to the .add file */ +        strcpy(log->end, ".add"); 
+        fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); +        if (fd < 0) +            break; +        ret = (size_t)write(fd, data, len) != len; +        if (ret | close(fd)) +            break; +        log_touch(log); + +        /* write the dictionary for the next compress to the .temp file */ +        strcpy(log->end, ".temp"); +        fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); +        if (fd < 0) +            break; +        next = DICT > len ? len : DICT; +        ret = (size_t)write(fd, (char *)data + len - next, next) != next; +        if (ret | close(fd)) +            break; +        log_touch(log); + +        /* roll back to compressed data, mark the compress in progress */ +        log->last = log->first; +        log->stored = 0; +        if (log_mark(log, COMPRESS_OP)) +            break; +        BAIL(7); + +        /* compress and append the data (clears mark) */ +        ret = log_compress(log, data, len); +        free(data); +        return ret; +    } while (0); + +    /* broke out of do above on i/o error */ +    free(data); +    return -1; +} + +/* gzlog_write() return values: +    0: all good +   -1: file i/o error (usually access issue) +   -2: memory allocation failure +   -3: invalid log pointer argument */ +int gzlog_write(gzlog *logd, void *data, size_t len) +{ +    int fd, ret; +    struct log *log = logd; + +    /* check arguments */ +    if (log == NULL || strcmp(log->id, LOGID)) +        return -3; +    if (data == NULL || len <= 0) +        return 0; + +    /* see if we lost the lock -- if so get it again and reload the extra +       field information (it probably changed), recover last operation if +       necessary */ +    if (log_check(log) && log_open(log)) +        return -1; + +    /* create and write .add file */ +    strcpy(log->end, ".add"); +    fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); +    if (fd < 0) +        return -1; +    ret = (size_t)write(fd, data, len) != len; +    
if (ret | close(fd)) +        return -1; +    log_touch(log); + +    /* mark log file with append in progress */ +    if (log_mark(log, APPEND_OP)) +        return -1; +    BAIL(8); + +    /* append data (clears mark) */ +    if (log_append(log, data, len)) +        return -1; + +    /* check to see if it's time to compress -- if not, then done */ +    if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER) +        return 0; + +    /* time to compress */ +    return gzlog_compress(log); +} + +/* gzlog_close() return values: +    0: ok +   -3: invalid log pointer argument */ +int gzlog_close(gzlog *logd) +{ +    struct log *log = logd; + +    /* check arguments */ +    if (log == NULL || strcmp(log->id, LOGID)) +        return -3; + +    /* close the log file and release the lock */ +    log_close(log); + +    /* free structure and return */ +    if (log->path != NULL) +        free(log->path); +    strcpy(log->id, "bad"); +    free(log); +    return 0; +} diff --git a/win32/zlib/examples/gzlog.h b/win32/zlib/examples/gzlog.h new file mode 100644 index 0000000..86f0cec --- /dev/null +++ b/win32/zlib/examples/gzlog.h @@ -0,0 +1,91 @@ +/* gzlog.h +  Copyright (C) 2004, 2008, 2012 Mark Adler, all rights reserved +  version 2.2, 14 Aug 2012 + +  This software is provided 'as-is', without any express or implied +  warranty.  In no event will the author be held liable for any damages +  arising from the use of this software. + +  Permission is granted to anyone to use this software for any purpose, +  including commercial applications, and to alter it and redistribute it +  freely, subject to the following restrictions: + +  1. The origin of this software must not be misrepresented; you must not +     claim that you wrote the original software. If you use this software +     in a product, an acknowledgment in the product documentation would be +     appreciated but is not required. +  2. 
Altered source versions must be plainly marked as such, and must not be +     misrepresented as being the original software. +  3. This notice may not be removed or altered from any source distribution. + +  Mark Adler    madler@alumni.caltech.edu + */ + +/* Version History: +   1.0  26 Nov 2004  First version +   2.0  25 Apr 2008  Complete redesign for recovery of interrupted operations +                     Interface changed slightly in that now path is a prefix +                     Compression now occurs as needed during gzlog_write() +                     gzlog_write() now always leaves the log file as valid gzip +   2.1   8 Jul 2012  Fix argument checks in gzlog_compress() and gzlog_write() +   2.2  14 Aug 2012  Clean up signed comparisons + */ + +/* +   The gzlog object allows writing short messages to a gzipped log file, +   opening the log file locked for small bursts, and then closing it.  The log +   object works by appending stored (uncompressed) data to the gzip file until +   1 MB has been accumulated.  At that time, the stored data is compressed, and +   replaces the uncompressed data in the file.  The log file is truncated to +   its new size at that time.  After each write operation, the log file is a +   valid gzip file that can be decompressed to recover what was written. + +   The gzlog operations can be interrupted at any point due to an application or +   system crash, and the log file will be recovered the next time the log is +   opened with gzlog_open(). + */ + +#ifndef GZLOG_H +#define GZLOG_H + +/* gzlog object type */ +typedef void gzlog; + +/* Open a gzlog object, creating the log file if it does not exist.  Return +   NULL on error.  Note that gzlog_open() could take a while to complete if it +   has to wait to verify that a lock is stale (possibly for five minutes), or +   if there is significant contention with other instantiations of this object +   when locking the resource.  
path is the prefix of the file names created by +   this object.  If path is "foo", then the log file will be "foo.gz", and +   other auxiliary files will be created and destroyed during the process: +   "foo.dict" for a compression dictionary, "foo.temp" for a temporary (next) +   dictionary, "foo.add" for data being added or compressed, "foo.lock" for the +   lock file, and "foo.repairs" to log recovery operations performed due to +   interrupted gzlog operations.  A gzlog_open() followed by a gzlog_close() +   will recover a previously interrupted operation, if any. */ +gzlog *gzlog_open(char *path); + +/* Write to a gzlog object.  Return zero on success, -1 if there is a file i/o +   error on any of the gzlog files (this should not happen if gzlog_open() +   succeeded, unless the device has run out of space or leftover auxiliary +   files have permissions or ownership that prevent their use), -2 if there is +   a memory allocation failure, or -3 if the log argument is invalid (e.g. if +   it was not created by gzlog_open()).  This function will write data to the +   file uncompressed, until 1 MB has been accumulated, at which time that data +   will be compressed.  The log file will be a valid gzip file upon successful +   return. */ +int gzlog_write(gzlog *log, void *data, size_t len); + +/* Force compression of any uncompressed data in the log.  This should be used +   sparingly, if at all.  The main application would be when a log file will +   not be appended to again.  If this is used to compress frequently while +   appending, it will both significantly increase the execution time and +   reduce the compression ratio.  The return codes are the same as for +   gzlog_write(). */ +int gzlog_compress(gzlog *log); + +/* Close a gzlog object.  Return zero on success, -3 if the log argument is +   invalid.  The log object is freed, and so cannot be referenced again. 
*/ +int gzlog_close(gzlog *log); + +#endif diff --git a/win32/zlib/examples/zlib_how.html b/win32/zlib/examples/zlib_how.html new file mode 100644 index 0000000..444ff1c --- /dev/null +++ b/win32/zlib/examples/zlib_how.html @@ -0,0 +1,545 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" +  "http://www.w3.org/TR/REC-html40/loose.dtd"> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>zlib Usage Example</title> +<!--  Copyright (c) 2004, 2005 Mark Adler.  --> +</head> +<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000"> +<h2 align="center"> zlib Usage Example </h2> +We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used. +Users wonder when they should provide more input, when they should use more output, +what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and +so on.  So for those who have read <tt>zlib.h</tt> (a few times), and +would like further edification, below is an annotated example in C of simple routines to compress and decompress +from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively.  The +annotations are interspersed between lines of the code.  So please read between the lines. +We hope this helps explain some of the intricacies of <em>zlib</em>. 
+<p> +Without further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>: +<pre><b> +/* zpipe.c: example of proper use of zlib's inflate() and deflate() +   Not copyrighted -- provided to the public domain +   Version 1.4  11 December 2005  Mark Adler */ + +/* Version history: +   1.0  30 Oct 2004  First version +   1.1   8 Nov 2004  Add void casting for unused return values +                     Use switch statement for inflate() return values +   1.2   9 Nov 2004  Add assertions to document zlib guarantees +   1.3   6 Apr 2005  Remove incorrect assertion in inf() +   1.4  11 Dec 2005  Add hack to avoid MSDOS end-of-line conversions +                     Avoid some compiler warnings for input and output buffers + */ +</b></pre><!-- --> +We now include the header files for the required definitions.  From +<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>, +<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and +<tt>fputs()</tt> for error messages.  From <tt>string.h</tt> we use +<tt>strcmp()</tt> for command line argument processing. +From <tt>assert.h</tt> we use the <tt>assert()</tt> macro. +From <tt>zlib.h</tt> +we use the basic compression functions <tt>deflateInit()</tt>, +<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression +functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and +<tt>inflateEnd()</tt>. +<pre><b> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" +</b></pre><!-- --> +This is an ugly hack required to avoid corruption of the input and output data on +Windows/MS-DOS systems.  Without this, those systems would assume that the input and output +files are text, and try to convert the end-of-line characters from one standard to +another.  That would corrupt binary data, and in particular would render the compressed data unusable. +This sets the input and output to binary which suppresses the end-of-line conversions. 
+<tt>SET_BINARY_MODE()</tt> will be used later on <tt>stdin</tt> and <tt>stdout</tt>, at the beginning of <tt>main()</tt>. +<pre><b> +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +#  include <fcntl.h> +#  include <io.h> +#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +#  define SET_BINARY_MODE(file) +#endif +</b></pre><!-- --> +<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data +from the <em>zlib</em> routines.  Larger buffer sizes would be more efficient, +especially for <tt>inflate()</tt>.  If the memory is available, buffer sizes +on the order of 128K or 256K bytes should be used. +<pre><b> +#define CHUNK 16384 +</b></pre><!-- --> +The <tt>def()</tt> routine compresses data from an input file to an output file.  The output data +will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em> +formats.  The <em>zlib</em> format has a very small header of only two bytes to identify it as +a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast +check value to verify the integrity of the uncompressed data after decoding. +<pre><b> +/* Compress from file source to file dest until EOF on source. +   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_STREAM_ERROR if an invalid compression +   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the +   version of the library linked do not match, or Z_ERRNO if there is +   an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ +</b></pre> +Here are the local variables for <tt>def()</tt>.  <tt>ret</tt> will be used for <em>zlib</em> +return codes.  <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>, +which is either no flushing, or flush to completion after the end of the input file is reached. 
+<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>.  The <tt>strm</tt> structure +is used to pass information to and from the <em>zlib</em> routines, and to maintain the +<tt>deflate()</tt> state.  <tt>in</tt> and <tt>out</tt> are the input and output buffers for +<tt>deflate()</tt>. +<pre><b> +    int ret, flush; +    unsigned have; +    z_stream strm; +    unsigned char in[CHUNK]; +    unsigned char out[CHUNK]; +</b></pre><!-- --> +The first thing we do is to initialize the <em>zlib</em> state for compression using +<tt>deflateInit()</tt>.  This must be done before the first use of <tt>deflate()</tt>. +The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt> +structure must be initialized before calling <tt>deflateInit()</tt>.  Here they are +set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use +the default memory allocation routines.  An application may also choose to provide +custom memory allocation routines here.  <tt>deflateInit()</tt> will allocate on the +order of 256K bytes for the internal state. +(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.) +<p> +<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and +the compression level, which is an integer in the range of -1 to 9.  Lower compression +levels result in faster execution, but less compression.  Higher levels result in +greater compression, but slower execution.  The <em>zlib</em> constant Z_DEFAULT_COMPRESSION, +equal to -1, +provides a good compromise between compression and speed and is equivalent to level 6. +Level 0 actually does no compression at all, and in fact expands the data slightly to produce +the <em>zlib</em> format (it is not a byte-for-byte copy of the input). +More advanced applications of <em>zlib</em> +may use <tt>deflateInit2()</tt> here instead.  Such an application may want to reduce how +much memory will be used, at some price in compression.  
Or it may need to request a +<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw +encoding with no header or trailer at all. +<p> +We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant +<tt>Z_OK</tt> to make sure that it was able to +allocate memory for the internal state, and that the provided arguments were valid. +<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt> +file came from matches the version of <em>zlib</em> actually linked with the program.  This +is especially important for environments in which <em>zlib</em> is a shared library. +<p> +Note that an application can initialize multiple, independent <em>zlib</em> streams, which can +operate in parallel.  The state information maintained in the structure allows the <em>zlib</em> +routines to be reentrant. +<pre><b> +    /* allocate deflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    ret = deflateInit(&strm, level); +    if (ret != Z_OK) +        return ret; +</b></pre><!-- --> +With the pleasantries out of the way, now we can get down to business.  The outer <tt>do</tt>-loop +reads all of the input file and exits at the bottom of the loop once end-of-file is reached. +This loop contains the only call of <tt>deflate()</tt>.  So we must make sure that all of the +input data has been processed and that all of the output data has been generated and consumed +before we fall out of the loop at the bottom. +<pre><b> +    /* compress until end of file */ +    do { +</b></pre> +We start off by reading data from the input file.  The number of bytes read is put directly +into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>.  We also +check to see if end-of-file on the input has been reached.  
If we are at the end of file, then <tt>flush</tt> is set to the +<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to +indicate that this is the last chunk of input data to compress.  We need to use <tt>feof()</tt> +to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read.  The +reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss +the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish +up the compressed stream.  If we are not yet at the end of the input, then the <em>zlib</em> +constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still +in the middle of the uncompressed data. +<p> +If there is an error in reading from the input file, the process is aborted with +<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning +the error.  We wouldn't want a memory leak, now would we?  <tt>deflateEnd()</tt> can be called +at any time after the state has been initialized.  Once that's done, <tt>deflateInit()</tt> (or +<tt>deflateInit2()</tt>) would have to be called to start a new compression process.  There is +no point here in checking the <tt>deflateEnd()</tt> return code.  The deallocation can't fail. +<pre><b> +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)deflateEnd(&strm); +            return Z_ERRNO; +        } +        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; +        strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then +keeps calling <tt>deflate()</tt> until it is done producing output.  Once there is no more +new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e., +<tt>avail_in</tt> will be zero. 
+<pre><b> +        /* run deflate() on input until output buffer not full, finish +           compression if all of source has been read in */ +        do { +</b></pre> +Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number +of available output bytes and <tt>next_out</tt> to a pointer to that space. +<pre><b> +            strm.avail_out = CHUNK; +            strm.next_out = out; +</b></pre> +Now we call the compression engine itself, <tt>deflate()</tt>.  It takes as many of the +<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as +<tt>avail_out</tt> bytes to <tt>next_out</tt>.  Those counters and pointers are then +updated past the input data consumed and the output data written.  It is the amount of +output space available that may limit how much input is consumed. +Hence the inner loop to make sure that +all of the input is consumed by providing more output space each time.  Since <tt>avail_in</tt> +and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those +between <tt>deflate()</tt> calls until it's all used up. +<p> +The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing +the input and output information and the internal compression engine state, and a parameter +indicating whether and how to flush data to the output.  Normally <tt>deflate</tt> will consume +several K bytes of input data before producing any output (except for the header), in order +to accumulate statistics on the data for optimum compression.  It will then put out a burst of +compressed data, and proceed to consume more input before the next burst.  Eventually, +<tt>deflate()</tt> +must be told to terminate the stream, complete the compression with provided input data, and +write out the trailer check value.  <tt>deflate()</tt> will continue to compress normally as long +as the flush parameter is <tt>Z_NO_FLUSH</tt>.  
Once the <tt>Z_FINISH</tt> parameter is provided, +<tt>deflate()</tt> will begin to complete the compressed output stream.  However depending on how +much output space is provided, <tt>deflate()</tt> may have to be called several times until it +has provided the complete compressed stream, even after it has consumed all of the input.  The flush +parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls. +<p> +There are other values of the flush parameter that are used in more advanced applications.  You can +force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided +so far, even if it wouldn't have otherwise, for example to control data latency on a link with +compressed data.  You can also ask that <tt>deflate()</tt> do that as well as erase any history up to +that point so that what follows can be decompressed independently, for example for random access +applications.  Both requests will degrade compression by an amount depending on how often such +requests are made. +<p> +<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here.  Why +not?  Well, it turns out that <tt>deflate()</tt> can do no wrong here.  Let's go through +<tt>deflate()</tt>'s return values and dispense with them one by one.  The possible values are +<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>.  <tt>Z_OK</tt> +is, well, ok.  <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of +<tt>deflate()</tt>.  This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt> +until it has no more output.  <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not +initialized properly, but we did initialize it properly.  There is no harm in checking for +<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some +other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state. 
+<tt>Z_BUF_ERROR</tt> will be explained further below, but +suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume +more input or produce more output.  <tt>deflate()</tt> can be called again with more output space +or more available input, which it will be in this code. +<pre><b> +            ret = deflate(&strm, flush);    /* no bad return value */ +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +</b></pre> +Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the +difference between how much space was provided before the call, and how much output space +is still available after the call.  Then that data, if any, is written to the output file. +We can then reuse the output buffer for the next call of <tt>deflate()</tt>.  Again if there +is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak. +<pre><b> +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)deflateEnd(&strm); +                return Z_ERRNO; +            } +</b></pre> +The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the +provided output buffer.  Then we know that <tt>deflate()</tt> has done as much as it can with +the provided input, and that all of that input has been consumed.  We can then fall out of this +loop and reuse the input buffer. +<p> +The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill +the output buffer, leaving <tt>avail_out</tt> greater than zero.  However suppose that +<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer! +<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can. +As far as we know, <tt>deflate()</tt> +has more output for us.  So we call it again.  
But now <tt>deflate()</tt> produces no output +at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>.  That <tt>deflate()</tt> call +wasn't able to do anything, either consume input or produce output, and so it returns +<tt>Z_BUF_ERROR</tt>.  (See, I told you I'd cover this later.)  However this is not a problem at +all.  Now we finally have the desired indication that <tt>deflate()</tt> is really done, +and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>. +<p> +With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will +complete the output stream.  Once that is done, subsequent calls of <tt>deflate()</tt> would return +<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing +until the state is reinitialized. +<p> +Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt> +instead of the single inner loop we have here.  The first loop would call +without flushing and feed all of the data to <tt>deflate()</tt>.  The second loop would call +<tt>deflate()</tt> with no more +data and the <tt>Z_FINISH</tt> parameter to complete the process.  As you can see from this +example, that can be avoided by simply keeping track of the current flush state. +<pre><b> +        } while (strm.avail_out == 0); +        assert(strm.avail_in == 0);     /* all input will be used */ +</b></pre><!-- --> +Now we check to see if we have already processed all of the input file.  That information was +saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>.  If so, +then we're done and we fall out of the outer loop.  We're guaranteed to get <tt>Z_STREAM_END</tt> +from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was +consumed and all of the output was generated. 
+<pre><b> +        /* done when last data in file processed */ +    } while (flush != Z_FINISH); +    assert(ret == Z_STREAM_END);        /* stream will be complete */ +</b></pre><!-- --> +The process is complete, but we still need to deallocate the state to avoid a memory leak +(or rather more like a memory hemorrhage if you didn't do this).  Then +finally we can return with a happy return value. +<pre><b> +    /* clean up and return */ +    (void)deflateEnd(&strm); +    return Z_OK; +} +</b></pre><!-- --> +Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt> +decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the +uncompressed data to the output file.  Much of the discussion above for <tt>def()</tt> +applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between +the two. +<pre><b> +/* Decompress from file source to file dest until stream ends or EOF. +   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_DATA_ERROR if the deflate data is +   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and +   the version of the library linked do not match, or Z_ERRNO if there +   is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ +</b></pre> +The local variables have the same functionality as they do for <tt>def()</tt>.  The +only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt> +can tell from the <em>zlib</em> stream itself when the stream is complete. +<pre><b> +    int ret; +    unsigned have; +    z_stream strm; +    unsigned char in[CHUNK]; +    unsigned char out[CHUNK]; +</b></pre><!-- --> +The initialization of the state is the same, except that there is no compression level, +of course, and two more elements of the structure are initialized.  
<tt>avail_in</tt> +and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>.  This +is because the application has the option to provide the start of the zlib stream in +order for <tt>inflateInit()</tt> to have access to information about the compression +method to aid in memory allocation.  In the current implementation of <em>zlib</em> +(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of +<tt>inflate()</tt> anyway.  However those fields must be initialized since later versions +of <em>zlib</em> that provide more compression methods may take advantage of this interface. +In any case, no decompression is performed by <tt>inflateInit()</tt>, so the +<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling. +<p> +Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to +indicate that no input data is being provided. +<pre><b> +    /* allocate inflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit(&strm); +    if (ret != Z_OK) +        return ret; +</b></pre><!-- --> +The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates +that it has reached the end of the compressed data and has produced all of the uncompressed +output.  This is in contrast to <tt>def()</tt> which processes all of the input file. +If end-of-file is reached before the compressed data self-terminates, then the compressed +data is incomplete and an error is returned. +<pre><b> +    /* decompress until deflate stream ends or end of file */ +    do { +</b></pre> +We read input data and set the <tt>strm</tt> structure accordingly.  If we've reached the +end of the input file, then we leave the outer loop and report an error, since the +compressed data is incomplete.  
Note that we may read more data than is eventually consumed +by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream. +For applications where <em>zlib</em> streams are embedded in other data, this routine would +need to be modified to return the unused data, or at least indicate how much of the input +data was not used, so the application would know where to pick up after the <em>zlib</em> stream. +<pre><b> +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)inflateEnd(&strm); +            return Z_ERRNO; +        } +        if (strm.avail_in == 0) +            break; +        strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to +keep calling <tt>inflate()</tt> until it has generated all of the output it can with the +provided input. +<pre><b> +        /* run inflate() on input until output buffer not full */ +        do { +</b></pre> +Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>. +<pre><b> +            strm.avail_out = CHUNK; +            strm.next_out = out; +</b></pre> +Now we run the decompression engine itself.  There is no need to adjust the flush parameter, since +the <em>zlib</em> format is self-terminating. The main difference here is that there are +return values that we need to pay attention to.  <tt>Z_DATA_ERROR</tt> +indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format, +which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was +corrupted somewhere along the way since it was compressed.  The other error to be processed is +<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt> +needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>. 
+<p> +Advanced applications may use +<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the +first 32K or so of compression.  This is noted in the <em>zlib</em> header, so <tt>inflate()</tt> +requests that that dictionary be provided before it can start to decompress.  Without the dictionary, +correct decompression is not possible.  For this routine, we have no idea what the dictionary is, +so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>. +<p> +<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here, +but could be checked for as noted above for <tt>def()</tt>.  <tt>Z_BUF_ERROR</tt> does not need to be +checked for here, for the same reasons noted for <tt>def()</tt>.  <tt>Z_STREAM_END</tt> will be +checked for later. +<pre><b> +            ret = inflate(&strm, Z_NO_FLUSH); +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +            switch (ret) { +            case Z_NEED_DICT: +                ret = Z_DATA_ERROR;     /* and fall through */ +            case Z_DATA_ERROR: +            case Z_MEM_ERROR: +                (void)inflateEnd(&strm); +                return ret; +            } +</b></pre> +The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>. +<pre><b> +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)inflateEnd(&strm); +                return Z_ERRNO; +            } +</b></pre> +The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated +by not filling the output buffer, just as for <tt>deflate()</tt>.  In this case, we cannot +assert that <tt>strm.avail_in</tt> will be zero, since the deflate stream may end before the file +does. 
+<pre><b> +        } while (strm.avail_out == 0); +</b></pre><!-- --> +The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the +end of the input <em>zlib</em> stream, has completed the decompression and integrity +check, and has provided all of the output.  This is indicated by the <tt>inflate()</tt> +return value <tt>Z_STREAM_END</tt>.  The inner loop is guaranteed to leave <tt>ret</tt> +equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end +of the <em>zlib</em> stream.  So if the return value is not <tt>Z_STREAM_END</tt>, the +loop continues to read more input. +<pre><b> +        /* done when inflate() says it's done */ +    } while (ret != Z_STREAM_END); +</b></pre><!-- --> +At this point, decompression successfully completed, or we broke out of the loop due to no +more data being available from the input file.  If the last <tt>inflate()</tt> return value +is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error +is returned.  Otherwise, we return with a happy return value.  Of course, <tt>inflateEnd()</tt> +is called first to avoid a memory leak. +<pre><b> +    /* clean up and return */ +    (void)inflateEnd(&strm); +    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} +</b></pre><!-- --> +That ends the routines that directly use <em>zlib</em>.  The following routines make this +a command-line program by running data through the above routines from <tt>stdin</tt> to +<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>. +<p> +<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt> +and <tt>inf()</tt>, as detailed in their comments above, and print out an error message. +Note that these are only a subset of the possible return values from <tt>deflate()</tt> +and <tt>inflate()</tt>. 
+<pre><b> +/* report a zlib or i/o error */ +void zerr(int ret) +{ +    fputs("zpipe: ", stderr); +    switch (ret) { +    case Z_ERRNO: +        if (ferror(stdin)) +            fputs("error reading stdin\n", stderr); +        if (ferror(stdout)) +            fputs("error writing stdout\n", stderr); +        break; +    case Z_STREAM_ERROR: +        fputs("invalid compression level\n", stderr); +        break; +    case Z_DATA_ERROR: +        fputs("invalid or incomplete deflate data\n", stderr); +        break; +    case Z_MEM_ERROR: +        fputs("out of memory\n", stderr); +        break; +    case Z_VERSION_ERROR: +        fputs("zlib version mismatch!\n", stderr); +    } +} +</b></pre><!-- --> +Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>.  The +<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if +no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used.  If any other +arguments are provided, no compression or decompression is performed.  Instead a usage +message is displayed.  Examples are <tt>zpipe < foo.txt > foo.txt.z</tt> to compress, and +<tt>zpipe -d < foo.txt.z > foo.txt</tt> to decompress. 
+<pre><b> +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ +    int ret; + +    /* avoid end-of-line conversions */ +    SET_BINARY_MODE(stdin); +    SET_BINARY_MODE(stdout); + +    /* do compression if no arguments */ +    if (argc == 1) { +        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* do decompression if -d specified */ +    else if (argc == 2 && strcmp(argv[1], "-d") == 0) { +        ret = inf(stdin, stdout); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* otherwise, report usage */ +    else { +        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); +        return 1; +    } +} +</b></pre> +<hr> +<i>Copyright (c) 2004, 2005 by Mark Adler<br>Last modified 11 December 2005</i> +</body> +</html> diff --git a/win32/zlib/examples/zpipe.c b/win32/zlib/examples/zpipe.c new file mode 100644 index 0000000..83535d1 --- /dev/null +++ b/win32/zlib/examples/zpipe.c @@ -0,0 +1,205 @@ +/* zpipe.c: example of proper use of zlib's inflate() and deflate() +   Not copyrighted -- provided to the public domain +   Version 1.4  11 December 2005  Mark Adler */ + +/* Version history: +   1.0  30 Oct 2004  First version +   1.1   8 Nov 2004  Add void casting for unused return values +                     Use switch statement for inflate() return values +   1.2   9 Nov 2004  Add assertions to document zlib guarantees +   1.3   6 Apr 2005  Remove incorrect assertion in inf() +   1.4  11 Dec 2005  Add hack to avoid MSDOS end-of-line conversions +                     Avoid some compiler warnings for input and output buffers + */ + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +#  include <fcntl.h> +#  include <io.h> +#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +#  
define SET_BINARY_MODE(file) +#endif + +#define CHUNK 16384 + +/* Compress from file source to file dest until EOF on source. +   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_STREAM_ERROR if an invalid compression +   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the +   version of the library linked do not match, or Z_ERRNO if there is +   an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ +    int ret, flush; +    unsigned have; +    z_stream strm; +    unsigned char in[CHUNK]; +    unsigned char out[CHUNK]; + +    /* allocate deflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    ret = deflateInit(&strm, level); +    if (ret != Z_OK) +        return ret; + +    /* compress until end of file */ +    do { +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)deflateEnd(&strm); +            return Z_ERRNO; +        } +        flush = feof(source) ? 
Z_FINISH : Z_NO_FLUSH; +        strm.next_in = in; + +        /* run deflate() on input until output buffer not full, finish +           compression if all of source has been read in */ +        do { +            strm.avail_out = CHUNK; +            strm.next_out = out; +            ret = deflate(&strm, flush);    /* no bad return value */ +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)deflateEnd(&strm); +                return Z_ERRNO; +            } +        } while (strm.avail_out == 0); +        assert(strm.avail_in == 0);     /* all input will be used */ + +        /* done when last data in file processed */ +    } while (flush != Z_FINISH); +    assert(ret == Z_STREAM_END);        /* stream will be complete */ + +    /* clean up and return */ +    (void)deflateEnd(&strm); +    return Z_OK; +} + +/* Decompress from file source to file dest until stream ends or EOF. +   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be +   allocated for processing, Z_DATA_ERROR if the deflate data is +   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and +   the version of the library linked do not match, or Z_ERRNO if there +   is an error reading or writing the files. 
*/ +int inf(FILE *source, FILE *dest) +{ +    int ret; +    unsigned have; +    z_stream strm; +    unsigned char in[CHUNK]; +    unsigned char out[CHUNK]; + +    /* allocate inflate state */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit(&strm); +    if (ret != Z_OK) +        return ret; + +    /* decompress until deflate stream ends or end of file */ +    do { +        strm.avail_in = fread(in, 1, CHUNK, source); +        if (ferror(source)) { +            (void)inflateEnd(&strm); +            return Z_ERRNO; +        } +        if (strm.avail_in == 0) +            break; +        strm.next_in = in; + +        /* run inflate() on input until output buffer not full */ +        do { +            strm.avail_out = CHUNK; +            strm.next_out = out; +            ret = inflate(&strm, Z_NO_FLUSH); +            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */ +            switch (ret) { +            case Z_NEED_DICT: +                ret = Z_DATA_ERROR;     /* and fall through */ +            case Z_DATA_ERROR: +            case Z_MEM_ERROR: +                (void)inflateEnd(&strm); +                return ret; +            } +            have = CHUNK - strm.avail_out; +            if (fwrite(out, 1, have, dest) != have || ferror(dest)) { +                (void)inflateEnd(&strm); +                return Z_ERRNO; +            } +        } while (strm.avail_out == 0); + +        /* done when inflate() says it's done */ +    } while (ret != Z_STREAM_END); + +    /* clean up and return */ +    (void)inflateEnd(&strm); +    return ret == Z_STREAM_END ? 
Z_OK : Z_DATA_ERROR; +} + +/* report a zlib or i/o error */ +void zerr(int ret) +{ +    fputs("zpipe: ", stderr); +    switch (ret) { +    case Z_ERRNO: +        if (ferror(stdin)) +            fputs("error reading stdin\n", stderr); +        if (ferror(stdout)) +            fputs("error writing stdout\n", stderr); +        break; +    case Z_STREAM_ERROR: +        fputs("invalid compression level\n", stderr); +        break; +    case Z_DATA_ERROR: +        fputs("invalid or incomplete deflate data\n", stderr); +        break; +    case Z_MEM_ERROR: +        fputs("out of memory\n", stderr); +        break; +    case Z_VERSION_ERROR: +        fputs("zlib version mismatch!\n", stderr); +    } +} + +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ +    int ret; + +    /* avoid end-of-line conversions */ +    SET_BINARY_MODE(stdin); +    SET_BINARY_MODE(stdout); + +    /* do compression if no arguments */ +    if (argc == 1) { +        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* do decompression if -d specified */ +    else if (argc == 2 && strcmp(argv[1], "-d") == 0) { +        ret = inf(stdin, stdout); +        if (ret != Z_OK) +            zerr(ret); +        return ret; +    } + +    /* otherwise, report usage */ +    else { +        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); +        return 1; +    } +} diff --git a/win32/zlib/examples/zran.c b/win32/zlib/examples/zran.c new file mode 100644 index 0000000..278f9ad --- /dev/null +++ b/win32/zlib/examples/zran.c @@ -0,0 +1,409 @@ +/* zran.c -- example of zlib/gzip stream indexing and random access + * Copyright (C) 2005, 2012 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +   Version 1.1  29 Sep 2012  Mark Adler */ + +/* Version History: + 1.0  29 May 2005  First version + 1.1  29 Sep 2012  Fix memory reallocation error + */ + +/* 
Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() +   for random access of a compressed file.  A file containing a zlib or gzip +   stream is provided on the command line.  The compressed stream is decoded in +   its entirety, and an index built with access points about every SPAN bytes +   in the uncompressed output.  The compressed file is left open, and can then +   be read randomly, having to decompress on the average SPAN/2 uncompressed +   bytes before getting to the desired block of data. + +   An access point can be created at the start of any deflate block, by saving +   the starting file offset and bit of that block, and the 32K bytes of +   uncompressed data that precede that block.  Also the uncompressed offset of +   that block is saved to provide a reference for locating a desired starting +   point in the uncompressed stream.  build_index() works by decompressing the +   input zlib or gzip stream a block at a time, and at the end of each block +   deciding if enough uncompressed data has gone by to justify the creation of +   a new access point.  If so, that point is saved in a data structure that +   grows as needed to accommodate the points. + +   To use the index, an offset in the uncompressed data is provided, for which +   the latest access point at or preceding that offset is located in the index. +   The input file is positioned to the specified location in the index, and if +   necessary the first few bits of the compressed data are read from the file. +   inflate is initialized with those bits and the 32K of uncompressed data, and +   the decompression then proceeds until the desired offset in the file is +   reached.  Then the decompression continues to read the desired uncompressed +   data from the file. + +   Another approach would be to generate the index on demand.  
In that case, +   requests for random access reads from the compressed data would try to use +   the index, but if a read far enough past the end of the index is required, +   then further index entries would be generated and added. + +   There is some fair bit of overhead to starting inflation for the random +   access, mainly copying the 32K byte dictionary.  So if small pieces of the +   file are being accessed, it would make sense to implement a cache to hold +   some lookahead and avoid many calls to extract() for small lengths. + +   Another way to build an index would be to use inflateCopy().  That would +   not be constrained to have access points at block boundaries, but requires +   more memory per access point, and also cannot be saved to file due to the +   use of pointers in the state.  The approach here allows for storage of the +   index in a file. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zlib.h" + +#define local static + +#define SPAN 1048576L       /* desired distance between access points */ +#define WINSIZE 32768U      /* sliding window size */ +#define CHUNK 16384         /* file input buffer size */ + +/* access point entry */ +struct point { +    off_t out;          /* corresponding offset in uncompressed data */ +    off_t in;           /* offset in input file of first full byte */ +    int bits;           /* number of bits (1-7) from byte at in - 1, or 0 */ +    unsigned char window[WINSIZE];  /* preceding 32K of uncompressed data */ +}; + +/* access point list */ +struct access { +    int have;           /* number of list entries filled in */ +    int size;           /* number of list entries allocated */ +    struct point *list; /* allocated list */ +}; + +/* Deallocate an index built by build_index() */ +local void free_index(struct access *index) +{ +    if (index != NULL) { +        free(index->list); +        free(index); +    } +} + +/* Add an entry to the access point list.  
If out of memory, deallocate the +   existing list and return NULL. */ +local struct access *addpoint(struct access *index, int bits, +    off_t in, off_t out, unsigned left, unsigned char *window) +{ +    struct point *next; + +    /* if list is empty, create it (start with eight points) */ +    if (index == NULL) { +        index = malloc(sizeof(struct access)); +        if (index == NULL) return NULL; +        index->list = malloc(sizeof(struct point) << 3); +        if (index->list == NULL) { +            free(index); +            return NULL; +        } +        index->size = 8; +        index->have = 0; +    } + +    /* if list is full, make it bigger */ +    else if (index->have == index->size) { +        index->size <<= 1; +        next = realloc(index->list, sizeof(struct point) * index->size); +        if (next == NULL) { +            free_index(index); +            return NULL; +        } +        index->list = next; +    } + +    /* fill in entry and increment how many we have */ +    next = index->list + index->have; +    next->bits = bits; +    next->in = in; +    next->out = out; +    if (left) +        memcpy(next->window, window + WINSIZE - left, left); +    if (left < WINSIZE) +        memcpy(next->window + left, window, WINSIZE - left); +    index->have++; + +    /* return list, possibly reallocated */ +    return index; +} + +/* Make one entire pass through the compressed stream and build an index, with +   access points about every span bytes of uncompressed output -- span is +   chosen to balance the speed of random access against the memory requirements +   of the list, about 32K bytes per access point.  Note that data after the end +   of the first zlib or gzip stream in the file is ignored.  build_index() +   returns the number of access points on success (>= 1), Z_MEM_ERROR for out +   of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a +   file read error.  On success, *built points to the resulting index. 
*/ +local int build_index(FILE *in, off_t span, struct access **built) +{ +    int ret; +    off_t totin, totout;        /* our own total counters to avoid 4GB limit */ +    off_t last;                 /* totout value of last access point */ +    struct access *index;       /* access points being generated */ +    z_stream strm; +    unsigned char input[CHUNK]; +    unsigned char window[WINSIZE]; + +    /* initialize inflate */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit2(&strm, 47);      /* automatic zlib or gzip decoding */ +    if (ret != Z_OK) +        return ret; + +    /* inflate the input, maintain a sliding window, and build an index -- this +       also validates the integrity of the compressed data using the check +       information at the end of the gzip or zlib stream */ +    totin = totout = last = 0; +    index = NULL;               /* will be allocated by first addpoint() */ +    strm.avail_out = 0; +    do { +        /* get some compressed data from input file */ +        strm.avail_in = fread(input, 1, CHUNK, in); +        if (ferror(in)) { +            ret = Z_ERRNO; +            goto build_index_error; +        } +        if (strm.avail_in == 0) { +            ret = Z_DATA_ERROR; +            goto build_index_error; +        } +        strm.next_in = input; + +        /* process all of that, or until end of stream */ +        do { +            /* reset sliding window if necessary */ +            if (strm.avail_out == 0) { +                strm.avail_out = WINSIZE; +                strm.next_out = window; +            } + +            /* inflate until out of input, output, or at end of block -- +               update the total input and output counters */ +            totin += strm.avail_in; +            totout += strm.avail_out; +            ret = inflate(&strm, Z_BLOCK);      /* return at end of block */ +            totin -= 
strm.avail_in; +            totout -= strm.avail_out; +            if (ret == Z_NEED_DICT) +                ret = Z_DATA_ERROR; +            if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) +                goto build_index_error; +            if (ret == Z_STREAM_END) +                break; + +            /* if at end of block, consider adding an index entry (note that if +               data_type indicates an end-of-block, then all of the +               uncompressed data from that block has been delivered, and none +               of the compressed data after that block has been consumed, +               except for up to seven bits) -- the totout == 0 provides an +               entry point after the zlib or gzip header, and assures that the +               index always has at least one access point; we avoid creating an +               access point after the last block by checking bit 6 of data_type +             */ +            if ((strm.data_type & 128) && !(strm.data_type & 64) && +                (totout == 0 || totout - last > span)) { +                index = addpoint(index, strm.data_type & 7, totin, +                                 totout, strm.avail_out, window); +                if (index == NULL) { +                    ret = Z_MEM_ERROR; +                    goto build_index_error; +                } +                last = totout; +            } +        } while (strm.avail_in != 0); +    } while (ret != Z_STREAM_END); + +    /* clean up and return index (release unused entries in list) */ +    (void)inflateEnd(&strm); +    index->list = realloc(index->list, sizeof(struct point) * index->have); +    index->size = index->have; +    *built = index; +    return index->size; + +    /* return error */ +  build_index_error: +    (void)inflateEnd(&strm); +    if (index != NULL) +        free_index(index); +    return ret; +} + +/* Use the index to read len bytes from offset into buf, return bytes read or +   negative for error (Z_DATA_ERROR or Z_MEM_ERROR). 
 If data is requested past +   the end of the uncompressed data, then extract() will return a value less +   than len, indicating how much as actually read into buf.  This function +   should not return a data error unless the file was modified since the index +   was generated.  extract() may also return Z_ERRNO if there is an error on +   reading or seeking the input file. */ +local int extract(FILE *in, struct access *index, off_t offset, +                  unsigned char *buf, int len) +{ +    int ret, skip; +    z_stream strm; +    struct point *here; +    unsigned char input[CHUNK]; +    unsigned char discard[WINSIZE]; + +    /* proceed only if something reasonable to do */ +    if (len < 0) +        return 0; + +    /* find where in stream to start */ +    here = index->list; +    ret = index->have; +    while (--ret && here[1].out <= offset) +        here++; + +    /* initialize file and inflate state to start there */ +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit2(&strm, -15);         /* raw inflate */ +    if (ret != Z_OK) +        return ret; +    ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); +    if (ret == -1) +        goto extract_ret; +    if (here->bits) { +        ret = getc(in); +        if (ret == -1) { +            ret = ferror(in) ? 
Z_ERRNO : Z_DATA_ERROR; +            goto extract_ret; +        } +        (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); +    } +    (void)inflateSetDictionary(&strm, here->window, WINSIZE); + +    /* skip uncompressed bytes until offset reached, then satisfy request */ +    offset -= here->out; +    strm.avail_in = 0; +    skip = 1;                               /* while skipping to offset */ +    do { +        /* define where to put uncompressed data, and how much */ +        if (offset == 0 && skip) {          /* at offset now */ +            strm.avail_out = len; +            strm.next_out = buf; +            skip = 0;                       /* only do this once */ +        } +        if (offset > WINSIZE) {             /* skip WINSIZE bytes */ +            strm.avail_out = WINSIZE; +            strm.next_out = discard; +            offset -= WINSIZE; +        } +        else if (offset != 0) {             /* last skip */ +            strm.avail_out = (unsigned)offset; +            strm.next_out = discard; +            offset = 0; +        } + +        /* uncompress until avail_out filled, or end of stream */ +        do { +            if (strm.avail_in == 0) { +                strm.avail_in = fread(input, 1, CHUNK, in); +                if (ferror(in)) { +                    ret = Z_ERRNO; +                    goto extract_ret; +                } +                if (strm.avail_in == 0) { +                    ret = Z_DATA_ERROR; +                    goto extract_ret; +                } +                strm.next_in = input; +            } +            ret = inflate(&strm, Z_NO_FLUSH);       /* normal inflate */ +            if (ret == Z_NEED_DICT) +                ret = Z_DATA_ERROR; +            if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) +                goto extract_ret; +            if (ret == Z_STREAM_END) +                break; +        } while (strm.avail_out != 0); + +        /* if reach end of stream, then don't keep trying to get 
more */ +        if (ret == Z_STREAM_END) +            break; + +        /* do until offset reached and requested data read, or stream ends */ +    } while (skip); + +    /* compute number of uncompressed bytes read after offset */ +    ret = skip ? 0 : len - strm.avail_out; + +    /* clean up and return bytes read or error */ +  extract_ret: +    (void)inflateEnd(&strm); +    return ret; +} + +/* Demonstrate the use of build_index() and extract() by processing the file +   provided on the command line, and the extracting 16K from about 2/3rds of +   the way through the uncompressed output, and writing that to stdout. */ +int main(int argc, char **argv) +{ +    int len; +    off_t offset; +    FILE *in; +    struct access *index = NULL; +    unsigned char buf[CHUNK]; + +    /* open input file */ +    if (argc != 2) { +        fprintf(stderr, "usage: zran file.gz\n"); +        return 1; +    } +    in = fopen(argv[1], "rb"); +    if (in == NULL) { +        fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); +        return 1; +    } + +    /* build index */ +    len = build_index(in, SPAN, &index); +    if (len < 0) { +        fclose(in); +        switch (len) { +        case Z_MEM_ERROR: +            fprintf(stderr, "zran: out of memory\n"); +            break; +        case Z_DATA_ERROR: +            fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); +            break; +        case Z_ERRNO: +            fprintf(stderr, "zran: read error on %s\n", argv[1]); +            break; +        default: +            fprintf(stderr, "zran: error %d while building index\n", len); +        } +        return 1; +    } +    fprintf(stderr, "zran: built index with %d access points\n", len); + +    /* use index by reading some bytes from an arbitrary offset */ +    offset = (index->list[index->have - 1].out << 1) / 3; +    len = extract(in, index, offset, buf, CHUNK); +    if (len < 0) +        fprintf(stderr, "zran: extraction failed: %s error\n", + 
               len == Z_MEM_ERROR ? "out of memory" : "input corrupted"); +    else { +        fwrite(buf, 1, len, stdout); +        fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset); +    } + +    /* clean up and exit */ +    free_index(index); +    fclose(in); +    return 0; +} | 
