diff options
author | Pixel <Pixel> | 2002-05-27 16:27:51 +0000 |
---|---|---|
committer | Pixel <Pixel> | 2002-05-27 16:27:51 +0000 |
commit | aaa76739c3d6dbeb33b1f1027ec93c260fe55b35 (patch) | |
tree | 19789de0b6802c15453a771484d338b96e6c493c /dteutils.cpp | |
parent | 2238d7bdbbda2a107ee14429f4e477183dacb684 (diff) |
DTE rhaaaaaaa
Diffstat (limited to 'dteutils.cpp')
-rw-r--r-- | dteutils.cpp | 316 |
1 files changed, 288 insertions, 28 deletions
diff --git a/dteutils.cpp b/dteutils.cpp index b08cfe0..2759dc5 100644 --- a/dteutils.cpp +++ b/dteutils.cpp @@ -22,10 +22,129 @@ #include <stdlib.h> #include <string.h> #include "fileutils.h" +#include "generic.h" + +class thingtree { + public: + static void addstring(const char *, long); + static long look(const char *); + static void destroy(); + private: + thingtree(); + thingtree(char, bool, thingtree *, long); + ~thingtree(); + char thischar; + bool terminal; + thingtree * father, * child, * brother; + long thingentry; + static thingtree * root; +}; + +thingtree * thingtree::root = 0; + +thingtree::thingtree(char gchar, bool gterm, thingtree * gfather, long gthingentry) : thischar(gchar), terminal(gterm), thingentry(gthingentry) { + if (!gfather) { + if (!root) { + root = new thingtree(); + } + father = root; + } else { + father = gfather; + } + brother = father->child; + father->child = this; +} + +thingtree::thingtree() : thischar(0), terminal(false), father(0), child(0), brother(0) { } + +long thingtree::look(const char * s) { + const char * p; + long currentthing = -1; + thingtree * ptr = root; + + if (!ptr) { + printm(M_ERROR, "Error: thingtree not initialised\n"); + exit(-1); + } + + for (p = s; *p; p++) { +// printm(M_INFO, "Looking for '%c'\n", *p); + for (ptr = ptr->child; ptr; ptr = ptr->brother) { +// printm(M_INFO, "Looking in %p: %c\n", ptr, ptr->thischar); + if (ptr->thischar == *p) { + if (ptr->terminal) { + currentthing = ptr->thingentry; + } + break; + } + } + if (!ptr) { +// printm(M_INFO, "Not found.\n"); + break; + } + } + + if (currentthing == -1) { + printm(M_ERROR, "Error, can't find any entry for string '%s'\n", s); + } + + return currentthing; +} + +void thingtree::addstring(const char * s, long thingentry) { + const char * p; + thingtree * ptr, * fptr; + + if (!root) { + root = new thingtree(); + } + +// printm(M_INFO, "Creating new thingtree entry: %li='%s'\n", thingentry, s); + + ptr = root; + for (p = s; *p; p++) { + fptr = ptr; +// printm(M_INFO, "Finding entry for '%c'\n", *p); + for (ptr = ptr->child; ptr; ptr = ptr->brother) { +// printm(M_INFO, "Browsing childs: %p = %c\n", ptr, ptr->thischar); + if (ptr->thischar == *p) + break; + } + + if (!ptr) { +// printm(M_INFO, "Creating new branch for %c\n", *p); + ptr = new thingtree(*p, *(p + 1) == 0, fptr, thingentry); +// printm(M_INFO, "Created branch %p\n", ptr); + } else { + if (*(p + 1) == 0) { + ptr->terminal = true; + ptr->thingentry = thingentry; + } + } + } +} + +void thingtree::destroy() { + delete root; + root = 0; +} + +thingtree::~thingtree() { + if (father->child == this) + father->child = brother; + + while (child) + delete child; +} + +char * things[256]; char * dte_text; char dte_flags[256 * 256]; long dte_counters[256 * 256]; +long dte_entries[256]; +char alloweds[256]; +long dte_usage[256]; long dte_most; long dte_counter; @@ -33,12 +152,6 @@ long dte_text_size; int dte_size; long gain; -#ifdef USEASM -extern "C" { - void build_dte(void); - void dte_reset(void); -} -#else void dte_reset(void) { memset(dte_counters, 0, 0x40000); dte_most = 0; @@ -47,21 +160,72 @@ void dte_reset(void) { void build_dte(void) { int i; - unsigned short t; + unsigned short t, t2; +// unsigned short p = 0; for (i = 0; i < dte_text_size; i++) { t = *((unsigned short *) (dte_text + i)); - if (!dte_flags[t]) { - if ((++dte_counters[t]) > dte_counter) { + t2 = *((unsigned short *) (dte_text + i + 1)); +/* if (t == p) { + p = 0; + continue; + } + p = t; */ + if ((!dte_flags[t]) && (dte_flags[t2] != 3)) { + dte_counters[t]++; + if (dte_counters[t] > dte_counter) { dte_most = t; dte_counter = dte_counters[t]; } + } else if (dte_flags[t] == 3) { + i++; } } } -#endif -void dte_compress(char alloweds[]) { +void push_entry(long entry) { + int i; + char t[3], c1, c2; + t[2] = 0; + + c1 = dte_most & 0xff; + c2 = dte_most >> 8; + t[0] = things[c1][0]; + t[1] = things[c2][0]; + + for (i = 0; i < 256; i++) { + if (!dte_entries[i]) { + dte_entries[i] = entry; + things[i] = strdup(t); + thingtree::addstring(t, i); + break; + } + } +} + +char * read_line(FILE * f, char * b) { + int r, pos = 0; + + while (!feof(f)) { + if ((r = fgetc(f)) == EOF) + break; + if (r == 0x0d) { + if ((r = fgetc(f)) == EOF) + break; + } + if (r == 0x0a) { + b[pos] = 0; + return b; + } + b[pos++] = r; + } + + b[pos] = 0; + + return b; +} + +void dte_compress() { int i, j; char c1, c2; @@ -73,46 +237,142 @@ void dte_compress(char alloweds[]) { gain = 0; - fprintf(stderr, "Going for it: dte_size = %i\n", dte_size); + printm(M_STATUS, "Going for it: dte_size = %i\n", dte_size); for (i = 0; i < dte_size; i++) { dte_reset(); build_dte(); c1 = dte_most & 0xff; c2 = dte_most >> 8; - fprintf(stderr, "Entry #%i, most count: %li, couple = 0x%04x = (%c %c)\n", i, dte_counter, (int) dte_most, c1, c2); - dte_flags[c1 + c2 * 256] = 1; + c1 = things[c1][0]; + c2 = things[c2][0]; + printm(M_INFO, "Entry #%i, most count: %li, couple = 0x%04x = (%c %c)\n", i, dte_counter, (int) dte_most, c1, c2); + dte_flags[dte_most] = 3; gain += dte_counter; + push_entry(dte_most); + } + + printm(M_STATUS, "Estimated gain: %li bytes, new file size = %li\n", gain, dte_text_size - gain); +} + +void read_thingy(FILE * f) { + char line[256], * st, * thing; + long code; + int i; + + st = line + 2; + thing = line + 5; + line[0] = '0'; + line[1] = 'x'; + + dte_size = 0; + + for (i = 0; i < 256; i++) { + dte_entries[i] = -1; } - fprintf(stderr, "Total gain: %li bytes\n", gain); + while (!feof(f)) { + if (strlen(read_line(f, st))) { + line[4] = 0; + sscanf(line, "%li", &code); + switch (strlen(thing)) { + case 0: + dte_size++; + dte_entries[code] = 0; + break; + case 1: + alloweds[code] = 1; + default: + things[code] = strdup(thing); + thingtree::addstring(thing, code); + } + } + } } +void read_thingy_file(FILE * f) { + char line[10240], * p, * c, trans[5]; + long code; + int ptr = 0, i; + + for (i = 0; i < 256; i++) { + dte_usage[i] = 0; + } + + while (!feof(f)) { + if (!strlen(p = read_line(f, line))) + continue; + while (*p) { + if (*p == '<') { + if (!(c = strchr(p, '>'))) { + printm(M_ERROR, "Error in file: '<' not closed.\n"); + exit(-1); + } + p++; + if ((c - p) == 2) { + *c = 0; + sprintf(trans, "0x%s", p); + sscanf(trans, "%li", &code); + dte_text[ptr++] = code; + printm(M_BARE, "0x%02x-", code); + } else { + printm(M_BARE, "Unknow %s-", p); + } + p = c + 1; + } else { + if ((code = thingtree::look(p)) == -1) + exit(-1); + dte_text[ptr++] = code; + p += strlen(things[code]); + printm(M_BARE, "%s-", things[code]); + dte_usage[code]++; + } + } + printm(M_BARE, "\n"); + } + + dte_text[ptr] = dte_text[ptr + 1] = dte_text[ptr + 2] = dte_text[ptr + 3] = 0; + dte_text_size = ptr; +} #ifdef DTEMAIN int main(int argc, char ** argv) { + FILE * f, * t; + long old_size; int i; - FILE * f; - char alloweds[256]; - dte_size = 128; + verbosity = M_INFO; + printm(M_STATUS, "Reading thingy table\n"); + t = fopen(argv[2], "r"); + read_thingy(t); + fclose(t); + f = fopen(argv[1], "r"); dte_text_size = filesize(f); dte_text = (char *) calloc(dte_text_size + 4, 1); - fprintf(stderr, "Reading file, size = %li\n", dte_text_size); - fread(dte_text, 1, dte_text_size, f); + printm(M_STATUS, "Reading file, size = %li\n", dte_text_size); + read_thingy_file(f); fclose(f); - memset(alloweds, 0, 256); - fprintf(stderr, "Building alloweds table\n"); - for (i = 0; i < dte_text_size; i++) { - if ((dte_text[i] != '\n') && (dte_text[i] != '\r')) { - alloweds[dte_text[i]] = 1; - } - } + printm(M_STATUS, "True size = %li\n", old_size = dte_text_size); + + printm(M_STATUS, "Compressing file.\n"); + dte_compress(); - dte_compress(alloweds); + printm(M_STATUS, "Rereading file.\n"); + f = fopen(argv[1], "r"); + dte_text_size = filesize(f); + dte_text = (char *) calloc(dte_text_size + 4, 1); + read_thingy_file(f); + fclose(f); + + printm(M_STATUS, "True size = %li, real gain = %li\n", dte_text_size, old_size - dte_text_size); + + printm(M_INFO, "DTE Usage:\n"); + for (i = 0; i < 256; i++) { + printm(M_INFO, "Entry %i ('%s') used at %i\n", i, things[i], dte_usage[i]); + } free(dte_text); } |