summaryrefslogtreecommitdiff
path: root/dteutils.cpp
blob: 0cbf30c37efb95b1d5e3b2d224109a3654da8d43 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include "fileutils.h"

/* Text buffer scanned for byte pairs. It is not allocated in this section's
 * library code; the DTEMAIN driver allocates it and fills it from a file. */
char * dte_text;
/* One flag per 16-bit pair value. Non-zero excludes the pair from counting:
 * dte_compress() sets it for pairs containing a disallowed byte and for pairs
 * that have already been selected. */
char dte_flags[256 * 256];
/* Occurrence counter per 16-bit pair value, incremented by the C build_dte(). */
long dte_counters[256 * 256];

/* Pair value that reached the highest count in the last counting pass. */
long dte_most;
/* Count reached by dte_most (0 after dte_reset()). */
long dte_counter;
/* Number of bytes of dte_text that are scanned. */
long dte_text_size;
/* Number of selection rounds dte_compress() performs. */
int dte_size;
/* Running sum of the winning counts, reset and accumulated by dte_compress(). */
long gain;

#ifdef USEASM
/* With USEASM defined, build_dte() and dte_reset() are only declared here,
 * with C linkage; their definitions are not in this file.
 * NOTE(review): presumably supplied by an assembly object -- confirm against
 * the build. */
extern "C" {
    void build_dte(void);
    void dte_reset(void);
}
#else
/*
 * Prepare for a new counting pass: zero every pair counter and forget the
 * previously recorded best pair and its count.
 *
 * The size passed to memset is taken from the array itself. A fixed byte
 * count of 0x40000 only covers all 65536 counters when long is 4 bytes wide;
 * with an 8-byte long it would leave the upper half of the table holding
 * stale counts from the previous pass.
 */
void dte_reset(void) {
    memset(dte_counters, 0, sizeof dte_counters);
    dte_most = 0;
    dte_counter = 0;
}

/*
 * Count every pair of adjacent bytes in dte_text and record the most
 * frequent one.
 *
 * A pair is identified by first_byte + second_byte * 256. That is the value
 * a 16-bit load yields on a little-endian host, and it is the layout
 * dte_compress() assumes when it splits dte_most into two characters.
 * Building the value byte by byte avoids misaligned, type-punned loads at
 * odd offsets and keeps the result independent of host byte order.
 *
 * Pairs whose dte_flags entry is non-zero are ignored. Whenever a counter
 * rises above dte_counter, dte_most and dte_counter are updated, so on a tie
 * the pair that reached the count first is kept.
 *
 * Counters are not cleared here; dte_reset() does that.
 */
void build_dte(void) {
    const unsigned char *text = (const unsigned char *) dte_text;
    long pos;

    /* A pair needs two bytes of text, so the last start position is
     * dte_text_size - 2; nothing past the end of the text is read. */
    for (pos = 0; pos + 1 < dte_text_size; pos++) {
        unsigned int pair = (unsigned int) text[pos]
                          | ((unsigned int) text[pos + 1] << 8);

        if (dte_flags[pair]) {
            continue;
        }
        if (++dte_counters[pair] > dte_counter) {
            dte_most = (long) pair;
            dte_counter = dte_counters[pair];
        }
    }
}
#endif

/*
 * Pick the dte_size most frequent byte pairs of dte_text, one per round, and
 * report them on stderr.
 *
 * alloweds: table of 256 entries indexed by byte value; a non-zero entry
 *           means the byte may take part in a pair. A pair is only counted
 *           when both of its bytes are allowed.
 *
 * Each round resets the counters, recounts the whole text, prints the
 * winning pair and flags it so later rounds skip it. The winning counts are
 * summed into the global gain, which is printed at the end. The text itself
 * is never rewritten between rounds, so overlapping occurrences are counted
 * for several pairs and gain is an upper estimate.
 */
void dte_compress(char alloweds[]) {
    int i, j;
    /* Unsigned on purpose: with a signed plain char, a byte >= 0x80 would
     * turn negative and index in front of dte_flags below. */
    unsigned char c1, c2;

    /* Exclude every pair that contains at least one disallowed byte. */
    for (i = 0; i < 256; i++) {
        for (j = 0; j < 256; j++) {
            dte_flags[i * 256 + j] = (alloweds[i] && alloweds[j]) ? 0 : 1;
        }
    }

    gain = 0;

    fprintf(stderr, "Going for it: dte_size = %i\n", dte_size);
    for (i = 0; i < dte_size; i++) {
        dte_reset();
        build_dte();
        /* Low byte is the first character of the pair, high byte the second. */
        c1 = (unsigned char) (dte_most & 0xff);
        c2 = (unsigned char) ((dte_most >> 8) & 0xff);
        fprintf(stderr, "Entry #%i, most count: %li, couple = 0x%04x = (%c %c)\n", i, dte_counter, (int) dte_most, c1, c2);
        /* Mark the pair as taken so the next round picks a different one. */
        dte_flags[c1 + c2 * 256] = 1;
        gain += dte_counter;
    }

    fprintf(stderr, "Total gain: %li bytes\n", gain);
}


#ifdef DTEMAIN

/*
 * Stand-alone driver, compiled only when DTEMAIN is defined.
 *
 * Reads the file named by argv[1] into dte_text, marks every byte value that
 * occurs in it (except '\n' and '\r') as allowed, and runs dte_compress()
 * with dte_size fixed at 128.
 *
 * Returns 0 on success and 1 when the argument is missing, the file cannot
 * be opened or sized, or memory cannot be allocated.
 */
int main(int argc, char ** argv) {
    long i;
    size_t got;
    FILE * f;
    char alloweds[256];

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file>\n", argc > 0 ? argv[0] : "dteutils");
        return 1;
    }

    dte_size = 128;

    /* Binary mode: the size is taken from the file and '\r' is handled
     * explicitly below, so no newline translation must happen. */
    f = fopen(argv[1], "rb");
    if (!f) {
        perror(argv[1]);
        return 1;
    }
    dte_text_size = filesize(f);
    if (dte_text_size < 0) {
        fprintf(stderr, "Cannot determine size of %s\n", argv[1]);
        fclose(f);
        return 1;
    }
    /* Four extra zeroed bytes follow the text.
     * NOTE(review): presumably slack for scanners that fetch more than one
     * byte at a time (e.g. the USEASM build) -- confirm. */
    dte_text = (char *) calloc((size_t) dte_text_size + 4, 1);
    if (!dte_text) {
        fprintf(stderr, "Out of memory\n");
        fclose(f);
        return 1;
    }
    fprintf(stderr, "Reading file, size = %li\n", dte_text_size);
    got = fread(dte_text, 1, (size_t) dte_text_size, f);
    fclose(f);
    if (got < (size_t) dte_text_size) {
        /* Work on what was actually read; the rest of the buffer is zero. */
        fprintf(stderr, "Short read: got %lu of %li bytes\n", (unsigned long) got, dte_text_size);
        dte_text_size = (long) got;
    }

    memset(alloweds, 0, sizeof alloweds);
    fprintf(stderr, "Building alloweds table\n");
    for (i = 0; i < dte_text_size; i++) {
        if ((dte_text[i] != '\n') && (dte_text[i] != '\r')) {
            /* Index by the unsigned byte value; a signed char >= 0x80 would
             * otherwise write in front of the table. */
            alloweds[(unsigned char) dte_text[i]] = 1;
        }
    }

    dte_compress(alloweds);

    free(dte_text);
    dte_text = NULL;
    return 0;
}

#endif