ref: a6e5d4bae6075c741a39fcba62a365d9dffaed93
dir: /sys/src/cmd/venti/srv/whack.c/
#include "stdinc.h"
#include "whack.h"
typedef struct Huff	Huff;
int compressblocks = 1;
enum
{
	MaxFastLen	= 9,
	BigLenCode	= 0x1f4,	/* minimum code for large lenth encoding */
	BigLenBits	= 9,
	BigLenBase	= 4,		/* starting items to encode for big lens */
	MinOffBits	= 6,
	MaxOffBits	= MinOffBits + 8,
	MaxLen		= 2051		/* max. length encodable in 24 bits */
};
enum
{
	StatBytes,
	StatOutBytes,
	StatLits,
	StatMatches,
	StatLitBits,
	StatOffBits,
	StatLenBits,
	MaxStat
};
struct Huff
{
	short	bits;				/* length of the code */
	ulong	encode;				/* the code */
};
static	Huff	lentab[MaxFastLen] =
{
	{2,	0x2},		/* 10 */
	{3,	0x6},		/* 110 */
	{5,	0x1c},		/* 11100 */
	{5,	0x1d},		/* 11101 */
	{6,	0x3c},		/* 111100 */
	{7,	0x7a},		/* 1111010 */
	{7,	0x7b},		/* 1111011 */
	{8,	0xf8},		/* 11111000 */
	{8,	0xf9},		/* 11111001 */
};
static int	thwmaxcheck;
void
whackinit(Whack *tw, int level)
{
	thwmaxcheck = (1 << level);
	thwmaxcheck -= thwmaxcheck >> 2;
	if(thwmaxcheck < 2)
		thwmaxcheck = 2;
	else if(thwmaxcheck > 1024)
		thwmaxcheck = 1024;
	memset(tw, 0, sizeof *tw);
	tw->begin = 2 * WhackMaxOff;
}
/*
 * find a string in the dictionary
 */
static int
whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
{
	ushort then, off, last;
	int bestoff, bestlen, check;
	uchar *s, *t;
	s = *ss;
	if(esrc < s + MinMatch)
		return -1;
	if(s + MaxLen < esrc)
		esrc = s + MaxLen;
	bestoff = 0;
	bestlen = 0;
	check = thwmaxcheck;
	last = 0;
	for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
		off = now - then;
		if(off <= last || off > WhackMaxOff)
			break;
		/*
		 * don't need to check for the end because
		 * 1) s too close check above
		 */
		t = s - off;
		if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
			if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
				t += 3;
				for(s += 3; s < esrc; s++){
					if(*s != *t)
						break;
					t++;
				}
				if(s - *ss > bestlen){
					bestlen = s - *ss;
					bestoff = off;
					if(bestlen > thwmaxcheck)
						break;
				}
			}
		}
		s = *ss;
		last = off;
	}
	*ss += bestlen;
	return bestoff;
}
/*
 * knuth vol. 3 multiplicative hashing
 * each byte x chosen according to rules
 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
 * with reasonable spread between the bytes & their complements
 *
 * the 3 byte value appears to be as almost good as the 4 byte value,
 * and might be faster on some machines
 */
/*
#define hashit(c)	((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
*/
#define hashit(c)	(((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
/*
 * lz77 compression with single lookup in a hash table for each block
 */
int
whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
{
	uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
	ulong cont, code, wbits;
	ushort now;
	int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
	if(!compressblocks || n < MinMatch)
		return -1;
	wdst = dst;
	wdmax = dst + n;
	now = w->begin;
	s = src;
	w->data = s;
	cont = (s[0] << 16) | (s[1] << 8) | s[2];
	esrc = s + n;
	half = s + (n >> 1);
	wnbits = 0;
	wbits = 0;
	lits = 0;
	matches = 0;
	offbits = 0;
	lenbits = 0;
	lithist = ~0;
	while(s < esrc){
		h = hashit(cont);
		sss = s;
		toff = whackmatch(w, &sss, esrc, h, now);
		ss = sss;
		len = ss - s;
		for(; wnbits >= 8; wnbits -= 8){
			if(wdst >= wdmax){
				w->begin = now;
				return -1;
			}
			*wdst++ = wbits >> (wnbits - 8);
		}
		if(len < MinMatch){
			toff = *s;
			lithist = (lithist << 1) | toff < 32 | toff > 127;
			if(lithist & 0x1e){
				wbits = (wbits << 9) | toff;
				wnbits += 9;
			}else if(lithist & 1){
				toff = (toff + 64) & 0xff;
				if(toff < 96){
					wbits = (wbits << 10) | toff;
					wnbits += 10;
				}else{
					wbits = (wbits << 11) | toff;
					wnbits += 11;
				}
			}else{
				wbits = (wbits << 8) | toff;
				wnbits += 8;
			}
			lits++;
			/*
			 * speed hack
			 * check for compression progress, bail if none achieved
			 */
			if(s > half){
				if(4 * (s - src) < 5 * lits){
					w->begin = now;
					return -1;
				}
				half = esrc;
			}
			if(s + MinMatch <= esrc){
				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
				w->hash[h] = now;
				if(s + MinMatch < esrc)
					cont = (cont << 8) | s[MinMatch];
			}
			now++;
			s++;
			continue;
		}
		matches++;
		/*
		 * length of match
		 */
		if(len > MaxLen){
			len = MaxLen;
			ss = s + len;
		}
		len -= MinMatch;
		if(len < MaxFastLen){
			bits = lentab[len].bits;
			wbits = (wbits << bits) | lentab[len].encode;
			wnbits += bits;
			lenbits += bits;
		}else{
			code = BigLenCode;
			bits = BigLenBits;
			use = BigLenBase;
			len -= MaxFastLen;
			while(len >= use){
				len -= use;
				code = (code + use) << 1;
				use <<= (bits & 1) ^ 1;
				bits++;
			}
			wbits = (wbits << bits) | (code + len);
			wnbits += bits;
			lenbits += bits;
			for(; wnbits >= 8; wnbits -= 8){
				if(wdst >= wdmax){
					w->begin = now;
					return -1;
				}
				*wdst++ = wbits >> (wnbits - 8);
			}
		}
		/*
		 * offset in history
		 */
		toff--;
		for(bits = MinOffBits; toff >= (1 << bits); bits++)
			;
		if(bits < MaxOffBits-1){
			wbits = (wbits << 3) | (bits - MinOffBits);
			if(bits != MinOffBits)
				bits--;
			wnbits += bits + 3;
			offbits += bits + 3;
		}else{
			wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
			bits--;
			wnbits += bits + 4;
			offbits += bits + 4;
		}
		wbits = (wbits << bits) | toff & ((1 << bits) - 1);
		for(; s != ss; s++){
			if(s + MinMatch <= esrc){
				h = hashit(cont);
				w->next[now & (WhackMaxOff - 1)] = w->hash[h];
				w->hash[h] = now;
				if(s + MinMatch < esrc)
					cont = (cont << 8) | s[MinMatch];
			}
			now++;
		}
	}
	w->begin = now;
	stats[StatBytes] += esrc - src;
	stats[StatLits] += lits;
	stats[StatMatches] += matches;
	stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
	stats[StatOffBits] += offbits;
	stats[StatLenBits] += lenbits;
	if(wnbits & 7){
		wbits <<= 8 - (wnbits & 7);
		wnbits += 8 - (wnbits & 7);
	}
	for(; wnbits >= 8; wnbits -= 8){
		if(wdst >= wdmax)
			return -1;
		*wdst++ = wbits >> (wnbits - 8);
	}
	stats[StatOutBytes] += wdst - dst;
	return wdst - dst;
}
int
whackblock(uchar *dst, uchar *src, int ssize)
{
	Whack w;
	ulong stats[MaxStat];
	int r;
	whackinit(&w, 6);
	r = whack(&w, dst, src, ssize, stats);
	return r;
}