ref: babf901b4a508c3ec5d1f89655f10377bbdf9637
dir: /utils/sqz/sqz.c/
#include <lib9.h>
#include <a.out.h>
#include "squeeze.h"
/*
* forsyth@vitanuova.com
*/
typedef struct Word Word;
struct Word {
ulong v;
ushort freq;
ushort code;
Word* next;
};
typedef struct Squeeze Squeeze;
struct Squeeze {
int n;
/*union {*/
ulong tab[7*256];
Word* rep[7*256];
/*};*/
};
enum {
HMASK = 0xFFFF,
HSIZE = HMASK+1,
Codebufsize = 3*1024*1024
};
#define GET4(p) (((((((p)[0]<<8)|(p)[1])<<8)|(p)[2])<<8)|(p)[3])
#define GET4L(p) (((((((p)[3]<<8)|(p)[2])<<8)|(p)[1])<<8)|(p)[0])
#define PUT4(p,v) (((p)[0]=(v)>>24),((p)[1]=(v)>>16),((p)[2]=(v)>>8),((p)[3]=(v)))
static uchar prog[Codebufsize];
static uchar outbuf[Codebufsize];
static Word* hash1[HSIZE];
static Word* hash2[HSIZE];
static Sqhdr sqhdr;
static ulong chksum;
static int dflag; /* squeeze data, not text */
static int tflag; /* squeeze text, leave data as-is */
static int qflag = 1; /* enable powerpc option */
static int wflag; /* write output */
static int islittle; /* object code uses little-endian byte order */
static int debug;
static char* fname;
static void analyse(ulong*, int, Squeeze*, Squeeze*, Word**);
static Word** collate(Word**, int);
static void dumpsq(Squeeze*, int);
static void freehash(Word**);
static long Read(int, void*, long);
static void remap(Squeeze*);
static int squeeze(ulong*, int, uchar*, ulong);
static int squeezetab(int, int, Squeeze*, Word**, int);
static void squirt(int, Squeeze*);
static void Write(int, void*, long);
static void
usage(void)
{
fprint(2, "Usage: sqz [-w] [-t] [-d] [-q] q.out\n");
exits("usage");
}
void
main(int argc, char **argv)
{
int fd, n, ns, nst, nsd;
long txtlen, datlen, asis;
ulong topdat, toptxt;
Exec ex;
Squeeze sq3, sq4, sq5, sq6;
Word *top;
setbinmode();
/* fmtinstall('f', gfltconv); */
ARGBEGIN{
case 'D':
debug++;
break;
case 'd':
dflag++;
break;
case 'q':
qflag = 0;
break;
case 't':
tflag++;
break;
case 'w':
wflag++;
break;
default:
usage();
}ARGEND
fname = *argv;
if(fname == nil)
usage();
fd = open(fname, OREAD);
if(fd < 0){
fprint(2, "sqz: can't open %s: %r\n", fname);
exits("open");
}
Read(fd, &ex, sizeof(Exec));
txtlen = GET4((uchar*)&ex.text);
datlen = GET4((uchar*)&ex.data);
switch(GET4((uchar*)&ex.magic)){
case Q_MAGIC: /* powerpc */
islittle = 0;
break;
case E_MAGIC: /* arm */
islittle = 1;
qflag = 0;
break;
case 0xA0E1: /* arm AIF */
islittle = 1;
qflag = 0;
txtlen = GET4L((uchar*)&ex+(5*4))-sizeof(Exec);
datlen = GET4L((uchar*)&ex+(6*4));
break;
default:
fprint(2, "sqz: unknown magic for sqz: %8.8ux\n", GET4((uchar*)&ex.magic));
exits("bad magic");
}
if(qflag)
fprint(2, "PowerPC rules\n");
if(islittle)
fprint(2, "Little endian\n");
if(txtlen > sizeof(prog) || datlen > sizeof(prog) || txtlen+datlen > sizeof(prog)){
fprint(2, "sqz: executable too big: %lud+%lud; increase Codebufsize in sqz.c\n", txtlen, datlen);
exits("size");
}
if(dflag){
seek(fd, txtlen, 1);
Read(fd, prog, datlen);
}else{
Read(fd, prog, txtlen);
Read(fd, prog+txtlen, datlen);
}
close(fd);
asis = 0;
if(dflag)
n = datlen;
else if(tflag){
n = txtlen;
asis = datlen;
}else
n = txtlen+datlen;
if(dflag || tflag){
analyse((ulong*)prog, n/4, &sq3, &sq4, &top);
nst = squeeze((ulong*)prog, n/4, outbuf, top->v);
if(nst < 0)
exits("sqz");
nsd = 0;
remap(&sq3);
remap(&sq4);
toptxt = topdat = top->v;
}else{
analyse((ulong*)prog, txtlen/4, &sq3, &sq4, &top);
nst = squeeze((ulong*)prog, txtlen/4, outbuf, top->v);
if(nst < 0)
exits("sqz");
toptxt = top->v;
remap(&sq3);
remap(&sq4);
if(datlen/4){
freehash(hash1);
freehash(hash2);
analyse((ulong*)(prog+txtlen), datlen/4, &sq5, &sq6, &top);
nsd = squeeze((ulong*)(prog+txtlen), datlen/4, outbuf+nst, top->v);
if(nsd < 0)
exits("sqz");
topdat = top->v;
remap(&sq5);
remap(&sq6);
}else{
nsd = 0;
topdat = 0;
}
}
ns = nst+nsd;
fprint(2, "%d/%d bytes\n", ns, n);
fprint(2, "%8.8lux csum\n", chksum);
if(!wflag)
exits(0);
PUT4(sqhdr.magic, SQMAGIC);
PUT4(sqhdr.toptxt, toptxt);
PUT4(sqhdr.sum, chksum);
PUT4(sqhdr.text, nst);
PUT4(sqhdr.topdat, topdat);
PUT4(sqhdr.data, nsd);
PUT4(sqhdr.asis, asis);
PUT4(sqhdr.flags, 0);
Write(1, &sqhdr, SQHDRLEN);
Write(1, &ex, sizeof(Exec));
squirt(1, &sq3);
squirt(1, &sq4);
Write(1, outbuf, nst);
if(nsd){
squirt(1, &sq5);
squirt(1, &sq6);
Write(1, outbuf+nst, nsd);
}
if(asis)
Write(1, prog+txtlen, asis);
exits(0);
}
static void
analyse(ulong *prog, int nw, Squeeze *sq3, Squeeze *sq4, Word **top)
{
Word *w, **hp, **sorts, **resorts;
ulong *rp, *ep;
ulong v;
int i, nv1, nv2, nv, nz;
rp = prog;
ep = prog+nw;
nv = 0;
nz = 0;
while(rp < ep){
if(islittle){
v = GET4L((uchar*)rp);
}else{
v = GET4((uchar*)rp);
}
rp++;
chksum += v;
if(v == 0){
nz++;
if(0)
continue;
}
if(qflag){
QREMAP(v);
}
for(hp = &hash1[v&HMASK]; (w = *hp) != nil; hp = &w->next)
if(w->v == v)
break;
if(w == nil){
w = (Word*)malloc(sizeof(*w));
w->v = v;
w->freq = 0;
w->code = 0;
w->next = nil;
*hp = w;
nv++;
}
w->freq++;
}
sorts = collate(hash1, nv);
fprint(2, "phase 1: %d/%d words (%d zero), %d top (%8.8lux)\n", nv, nw, nz, sorts[0]->freq, sorts[0]->v);
*top = sorts[0];
nv1 = squeezetab(1, 0x900, sq3, sorts+1, nv-1)+1;
nv2 = 0;
for(i=nv1; i<nv; i++){
v = sorts[i]->v >> 8;
for(hp = &hash2[v&HMASK]; (w = *hp) != nil; hp = &w->next)
if(w->v == v)
break;
if(w == nil){
w = (Word*)malloc(sizeof(*w));
w->v = v;
w->freq = 0;
w->code = 0;
w->next = nil;
*hp = w;
nv2++;
}
w->freq++;
}
free(sorts);
resorts = collate(hash2, nv2);
fprint(2, "phase 2: %d/%d\n", nv2, nv-nv1);
squeezetab(2, 0x200, sq4, resorts, nv2);
free(resorts);
fprint(2, "phase 3: 1 4-code, %d 12-codes, %d 20-codes, %d uncoded\n",
sq3->n, sq4->n, nv-(sq3->n+sq4->n+1));
}
static int
wdcmp(void *a, void *b)
{
return (*(Word**)b)->freq - (*(Word**)a)->freq;
}
static Word **
collate(Word **tab, int nv)
{
Word *w, **hp, **sorts;
int i;
sorts = (Word**)malloc(nv*sizeof(Word**));
i = 0;
for(hp = &tab[0]; hp < &tab[HSIZE]; hp++)
for(w = *hp; w != nil; w = w->next)
sorts[i++] = w;
qsort(sorts, nv, sizeof(*sorts), wdcmp);
if(debug > 1)
for(i=0; i<nv; i++)
fprint(2, "%d\t%d\t%8.8lux\n", i, sorts[i]->freq, sorts[i]->v);
return sorts;
}
static int
tabcmp(void *a, void *b)
{
ulong av, bv;
av = (*(Word**)a)->v;
bv = (*(Word**)b)->v;
if(av > bv)
return 1;
if(av < bv)
return -1;
return 0;
}
static int
squeezetab(int tabno, int base, Squeeze *sq, Word **sorts, int nv)
{
int i;
if(nv >= 7*256)
nv = 7*256;
memset(sq, 0, sizeof(*sq));
for(i=0; i<nv; i++)
sq->rep[sq->n++] = sorts[i];
qsort(sq->rep, sq->n, sizeof(*sq->rep), tabcmp);
for(i=0; i<sq->n; i++)
sq->rep[i]->code = base + i;
if(debug)
dumpsq(sq, tabno);
return sq->n;
}
static void
dumpsq(Squeeze *sq, int n)
{
int i;
fprint(2, "table %d: %d entries\n", n, sq->n);
for(i=0; i<sq->n; i++)
fprint(2, "%.3x\t%8.8lux\t%lux\n", sq->rep[i]->code, sq->rep[i]->v, i? sq->rep[i]->v - sq->rep[i-1]->v: 0);
}
static void
remap(Squeeze *sq)
{
int i;
ulong v;
if(sq->n){
v = 0;
for(i=0; i<sq->n; i++){
sq->tab[i] = sq->rep[i]->v - v;
v += sq->tab[i];
}
}
}
static Word *
squash(Word **tab, ulong v)
{
Word *w, **hp;
for(hp = &tab[v&0xFFFF]; (w = *hp) != nil; hp = &w->next)
if(w->v == v)
return w;
return nil;
}
static void
freehash(Word **tab)
{
Word *w, **hp;
for(hp = &tab[0]; hp < &tab[HSIZE]; hp++)
while((w = *hp) != nil){
*hp = w->next;
free(w);
}
}
static int
squeeze(ulong *prog, int nw, uchar *out, ulong top)
{
ulong *rp, *ep;
ulong v, bits;
ulong e1, e2, e3, e4;
Word *w;
uchar bytes[8], *bp, *wp;
int ctl, n;
rp = prog;
ep = prog+nw;
bits = 0;
e1 = e2 = e3 = e4 = 0;
wp = out;
n = 0;
ctl = 0;
bp = bytes;
for(;;){
if(n == 2){
*wp++ = ctl;
if(0)
fprint(2, "%x\n", ctl);
memmove(wp, bytes, bp-bytes);
wp += bp-bytes;
bp = bytes;
ctl = 0;
n = 0;
}
ctl <<= 4;
n++;
if(rp >= ep){
if(n == 1)
break;
continue;
}
if(islittle){
v = GET4L((uchar*)rp);
}else{
v = GET4((uchar*)rp);
}
rp++;
if(qflag){
QREMAP(v);
}
if(v == top){
e1++;
bits += 4;
ctl |= 0;
continue;
}
w = squash(hash1, v);
if(w && w->code){
e2++;
bits += 4+8;
ctl |= w->code>>8;
*bp++ = w->code;
continue;
}
w = squash(hash2, v>>8);
if(w && w->code){
e3++;
bits += 4+8+8;
ctl |= w->code>>8;
*bp++ = w->code;
*bp++ = v & 0xFF;
if(debug > 2)
fprint(2, "%x %8.8lux %8.8lux\n", w->code, w->v, v);
continue;
}
e4++;
bits += 4+32;
ctl |= 0x1;
bp[0] = v;
bp[1] = v>>8;
bp[2] = v>>16;
bp[3] = v>>24;
bp += 4;
}
fprint(2, "enc: %lud 4-bits, %lud 12-bits %lud 20-bits %lud 36-bits -- %ld bytes\n",
e1, e2, e3, e4, wp-out);
return wp-out;
}
static void
squirt(int fd, Squeeze *sq)
{
uchar b[7*256*5 + 2], rep[5], *p, *q;
ulong v;
int i;
p = b+2;
for(i=0; i<sq->n; i++){
v = sq->tab[i];
q = rep;
do {
*q++ = v & 0x7F;
}while((v >>= 7) != 0);
do {
*p++ = *--q | 0x80;
}while(q != rep);
p[-1] &= ~0x80;
}
if(p > b+sizeof(b))
abort();
i = p-b;
b[0] = i>>8;
b[1] = i;
Write(fd, b, i);
fprint(2, "table: %d/%d\n", i, (sq->n+1)*4);
}
static long
Read(int fd, void *buf, long nb)
{
long n;
n = read(fd, buf, nb);
if(n < 0){
fprint(2, "sqz: %s: read error: %r\n", fname);
exits("read");
}
if(n < nb){
fprint(2, "sqz: %s: unexpected end-of-file\n", fname);
exits("read");
}
return n;
}
static void
Write(int fd, void *buf, long nb)
{
if(write(fd, buf, nb) != nb){
fprint(2, "sqz: write error: %r\n");
exits("write err");
}
}