code: 9ferno

ref: da7d6df6faf18e289fe0f3f61524dcc7fddeef18
dir: /os/boot/mpc/fblt.c/

View raw version
#include <u.h>
#include <libc.h>
#include <libg.h>
#include <gnot.h>

/*
 * bitblt operates a 'word' at a time.
 * WBITS is the number of bits in a word,
 * LWBITS is log2(WBITS),
 * W2L is the number of words in a long,
 * WMASK has bits set for the low order word of a long,
 * WType is a pointer to a word.
 *
 * The defaults below (a word is a 32-bit ulong) are used only when
 * WBITS has not already been defined.
 */
#ifndef WBITS
#define WBITS	32
#define LWBITS	5
#define	W2L	1
#define WMASK	~0UL
typedef ulong	*WType;
#endif

/* DEBUG is defined (empty) unconditionally; nothing visible in this file tests it */
#define DEBUG 

#ifdef TEST
/*
 * globals used for testing
 *
 * FORCEFORW and FORCEBAKW are read by gbitblt when the source and
 * destination are the same bitmap: FORCEBAKW set makes it process the
 * rows bottom to top, FORCEFORW set (with FORCEBAKW clear) top to bottom.
 * curdm, cursm, curpt, curr and curf record the arguments of the most
 * recent call to gbitblt.
 * mem is not referenced by the code visible in this file.
 */
int	FORCEFORW;
int	FORCEBAKW;
GBitmap	*curdm, *cursm;
Point	curpt;
Rectangle curr;
Fcode	curf;
void	*mem;
#endif

/*
 * Widen the pixels of one source word.
 *
 * sw holds 32 bits of pixels that are 1<<sdep bits wide.  Each pixel is
 * widened by a factor of 1<<x by replicating its bits, which turns the
 * one input word into 1<<x output words.  They are stored in buf, the
 * word made from the high order bits of sw first; buf must have room
 * for 1<<x words.
 */
static void
gbitexplode(ulong sw, ulong *buf, int sdep, int x)
{
	int bit, spread, shift, rep;
	int pixbits, chunk, nwords, gap;
	ulong piece, out, pixmask;

	pixbits = 1 << sdep;			/* bits in a source pixel */
	pixmask = (1 << pixbits) - 1;		/* one source pixel, right justified */
	nwords = 1 << x;			/* output words per input word */
	chunk = 32 >> x;			/* source bits consumed per output word */
	gap = (nwords << sdep) - pixbits;	/* extra bits each pixel gains */

	shift = 32 - chunk;
	while(shift >= 0){
		piece = sw >> shift;
		out = 0;

		/* move each pixel of this chunk to the low end of its widened slot */
		spread = 0;
		bit = 0;
		while(bit < chunk){
			out |= (piece & (pixmask << bit)) << spread;
			spread += gap;
			bit += pixbits;
		}

		/* fill each slot by doubling the pixel x times */
		for(rep = 0; rep < x; rep++)
			out |= out << (pixbits << rep);

		*buf++ = out;
		shift -= chunk;
	}
}

/*
void
main(void)
{
	ulong buf[128];

	gbitexplode(0x7777, buf, 0, 3);
	exits(0);
}
*/

/*
 * gbitblt combines the pixels of rectangle r of sm with the pixels of
 * dm starting at pt, a word at a time, and stores the result in dm.
 * fcode (masked with F) selects which of the 16 boolean functions of
 * source and destination is used.
 *
 * The source may be shallower than the destination: each source word
 * is then expanded by gbitexplode into 1<<(ddep-sdep) destination words.
 * A source deeper than the destination is not implemented and aborts.
 *
 * Each scanline is always processed left to right.  When sm == dm and
 * the source starts above the destination the rows are processed bottom
 * to top; the case that would need a right to left copy (same row,
 * source to the left of the destination) is not implemented and aborts.
 * Under TEST the row order is chosen by FORCEFORW/FORCEBAKW instead.
 *
 * gbitbltclip is defined elsewhere; it is handed the address of the
 * first argument -- presumably so that it can clip the arguments in
 * place (TODO confirm against its definition).
 */
void
gbitblt(GBitmap *dm, Point pt, GBitmap *sm, Rectangle r, Fcode fcode)
{
	int	width;		/* width in bits of dst */
	int	wwidth;		/* floor width in words */
	int	height;		/* height in pixels minus 1 */
	int	sdep;		/* src ldepth */
	int 	ddep;		/* dst ldepth */
	int	deltadep;	/* diff between ldepths */
	int	sspan;		/* words between scanlines in src */
	int	dspan;		/* words between scanlines in dst */
	int	soff;		/* bit offset of src start point */
	int	sdest;		/* bit offset of src start point that matches doff when expanded */
	int	doff;		/* bit offset of dst start point */
	int	dend;		/* bits used in rightmost dst word; 0 means all of it */
	int	delta;		/* amount to shift src by */
	int	sign;		/* of delta */
	ulong	*saddr;
	ulong	*daddr;
	ulong	*s;
	ulong	*d;
	ulong	mask;
	ulong	tmp;		/* temp storage source word */
	ulong	sw;		/* source word constructed */
	ulong	dw;		/* dest word fetched */
	ulong	lmask;		/* affected pixels in leftmost dst word */
	ulong	rmask;		/* affected pixels in rightmost dst word */
	int	i;
	int	j;
	ulong	buf[32];	/* for expanding a source */
	ulong	*p;		/* pointer into buf */
	int	spare;		/* number of words already converted */


#ifdef TEST
	curdm = dm;
	cursm = sm;
	curpt = pt;
	curr = r;
	curf = fcode;
#endif

	gbitbltclip(&dm);

	width = r.max.x - r.min.x;
	if(width <= 0)
		return;
	height = r.max.y - r.min.y - 1;
	if(height < 0)
		return;

	/* from here on x coordinates and width are in bits, not pixels */
	ddep = dm->ldepth;
	pt.x <<= ddep;
	width <<= ddep;

	sdep = sm->ldepth;
	r.min.x <<= sdep;
	r.max.x <<= sdep;

	dspan = dm->width * W2L;
	sspan = sm->width * W2L;

	daddr = (ulong*)((WType)dm->base
			+ dm->zero*W2L + pt.y*dspan
			+ (pt.x >> LWBITS));
	saddr = (ulong*)((WType)sm->base
			+ sm->zero*W2L + r.min.y*sspan
			+ (r.min.x >> LWBITS));

	doff = pt.x & (WBITS - 1);
	lmask = WMASK >> doff;
	/*
	 * When the blit ends exactly on a word boundary the whole last
	 * word is affected.  Test for that explicitly: shifting WMASK by
	 * WBITS is undefined and gives different answers on different
	 * processors.
	 */
	dend = (doff + width) & (WBITS - 1);
	if(dend == 0)
		rmask = WMASK;
	else
		rmask = (WMASK << (WBITS - dend)) & WMASK;
	soff = r.min.x & (WBITS-1);
	wwidth = ((pt.x+width-1)>>LWBITS) - (pt.x>>LWBITS);

	if(sm == dm){
#ifdef TEST
		if(!FORCEBAKW &&
		   (FORCEFORW || sm != dm || saddr > daddr ||
		    (saddr == daddr && soff > doff)))
			;
		else{
			daddr += height * dspan;
			saddr += height * sspan;
			sspan -= 2 * W2L * sm->width;
			dspan -= 2 * W2L * dm->width;
		}
#else
		if(r.min.y < pt.y){	/* bottom to top */
			/* start at the last row and make both spans negative */
			daddr += height * dspan;
			saddr += height * sspan;
			sspan -= 2 * W2L * sm->width;
			dspan -= 2 * W2L * dm->width;
		}else if(r.min.y == pt.y && r.min.x < pt.x)
			abort()/*goto right*/;
#endif
	}
	if(wwidth == 0)		/* collapse masks for narrow cases */
		lmask &= rmask;
	fcode &= F;

	deltadep = ddep - sdep;
	/*
	 * Only expansion to a deeper (or equal) destination is implemented.
	 * deltadep is used as a shift count below, so a negative value
	 * would be undefined and would leave spare and p inconsistent.
	 */
	if(deltadep < 0)
		abort();
	sdest = doff >> deltadep;
	delta = soff - sdest;
	sign = 0;
	if(delta < 0){
		sign = 1;
		delta = -delta;
	}

	p = 0;
	for(j = 0; j <= height; j++){
		d = daddr;
		s = saddr;
		mask = lmask;
		tmp = 0;
		/* source starts to the right of sdest: preload the first source word */
		if(!sign)
			tmp = *s++;
		spare = 0;
		for(i = wwidth; i >= 0; i--){
			if(spare)
				sw = *p++;	/* still have expanded words in buf */
			else{
				/* build the next aligned source word from two adjacent ones */
				if(sign){
					sw = tmp << (WBITS-delta);
					tmp = *s++;
					sw |= tmp >> delta;
				}else{
					sw = tmp << delta;
					tmp = *s++;
					if(delta)	/* avoid shifting by WBITS */
						sw |= tmp >> (WBITS-delta);
				}
				spare = 1 << deltadep;
				if(deltadep >= 1){
					gbitexplode(sw, buf, sdep, deltadep);
					p = buf;
					sw = *p++;
				}
			}

			dw = *d;
			switch(fcode){		/* ltor bit aligned */
			case Zero:	*d = dw & ~mask;		break;
			case DnorS:	*d = dw ^ ((~sw | dw) & mask);	break;
			case DandnotS:	*d = dw ^ ((sw & dw) & mask);	break;
			case notS:	*d = dw ^ ((~sw ^ dw) & mask);	break;
			case notDandS:	*d = dw ^ ((sw | dw) & mask);	break;
			case notD:	*d = dw ^ mask;			break;
			case DxorS:	*d = dw ^ (sw & mask);		break;
			case DnandS:	*d = dw ^ ((sw | ~dw) & mask);	break;
			case DandS:	*d = dw ^ ((~sw & dw) & mask);	break;
			case DxnorS:	*d = dw ^ (~sw & mask);		break;
			case D:						break;
			case DornotS:	*d = dw | (~sw & mask);		break;
			case S:		*d = dw ^ ((sw ^ dw) & mask);	break;
			case notDorS:	*d = dw ^ (~(sw & dw) & mask);	break;
			case DorS:	*d = dw | (sw & mask);		break;
			case F:		*d = dw | mask;			break;
			}
			d++;

			/* interior words are fully affected; the next word is the last when i == 1 */
			mask = WMASK;
			if(i == 1)
				mask = rmask;
			spare--;
		}
		saddr += sspan;
		daddr += dspan;
	}
}

#ifdef TEST
/*
 * state shared by the test harness (run and main)
 */
void	prprog(void);
GBitmap *bb1, *bb2;	/* rectangle tests: bb1 is the destination, bb2 the source */
/*
 * src and dst are the pixel data of bb2 and bb1; xdst holds the initial
 * destination contents copied into dst before each blit; xans holds the
 * answer built from single-pixel blits, compared against dst by run
 */
ulong	*src, *dst, *xdst, *xans;
int	swds, dwds;	/* number of words in src and in dst */
long	ticks;		/* accumulated *_clock difference across timed blits */
int	timeit;		/* set by -t: time the blits instead of checking them */

/*
 * Reference implementation of a single pixel operation, used to check
 * what gbitblt produced.
 *
 * f is the Fcode, s a source pixel of 1<<sld bits and d a destination
 * pixel of 1<<dld bits, both right justified.  The source pixel is
 * first converted to the destination depth: replicated when it is
 * shallower, reduced to its high order bits when it is deeper.
 * Returns the new destination pixel, masked to the destination width.
 */
long
func(int f, long s, int sld, long d, int dld)
{
	long a;
	int sh, i, db, sb;

	db = 1 << dld;
	sb = 1 << sld;
	sh = db - sb;
	if(sh > 0) {
		/* replicate the source pixel until it fills db bits */
		a = s;
		for(i = sb; i<db; i += sb){
			a <<= sb;
			s |= a;
		}
	} else if(sh < 0)
		s >>= -sh;

	switch(f){
	case Zero:	d = 0;			break;
	case DnorS:	d = ~(d|s);		break;
	case DandnotS:	d = d & ~s;		break;
	case notS:	d = ~s;			break;
	case notDandS:	d = ~d & s;		break;
	case notD:	d = ~d;			break;
	case DxorS:	d = d ^ s;		break;
	case DnandS:	d = ~(d&s);		break;
	case DandS:	d = d & s;		break;
	case DxnorS:	d = ~(d^s);		break;
	case S:		d = s;			break;
	case DornotS:	d = d | ~s;		break;
	case D:		d = d;			break;
	case notDorS:	d = ~d | s;		break;
	case DorS:	d = d | s;		break;
	case F:		d = ~0;			break;
	}

	/*
	 * Drop the bits above the destination pixel.  A pixel as wide as
	 * a long needs no masking, and building the mask would shift by
	 * the full width, which is undefined.
	 */
	if(db < 8*(int)sizeof(long))
		d &= (1L<<db) - 1;
	return d;
}

void
run(int fr, int to, int w, int op)
{
	int i, j, f, t, fy, ty;
	extern long *_clock;

	fr += bb2->r.min.x;
	to += bb1->r.min.x;
	fy = bb2->r.min.y + 1;
	ty = bb1->r.min.y + 1;
	if(timeit) {
		memcpy(dst, xdst, dwds * sizeof(long));
		ticks -= *_clock;
		gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
		ticks += *_clock;
		return;
	}
	f = fr;
	t = to;
	memcpy(dst, xdst, dwds * sizeof(long));
	for(i=0; i<w; i++) {
		gbitblt(bb1, Pt(t,ty), bb2, Rect(f,fy,f+1,fy+1), op);
		gbitblt(bb1, Pt(t,ty+1), bb2, Rect(f,fy+1,f+1,fy+2), op);
		f++;
		t++;
	}
	memcpy(xans, dst, dwds * sizeof(long));

	memcpy(dst, xdst, dwds * sizeof(long));
	gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);

	if(memcmp(xans, dst, dwds * sizeof(long))) {
		/*
		 * print src and dst row offset, width in bits, and forw/back
		 * then print for each of the four rows: the source (s),
		 * the dest (d), the good value of the answer (g),
		 * and the actual bad value of the answer (b)
		 */
		print("fr=%d to=%d w=%d fb=%d%d\n",
			fr, to, w, FORCEFORW, FORCEBAKW);
		print("dst bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
			bb1->base, bb1->zero, bb1->width, bb1->ldepth,
			bb1->r.min.x, bb1->r.min.y, bb1->r.max.x, bb1->r.max.y);
		print("src bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
			bb2->base, bb2->zero, bb2->width, bb2->ldepth,
			bb2->r.min.x, bb2->r.min.y, bb2->r.max.x, bb2->r.max.y);
		for(j=0; 7*j < dwds; j++) {
			print("\ns");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", src[7*j + i]);
			print("\nd");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", xdst[7*j + i]);
			print("\ng");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", xans[7*j + i]);
			print("\nb");
			for(i=0; i<7 && 7*j+i < dwds; i++)
				print(" %.8lux", dst[7*j + i]);
			print("\n");
		}
		prprog();
	}
}

/*
 * Called by run and main after a mismatch has been reported.
 * Here it does nothing but terminate the program with exits(0).
 * NOTE(review): exits(0) presumably reports success even though a
 * test failed, and it makes the exits("error") calls that follow
 * prprog() in main unreachable -- confirm that this is intended.
 */
void
prprog(void)
{
	exits(0);
}

/*
 * Test driver for gbitblt.
 *
 * usage: [-i iters] [-s] [-t [iters]] [sld [dld]]
 *	-i	number of random rectangle blits per pass (default 200)
 *	-s	place the test bitmaps at the origin instead of at random
 *		origins, and do not seed the random number generator
 *	-t	time the blits instead of checking them
 *	sld, dld	ldepth of the source and destination bitmaps (default 0)
 *
 * For each function code from 1 to F it checks 1000 random one-pixel
 * blits against func (skipped when timing), then runs iters random
 * rectangle blits through run with FORCEFORW set and, when the two
 * depths are equal, again with FORCEBAKW set.
 */
int
main(int argc, char *argv[])
{
	int f, t, w, i, sld, dld, op, iters, simple;
	ulong s, d, spix, dpix, apix, smask, dmask, m, *ps, *pd;
	Point sorg, dorg;
	GBitmap *bs, *bd;
	long seed;
	char *ct;

	sld = 0;
	dld = 0;
	timeit = 0;
	iters = 200;
	simple = 0;
	ARGBEGIN {
	case 'i':
		/* ARGF yields nil when the count is missing; keep the default then */
		ct = ARGF();
		if(ct)
			iters = atoi(ct);
		break;
	case 's':
		simple = 1;
		break;
	case 't':
		timeit = 1;
		ct = ARGF();
		if(ct)
			iters = atoi(ct);
		break;
	} ARGEND
	if(argc > 0)
		sld = atoi(argv[0]);
	if(argc > 1)
		dld = atoi(argv[1]);
	if(!timeit && !simple) {
		seed = time(0);
		print("seed %lux\n", seed); srand(seed);	/**/
	}

	print("sld %d dld %d\n", sld, dld);
	op = 1;

	/*
	 * right justified masks one source and one destination pixel wide;
	 * a pixel filling the whole word must not be built with a 32 bit shift
	 */
	smask = (1<<sld) < 32 ? (1UL << (1<<sld)) - 1 : ~0UL;
	dmask = (1<<dld) < 32 ? (1UL << (1<<dld)) - 1 : ~0UL;

	/* bitmaps for 1-bit tests */
	bd = gballoc(Rect(0,0,32,1), dld);
	bs = gballoc(Rect(0,0,32,1), sld);
	for(i=0; i<bs->width; i++)
		bs->base[i] = lrand();

	/* bitmaps for rect tests */
	if(simple) {
		dorg = Pt(0,0);
		sorg = Pt(0,0);
	} else {
		dorg = Pt(nrand(63)-31,nrand(63)-31);
		sorg = Pt(nrand(63)-31,nrand(63)-31);
	}
	bb1 = gballoc(Rpt(dorg,add(dorg,Pt(200,4))), dld);
	bb2 = gballoc(Rpt(sorg,add(sorg,Pt(200,4))), sld);
	dwds = bb1->width * Dy(bb1->r);
	swds = bb2->width * Dy(bb2->r);
	dst = bb1->base;
	src = bb2->base;
	xdst = malloc(dwds * sizeof(long));
	xans =  malloc(dwds * sizeof(long));
	if(xdst == 0 || xans == 0) {
		print("out of memory\n");
		exits("malloc");
	}
	for(i=0; i<swds; i++)
		src[i] = lrand();
	for(i=0; i<dwds; i++)
		xdst[i] = lrand();

loop:
	print("Op %d\n", op);
	if(!timeit) {
		print("one pixel\n");
		ps = bs->base;
		pd = bd->base;
		FORCEFORW = 1;
		FORCEBAKW = 0;
		for(i=0; i<1000; i++, FORCEFORW = !FORCEFORW, FORCEBAKW = !FORCEBAKW) {
			/* random source pixel f and destination pixel t within one word */
			f = nrand(32 >> sld);
			t = nrand(32 >> dld);
			s = lrand();
			d = lrand();
			ps[0] = s;
			pd[0] = d;
#ifdef T386
			spix = (byterev(s) >> (32 - ((f+1)<<sld))) & smask;
			dpix = (byterev(d) >> (32 - ((t+1)<<dld))) & dmask;
#else
			spix = (s >> (32 - ((f+1)<<sld))) & smask;
			dpix = (d >> (32 - ((t+1)<<dld))) & dmask;
#endif
			/* expected destination pixel, shifted back into place */
#ifdef T386
			apix = byterev(func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld)));
#else
			apix = func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld));
#endif
			gbitblt(bd, Pt(t,0), bs, Rect(f,0,f+1,1), op);
			/* the source must be untouched */
			if(ps[0] != s) {
				print("bb src %.8lux %.8lux %d %d\n", ps[0], s, f, t);
				exits("error");
			}
			m = dmask << (32 - ((t+1)<<dld));
#ifdef T386
			m = byterev(m);
#endif
			/* nothing outside the destination pixel may change */
			if((pd[0] & ~m) != (d & ~m)) {
					print("bb dst1 %.8lux %.8lux\n",
						s, d);
					print("bb      %.8lux %.8lux %d %d\n",
						ps[0], pd[0], f, t);
					prprog();
					exits("error");
			}
			/* the destination pixel must hold the expected value */
			if((pd[0] & m) != apix) {
				spix <<= 32 - ((f+1)<<sld);
				dpix <<= 32 - ((t+1)<<dld);
#ifdef T386
				spix = byterev(spix);
				dpix = byterev(dpix);
#endif
				print("bb dst2 %.8lux %.8lux\n",
					s, d);
				print("bb      %.8lux %.8lux %d %d\n",
					ps[0], pd[0], f, t);
				print("bb      %.8lux %.8lux %.8lux %.8lux\n",
					spix, dpix, apix, pd[0] & m);
				prprog();
				exits("error");
			}
		}
	}

	print("for\n");
	FORCEFORW = 1;
	FORCEBAKW = 0;

	for(i=0; i<iters; i++) {
		f = nrand(64);
		t = nrand(64);
		w = nrand(130);
		run(f, t, w, op);
	}

	if(sld == dld) {
		print("bak\n");
		FORCEFORW = 0;
		FORCEBAKW = 1;
	
		for(i=0; i<iters; i++) {
			f = nrand(64);
			t = nrand(64);
			w = nrand(130);
			run(f, t, w, op);
		}
	}

	if(op < F) {
		op++;
		goto loop;
	}
	if(timeit)
		print("time: %ld ticks\n", ticks);
	exits(0);
}


#endif