# code: drawterm

ref: a568eea78d38d3229d9c67645d508cb98946b0bc
parent: 31aac06d204f233a9452de880a1911d3dd1984bb
author: Erik Quanstrom <quanstro@quanstro.net>
date: Tue Dec 28 15:53:46 EST 2010

```
devdraw: add new draw operators
```

`--- a/libmemdraw/draw.c`
`+++ b/libmemdraw/draw.c`
`@@ -10,23 +10,35 @@`
` #define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)`
` `
` /*`
`- * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.`
`- * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.`
`- * the last one is perfect for all up to 1<<16, avoids a multiply, but requires a rathole.`
`+ * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).`
`+ * We add another 127 to round to the nearest value rather`
`+ * than truncate.`
`+ *`
`+ * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).`
`+ * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).`
`  */`
`-/* #define DIV255(x) (((x)*257+256)>>16)  */`
`-#define DIV255(x) ((((x)+1)*257)>>16)`
`-/* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */`
`+#define CALC11(a, v, tmp) \`
`+	(tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)`
` `
`-#define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)`
`-#define MASK13	0xFF00FF00`
`-#define MASK02	0x00FF00FF`
`-#define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)`
`-#define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)`
`-#define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))`
`+#define CALC12(a1, v1, a2, v2, tmp) \`
`+	(tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)`
` `
`-#define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)`
`+#define MASK 0xFF00FF`
` `
`+#define CALC21(a, vvuu, tmp) \`
`+	(tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)`
`+`
`+#define CALC41(a, rgba, tmp1, tmp2) \`
`+	(CALC21(a, rgba & MASK, tmp1) | \`
`+	 (CALC21(a, (rgba>>8)&MASK, tmp2)<<8))`
`+`
`+#define CALC22(a1, vvuu1, a2, vvuu2, tmp) \`
`+	(tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)`
`+`
`+#define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \`
`+	(CALC22(a1, rgba1 & MASK, a2, rgba2 & MASK, tmp1) | \`
`+	 (CALC22(a1, (rgba1>>8) & MASK, a2, (rgba2>>8) & MASK, tmp2)<<8))`
`+`
` static void mktables(void);`
` typedef int Subdraw(Memdrawparam*);`
` static Subdraw chardraw, alphadraw, memoptdraw;`
`@@ -307,6 +319,9 @@`
`  * Conversion tables.`
`  */`
` static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */`
`+static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */`
`+static uchar conv28[256][4];		/* ... */`
`+static uchar conv48[256][2];`
` `
` /*`
`  * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.`
`@@ -340,7 +355,7 @@`
` static void`
` mktables(void)`
` {`
`-	int i, j, small;`
`+	int i, j, mask, sh, small;`
` 		`
` 	if(tablesbuilt)`
` 		return;`
`@@ -357,6 +372,17 @@`
` 		}`
` 	}`
` `
`+	/* bit unpacking up to 8 bits, only powers of 2 */`
`+	for(i=0; i<256; i++){`
`+		for(j=0, sh=7, mask=1; j<8; j++, sh--)`
`+			conv18[i][j] = replbit[1][(i>>sh)&mask];`
`+`
`+		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)`
`+			conv28[i][j] = replbit[2][(i>>sh)&mask];`
`+`
`+		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)`
`+			conv48[i][j] = replbit[4][(i>>sh)&mask];`
`+	}`
` }`
` `
` static uchar ones = 0xff;`
`@@ -770,6 +796,50 @@`
` 	return bdst;`
` }`
` `
`+/*`
`+ * Do the channels in the buffers match enough`
`+ * that we can do word-at-a-time operations`
`+ * on the pixels?`
`+ */`
`+static int`
`+chanmatch(Buffer *bdst, Buffer *bsrc)`
`+{`
`+	uchar *drgb, *srgb;`
`+	`
`+	/*`
`+	 * first, r, g, b must be in the same place`
`+	 * in the rgba word.`
`+	 */`
`+	drgb = (uchar*)bdst->rgba;`
`+	srgb = (uchar*)bsrc->rgba;`
`+	if(bdst->red - drgb != bsrc->red - srgb`
`+	|| bdst->blu - drgb != bsrc->blu - srgb`
`+	|| bdst->grn - drgb != bsrc->grn - srgb)`
`+		return 0;`
`+	`
`+	/*`
`+	 * that implies alpha is in the same place,`
`+	 * if it is there at all (it might be == &ones).`
`+	 * if the destination is &ones, we can scribble`
`+	 * over the rgba slot just fine.`
`+	 */`
`+	if(bdst->alpha == &ones)`
`+		return 1;`
`+	`
`+	/*`
`+	 * if the destination is not ones but the src is,`
`+	 * then the simultaneous calculation will use`
`+	 * bogus bytes from the src's rgba.  no good.`
`+	 */`
`+	if(bsrc->alpha == &ones)`
`+		return 0;`
`+	`
`+	/*`
`+	 * otherwise, alphas are in the same place.`
`+	 */`
`+	return 1;`
`+}`
`+`
` static Buffer`
` alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)`
` {`
`@@ -776,26 +846,26 @@`
` 	Buffer obdst;`
` 	int fd, sadelta;`
` 	int i, sa, ma, q;`
`-	ulong s, t;`
`+	ulong t, t1;`
` `
` 	obdst = bdst;`
` 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;`
`-	q = bsrc.delta == 4 && bdst.delta == 4;`
`+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);`
` `
` 	for(i=0; i<dx; i++){`
` 		sa = *bsrc.alpha;`
` 		ma = *bmask.alpha;`
`-		fd = MUL(sa, ma, t);`
`+		fd = CALC11(sa, ma, t);`
` 		if(op == DoutS)`
` 			fd = 255-fd;`
` `
` 		if(grey){`
`-			*bdst.grey = MUL(fd, *bdst.grey, t);`
`+			*bdst.grey = CALC11(fd, *bdst.grey, t);`
` 			bsrc.grey += bsrc.delta;`
` 			bdst.grey += bdst.delta;`
` 		}else{`
` 			if(q){`
`-				*bdst.rgba = MUL0123(fd, *bdst.rgba, s, t);`
`+				*bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);`
` 				bsrc.rgba++;`
` 				bdst.rgba++;`
` 				bsrc.alpha += sadelta;`
`@@ -802,9 +872,9 @@`
` 				bmask.alpha += bmask.delta;`
` 				continue;`
` 			}`
`-			*bdst.red = MUL(fd, *bdst.red, t);`
`-			*bdst.grn = MUL(fd, *bdst.grn, t);`
`-			*bdst.blu = MUL(fd, *bdst.blu, t);`
`+			*bdst.red = CALC11(fd, *bdst.red, t);`
`+			*bdst.grn = CALC11(fd, *bdst.grn, t);`
`+			*bdst.blu = CALC11(fd, *bdst.blu, t);`
` 			bsrc.red += bsrc.delta;`
` 			bsrc.blu += bsrc.delta;`
` 			bsrc.grn += bsrc.delta;`
`@@ -813,7 +883,7 @@`
` 			bdst.grn += bdst.delta;`
` 		}`
` 		if(bdst.alpha != &ones){`
`-			*bdst.alpha = MUL(fd, *bdst.alpha, t);`
`+			*bdst.alpha = CALC11(fd, *bdst.alpha, t);`
` 			bdst.alpha += bdst.delta;`
` 		}`
` 		bmask.alpha += bmask.delta;`
`@@ -828,11 +898,11 @@`
` 	Buffer obdst;`
` 	int fs, sadelta;`
` 	int i, ma, da, q;`
`-	ulong s, t;`
`+	ulong t, t1;`
` `
` 	obdst = bdst;`
` 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;`
`-	q = bsrc.delta == 4 && bdst.delta == 4;`
`+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);`
` `
` 	for(i=0; i<dx; i++){`
` 		ma = *bmask.alpha;`
`@@ -841,15 +911,15 @@`
` 			da = 255-da;`
` 		fs = ma;`
` 		if(op != S)`
`-			fs = MUL(fs, da, t);`
`+			fs = CALC11(fs, da, t);`
` `
` 		if(grey){`
`-			*bdst.grey = MUL(fs, *bsrc.grey, t);`
`+			*bdst.grey = CALC11(fs, *bsrc.grey, t);`
` 			bsrc.grey += bsrc.delta;`
` 			bdst.grey += bdst.delta;`
` 		}else{`
` 			if(q){`
`-				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t);`
`+				*bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);`
` 				bsrc.rgba++;`
` 				bdst.rgba++;`
` 				bmask.alpha += bmask.delta;`
`@@ -856,9 +926,9 @@`
` 				bdst.alpha += bdst.delta;`
` 				continue;`
` 			}`
`-			*bdst.red = MUL(fs, *bsrc.red, t);`
`-			*bdst.grn = MUL(fs, *bsrc.grn, t);`
`-			*bdst.blu = MUL(fs, *bsrc.blu, t);`
`+			*bdst.red = CALC11(fs, *bsrc.red, t);`
`+			*bdst.grn = CALC11(fs, *bsrc.grn, t);`
`+			*bdst.blu = CALC11(fs, *bsrc.blu, t);`
` 			bsrc.red += bsrc.delta;`
` 			bsrc.blu += bsrc.delta;`
` 			bsrc.grn += bsrc.delta;`
`@@ -867,7 +937,7 @@`
` 			bdst.grn += bdst.delta;`
` 		}`
` 		if(bdst.alpha != &ones){`
`-			*bdst.alpha = MUL(fs, *bsrc.alpha, t);`
`+			*bdst.alpha = CALC11(fs, *bsrc.alpha, t);`
` 			bdst.alpha += bdst.delta;`
` 		}`
` 		bmask.alpha += bmask.delta;`
`@@ -882,11 +952,11 @@`
` 	Buffer obdst;`
` 	int fs, fd, sadelta;`
` 	int i, sa, ma, da, q;`
`-	ulong s, t, u, v;`
`+	ulong t, t1;`
` `
` 	obdst = bdst;`
` 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;`
`-	q = bsrc.delta == 4 && bdst.delta == 4;`
`+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);`
` `
` 	for(i=0; i<dx; i++){`
` 		sa = *bsrc.alpha;`
`@@ -893,24 +963,24 @@`
` 		ma = *bmask.alpha;`
` 		da = *bdst.alpha;`
` 		if(op == SatopD)`
`-			fs = MUL(ma, da, t);`
`+			fs = CALC11(ma, da, t);`
` 		else`
`-			fs = MUL(ma, 255-da, t);`
`+			fs = CALC11(ma, 255-da, t);`
` 		if(op == DoverS)`
` 			fd = 255;`
` 		else{`
`-			fd = MUL(sa, ma, t);`
`+			fd = CALC11(sa, ma, t);`
` 			if(op != DatopS)`
` 				fd = 255-fd;`
` 		}`
` `
` 		if(grey){`
`-			*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);`
`+			*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);`
` 			bsrc.grey += bsrc.delta;`
` 			bdst.grey += bdst.delta;`
` 		}else{`
` 			if(q){`
`-				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);`
`+				*bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);`
` 				bsrc.rgba++;`
` 				bdst.rgba++;`
` 				bsrc.alpha += sadelta;`
`@@ -918,9 +988,9 @@`
` 				bdst.alpha += bdst.delta;`
` 				continue;`
` 			}`
`-			*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);`
`-			*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);`
`-			*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);`
`+			*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);`
`+			*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);`
`+			*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);`
` 			bsrc.red += bsrc.delta;`
` 			bsrc.blu += bsrc.delta;`
` 			bsrc.grn += bsrc.delta;`
`@@ -929,7 +999,7 @@`
` 			bdst.grn += bdst.delta;`
` 		}`
` 		if(bdst.alpha != &ones){`
`-			*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);`
`+			*bdst.alpha = CALC12(fs, sa, fd, da, t);`
` 			bdst.alpha += bdst.delta;`
` 		}`
` 		bmask.alpha += bmask.delta;`
`@@ -953,25 +1023,25 @@`
` 	Buffer obdst;`
` 	int fd, sadelta;`
` 	int i, sa, ma, q;`
`-	ulong s, t, u, v;`
`+	ulong t, t1;`
` `
` 	USED(op);`
` 	obdst = bdst;`
` 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;`
`-	q = bsrc.delta == 4 && bdst.delta == 4;`
`+	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);`
` `
` 	for(i=0; i<dx; i++){`
` 		sa = *bsrc.alpha;`
` 		ma = *bmask.alpha;`
`-		fd = 255-MUL(sa, ma, t);`
`+		fd = 255-CALC11(sa, ma, t);`
` `
` 		if(grey){`
`-			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);`
`+			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);`
` 			bsrc.grey += bsrc.delta;`
` 			bdst.grey += bdst.delta;`
` 		}else{`
` 			if(q){`
`-				*bdst.rgba = MUL0123(ma, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);`
`+				*bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);`
` 				bsrc.rgba++;`
` 				bdst.rgba++;`
` 				bsrc.alpha += sadelta;`
`@@ -978,9 +1048,9 @@`
` 				bmask.alpha += bmask.delta;`
` 				continue;`
` 			}`
`-			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);`
`-			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);`
`-			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);`
`+			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);`
`+			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);`
`+			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);`
` 			bsrc.red += bsrc.delta;`
` 			bsrc.blu += bsrc.delta;`
` 			bsrc.grn += bsrc.delta;`
`@@ -989,7 +1059,7 @@`
` 			bdst.grn += bdst.delta;`
` 		}`
` 		if(bdst.alpha != &ones){`
`-			*bdst.alpha = MUL(ma, sa, s)+MUL(fd, *bdst.alpha, t);`
`+			*bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);`
` 			bdst.alpha += bdst.delta;`
` 		}`
` 		bmask.alpha += bmask.delta;`
`@@ -1045,7 +1115,7 @@`
` 	Buffer obdst;`
` 	int fd;`
` 	int i, ma;`
`-	ulong s, t;`
`+	ulong t;`
` `
` 	USED(op);`
` 	obdst = bdst;`
`@@ -1055,13 +1125,13 @@`
` 		fd = 255-ma;`
` `
` 		if(grey){`
`-			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);`
`+			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);`
` 			bsrc.grey += bsrc.delta;`
` 			bdst.grey += bdst.delta;`
` 		}else{`
`-			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);`
`-			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);`
`-			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);`
`+			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);`
`+			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);`
`+			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);`
` 			bsrc.red += bsrc.delta;`
` 			bsrc.blu += bsrc.delta;`
` 			bsrc.grn += bsrc.delta;`
`@@ -1070,7 +1140,7 @@`
` 			bdst.grn += bdst.delta;`
` 		}`
` 		if(bdst.alpha != &ones){`
`-			*bdst.alpha = ma+MUL(fd, *bdst.alpha, t);`
`+			*bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);`
` 			bdst.alpha += bdst.delta;`
` 		}`
` 		bmask.alpha += bmask.delta;`
`@@ -1117,7 +1187,7 @@`
` 	Buffer obdst;`
` 	int fs, fd;`
` 	int i, ma, da, zero;`
`-	ulong s, t;`
`+	ulong t;`
` `
` 	obdst = bdst;`
` 	zero = !(op&1);`
`@@ -1134,7 +1204,7 @@`
` `
` 		if(grey){`
` 			if(ma)`
`-				*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);`
`+				*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);`
` 			else if(zero)`
` 				*bdst.grey = 0;`
` 			bsrc.grey += bsrc.delta;`
`@@ -1141,9 +1211,9 @@`
` 			bdst.grey += bdst.delta;`
` 		}else{`
` 			if(ma){`
`-				*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);`
`-				*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);`
`-				*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);`
`+				*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);`
`+				*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);`
`+				*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);`
` 			}`
` 			else if(zero)`
` 				*bdst.red = *bdst.grn = *bdst.blu = 0;`
`@@ -1157,7 +1227,7 @@`
` 		bmask.alpha += bmask.delta;`
` 		if(bdst.alpha != &ones){`
` 			if(ma)`
`-				*bdst.alpha = fs+MUL(fd, da, t);`
`+				*bdst.alpha = fs+CALC11(fd, da, t);`
` 			else if(zero)`
` 				*bdst.alpha = 0;`
` 			bdst.alpha += bdst.delta;`
`@@ -1890,7 +1960,7 @@`
` 	default:`
` 		assert(0 /* boolcopyfn */);`
` 	}`
`-	return 0;`
`+	return nil;`
` }`
` `
` /*`
`@@ -2353,7 +2423,7 @@`
` `
` 	bx = -bsh-1;`
` 	ex = -bsh-1-dx;`
`-	bits = 0;`
`+	SET(bits);`
` 	v = par->sdval;`
` `
` 	/* make little endian */`
`@@ -2457,7 +2527,6 @@`
` {`
` 	ulong bits;`
` 	int d, y;`
`-	uchar p[4];`
` `
` 	if(val == DNofill)`
` 		return;`
`@@ -2471,11 +2540,6 @@`
` 	default:	/* 1, 2, 4, 8, 16, 32 */`
` 		for(d=i->depth; d<32; d*=2)`
` 			bits = (bits << d) | bits;`
`-		p[0] = bits;		/* make little endian */`
`-		p[1] = bits>>8;`
`-		p[2] = bits>>16;`
`-		p[3] = bits>>24;`
`-		bits = *(ulong*)p;`
` 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));`
` 		break;`
` 	}`