ref: 2e4717b4e21ee49ef77271fed6e4ad6b38f532b1
parent: 8bbc6b260915019b8f31c3a22ae4c50f4e69653a
	author: rodri <rgl@antares-labs.eu>
	date: Thu Sep  4 15:49:57 EDT 2025
	
libmemdraw: add image correlation and other improvements i replaced the bit-shifting in some of the pixel sampling routines for a single SWAR multiplication, which should be faster in most of the arches we support; although i've only tested it on amd64 and arm64. also wrote memimagecorrelate() which allows us to correlate and convolve images with a filter or kernel. it's limited to a single filter for all of the color channels, since we don't have support for images with more than 8bpc yet, and we use 32-bit fixed-point numbers for every kernel component. we still had a decision outstanding for memaffinewarp() as to how to control the filter it uses for the transformation. i wasn't getting good results when trying to integrate convolution into the sampler, so i added a parameter to choose whether you want a smooth, higher quality result or a faster but blocky one. a more knowledgeable user can then do their own post-processing with the correlation operator.
--- a/sys/include/memdraw.h
+++ b/sys/include/memdraw.h
@@ -151,8 +151,10 @@
extern void memarc(Memimage*, Point, int, int, int, Memimage*, Point, int, int, int);
extern Rectangle memlinebbox(Point, Point, int, int, int);
extern int memlineendsize(int);
-extern int memaffinewarp(Memimage*, Rectangle, Memimage*, Point, Warp);
+extern int memaffinewarp(Memimage*, Rectangle, Memimage*, Point, Warp, int);
extern void mkwarp(Warp, double[3][3]);
+extern Memimage* allocmemimagekernel(double*, int, int, double);
+extern int memimagecorrelate(Memimage*, Rectangle, Memimage*, Point, Memimage*);
extern void _memmkcmap(void);
extern int memimageinit(void);
--- a/sys/man/2/memdraw
+++ b/sys/man/2/memdraw
@@ -26,6 +26,8 @@
memimagedraw,
memaffinewarp,
mkwarp,
+allocmemimagekernel,
+memimagecorrelate,
drawclip,
drawclipnorepl,
memlinebbox,
@@ -138,8 +140,12 @@
void memimagedraw(Memimage *dst, Rectangle r, Memimage *src,
Point sp, Memimage *mask, Point mp, Drawop op)
int memaffinewarp(Memimage *dst, Rectangle r, Memimage *src,
- Point sp, Warp w)
+ Point sp, Warp w, int smooth)
void mkwarp(Warp w, double m[3][3])
+Memimage* allocmemimagekernel(double *k, int dx, int dy,
+ double denom)
+int memimagecorrelate(Memimage *dst, Rectangle r, Memimage *src,
+ Point sp, Memimage *k)
.PP
.ft L
.nf
@@ -399,6 +405,10 @@
and the
.B repl
bit, but applied to the affine mappings.
+If
+.B smooth
+is set, each sample will be interpolated for better quality, which
+will make the transformation slower.
.PP
.I Mkwarp
converts a 3×3 row-major matrix of
@@ -413,6 +423,38 @@
ignoring the last row which is always set to
.B "[0 0 1]"
for affine transformations.
+.PP
+.I Allocmemimagekernel
+takes
+.B k
+as a matrix with
+.B dx
+columns and
+.B dy
+rows, and turns it into a 32-bit depth
+.B Memimage
+with each pixel representing an element of the matrix as a fixed-point
+number—as required by
+.IR memimagecorrelate.
+Each of the elements will be divided by
+.B denom
+except when it's zero, where a sum of coefficients (Σkᵢ) will be
+precalculated and used instead.
+.PP
+.I Memimagecorrelate
+correlates image
+.B src
+with the filter
+.BR k ,
+and stores the result in the region of
+.B dst
+defined by
+.BR r .
+It also follows the conventions of
+.IR draw (2)
+regarding clipping and source image replication.
+The result is not normalized, but the same can be accomplished by
+normalizing the kernel (k/Σkᵢ) before calling this routine.
.PP
.I Drawclip
takes the images involved in a draw operation,
--- /dev/null
+++ b/sys/src/libmemdraw/convolvetest.c
@@ -1,0 +1,258 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <draw.h>
+#include <memdraw.h>
+
+uvlong t0;
+
+void
+profbegin(void)
+{+ char buf[128];
+ int fd;
+
+ snprint(buf, sizeof(buf), "/proc/%d/ctl", getpid());
+ fd = open(buf, OWRITE);
+ if(fd < 0)
+		sysfatal("open: %r");+ fprint(fd, "profile\n");
+ close(fd);
+ t0 = nsec();
+}
+
+void
+profend(void)
+{+ char buf[128];
+ double dt;
+
+ dt = (nsec() - t0)/1e6;
+ fprint(2, "dt: %fms\n", dt);
+
+ snprint(buf, sizeof(buf), "%d", getpid());
+	switch(fork()){+ case -1:
+ abort();
+ case 0:
+ dup(2, 1);
+		print("tprof %s\n", buf);+		execl("/bin/tprof", "tprof", buf, nil);+ abort();
+ default:
+ free(wait());
+ break;
+ }
+}
+
+void
+initworkrects(Rectangle *wr, int nwr, Rectangle *fbr)
+{+ int i, Δy;
+
+ wr[0] = *fbr;
+ Δy = Dy(wr[0])/nwr;
+ wr[0].max.y = wr[0].min.y + Δy;
+ for(i = 1; i < nwr; i++)
+ wr[i] = rectaddpt(wr[i-1], Pt(0,Δy));
+ if(wr[nwr-1].max.y < fbr->max.y)
+ wr[nwr-1].max.y = fbr->max.y;
+}
+
+void *
+erealloc(void *v, ulong n)
+{+ void *nv;
+
+ nv = realloc(v, n);
+ if(nv == nil)
+		sysfatal("realloc: %r");+ if(v == nil)
+ setmalloctag(nv, getcallerpc(&v));
+ else
+ setrealloctag(nv, getcallerpc(&v));
+ return nv;
+}
+
+void
+reversem(double *k, int len)
+{+ double *e, t;
+
+ e = k + len;
+	while(k < e){+ t = *k;
+ *k++ = *--e;
+ *e = t;
+ }
+}
+
+#define isspace(c) ((c) == ' ' || (c) == '\t')
+Memimage *
+readimagekernel(int fd, double denom, int reverse)
+{+ Memimage *ik;
+ Biobuf *bin;
+ double *k, d;
+ int nk, dx0, dx, dy;
+ char *line;
+
+ k = nil;
+ nk = 0;
+ dx0 = dx = dy = 0;
+
+ bin = Bfdopen(fd, OREAD);
+ if(bin == nil)
+		sysfatal("Bfdopen: %r");+
+	while((line = Brdstr(bin, '\n', 1)) != nil){+		while(*line){+ d = strtod(line, &line);
+			if(*line && !isspace(*line)){+ free(k);
+ Bterm(bin);
+				werrstr("bad image kernel file");+ return nil;
+ }
+ k = erealloc(k, (nk + 1)*sizeof(double));
+ k[nk++] = d;
+ dx++;
+ }
+		if(dy > 0 && dx != dx0){+ free(k);
+ Bterm(bin);
+			werrstr("image kernel file has inconsistent dx");+ return nil;
+ }
+ if(dy++ == 0)
+ dx0 = dx;
+ dx = 0;
+ }
+ Bterm(bin);
+
+ if(reverse)
+ reversem(k, dx0*dy);
+
+ ik = allocmemimagekernel(k, dx0, dy, denom);
+ free(k);
+	if(ik == nil){+		werrstr("allocmemimagekernel: %r");+ return nil;
+ }
+ return ik;
+}
+
+void
+usage(void)
+{+ fprint(2, "usage: %s [-cRp] kernel [denom]\n", argv0);
+	exits("usage");+}
+
+void
+main(int argc, char *argv[])
+{+ Memimage *dst, *src;
+ Memimage *ikern;
+ Rectangle dr, *wr;
+ int convolve, dorepl, parallel, fd, nproc, i;
+ char *nprocs, *kernf;
+ double denom;
+
+ kernf = nil;
+ denom = 0;
+ convolve = 1;
+ dorepl = 0;
+ parallel = 0;
+	ARGBEGIN{+ case 'c':
+ convolve = 0; /* correlate */
+ break;
+ case 'R':
+ dorepl++;
+ break;
+ case 'p':
+ parallel++;
+ break;
+ default:
+ usage();
+ }ARGEND;
+	switch(argc){+ case 2:
+ denom = strtod(argv[1], nil);
+ /* fallthrough */
+ case 1:
+ kernf = argv[0];
+ break;
+ default:
+ usage();
+ }
+
+ if(memimageinit() != 0)
+		sysfatal("memimageinit: %r");+
+ fd = open(kernf, OREAD);
+ if(fd < 0)
+		sysfatal("open: %r");+ ikern = readimagekernel(fd, denom, convolve);
+ if(ikern == nil)
+		sysfatal("readimagekernel: %r");+ close(fd);
+
+ src = readmemimage(0);
+ if(dorepl)
+ src->flags |= Frepl;
+
+ dr = src->r;
+ dst = allocmemimage(dr, src->chan);
+ memfillcolor(dst, DTransparent);
+
+ profbegin();
+	if(parallel){+		nprocs = getenv("NPROC");+ if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2)
+ nproc = 1;
+ free(nprocs);
+
+ wr = malloc(nproc*sizeof(Rectangle));
+ initworkrects(wr, nproc, &dr);
+
+		for(i = 0; i < nproc; i++){+			switch(rfork(RFPROC|RFMEM)){+ case -1:
+				sysfatal("rfork: %r");+ case 0:
+ if(memimagecorrelate(dst, wr[i], src, src->r.min, ikern) < 0)
+ fprint(2, "[%d] memimagecorrelate: %r\n", getpid());
+ exits(nil);
+ }
+ }
+ while(waitpid() != -1)
+ ;
+
+ free(wr);
+	}else{+ if(memimagecorrelate(dst, dr, src, src->r.min, ikern) < 0)
+			sysfatal("memimagecorrelate: %r");+
+// dr = rectaddpt(Rect(0,0,Dx(dst->r)/2,Dy(dst->r)/2), dst->r.min);
+// dst->clipr = dr;
+// if(memimagecorrelate(dst, dr, src, src->r.min, ikern) < 0)
+//			sysfatal("memimagecorrelate: %r");+// dr = rectaddpt(Rect(Dx(dst->r)/2+1,0,Dx(dst->r),Dy(dst->r)/2), dst->r.min);
+// dst->clipr = dst->r;
+// if(memimagecorrelate(dst, dr, src, src->r.min, ikern) < 0)
+//			sysfatal("memimagecorrelate: %r");+// dr = rectaddpt(Rect(0,Dy(dst->r)/2+1,Dx(dst->r)/2,Dy(dst->r)), dst->r.min);
+// if(memimagecorrelate(dst, dr, src, src->r.min, ikern) < 0)
+//			sysfatal("memimagecorrelate: %r");+// dr = rectaddpt(Rect(Dx(dst->r)/2+1,Dy(dst->r)/2+1,Dx(dst->r),Dy(dst->r)), dst->r.min);
+// dst->clipr = dr;
+// if(memimagecorrelate(dst, dr, src, src->r.min, ikern) < 0)
+//			sysfatal("memimagecorrelate: %r");+ }
+ profend();
+ writememimage(1, dst);
+
+ exits(nil);
+}
--- a/sys/src/libmemdraw/warp.c
+++ b/sys/src/libmemdraw/warp.c
@@ -9,12 +9,17 @@
/* 25.7 fixed-point number operations */
#define FMASK ((1<<7) - 1)
+#define flt2fix(n) ((long)((n)*(1<<7) + ((n) < 0? -0.5: 0.5)))
+#define fix2flt(n) ((n)/128.0)
#define int2fix(n) ((vlong)(n)<<7)
#define fix2int(n) ((n)>>7)
#define fixmul(a,b) ((vlong)(a)*(vlong)(b) >> 7)
+#define fixdiv(a,b) (((vlong)(a) << 7)/(vlong)(b))
#define fixfrac(n) ((n)&FMASK)
#define lerp(a,b,t) ((a) + ((((b) - (a))*(t))>>7))
+#define clamp(a,b,c) ((a)<(b)?(b):((a)>(c)?(c):(a)))
+
typedef struct Sampler Sampler;
typedef struct Blitter Blitter;
@@ -26,7 +31,9 @@
int bpl;
int cmask;
long Δx, Δy;
- ulong (*getpixel)(Sampler*, Point);
+ Memimage *k; /* filtering kernel */
+ Point kcp; /* kernel center point */
+ ulong (*fn)(Sampler*, Point);
};
struct Blitter
@@ -35,11 +42,34 @@
uchar *a;
int bpl;
int cmask;
- void (*putpixel)(Blitter*, Point, ulong);
+ void (*fn)(Blitter*, Point, ulong);
};
+static void *getsampfn(ulong);
+static void *getblitfn(ulong);
+
+static void
+initsampler(Sampler *s, Memimage *i)
+{+ s->i = i;
+ s->a = i->data->bdata + i->zero;
+ s->bpl = sizeof(ulong)*i->width;
+ s->cmask = (1ULL << i->depth) - 1;
+ s->fn = getsampfn(i->chan);
+}
+
+static void
+initblitter(Blitter *b, Memimage *i)
+{+ b->i = i;
+ b->a = i->data->bdata + i->zero;
+ b->bpl = sizeof(ulong)*i->width;
+ b->cmask = (1ULL << i->depth) - 1;
+ b->fn = getblitfn(i->chan);
+}
+
static Point
-fix_xform(Point p, Warp m)
+xform(Point p, Warp m)
 { 	return (Point){fixmul(p.x, m[0][0]) + fixmul(p.y, m[0][1]) + m[0][2],
@@ -57,8 +87,7 @@
npack = 8;
off = pt.x % npack;
v = p[0] >> (npack-1-off) & 0x1;
- v *= 0xFFFFFFFF;
- return v|0xFF;
+ return v*0xFFFFFF00 | 0xFF;
}
static ulong
@@ -71,9 +100,7 @@
npack = 8/2;
off = pt.x % npack;
v = p[0] >> 2*(npack-1-off) & 0x3;
- v |= v<<2;
- v |= v<<4;
- return (v<<24)|(v<<16)|(v<<8)|0xFF;
+ return v*0x55555500 | 0xFF;
}
static ulong
@@ -86,32 +113,27 @@
npack = 8/4;
off = pt.x % npack;
v = p[0] >> 4*(npack-1-off) & 0xF;
- v |= v<<4;
- return (v<<24)|(v<<16)|(v<<8)|0xFF;
+ return v*0x11111100 | 0xFF;
}
static ulong
getpixel_k8(Sampler *s, Point pt)
 {- uchar *p, v;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x;
- v = p[0];
- return (v<<24)|(v<<16)|(v<<8)|0xFF;
+ return p[0]*0x01010100 | 0xFF;
}
static ulong
getpixel_m8(Sampler *s, Point pt)
 {- uchar *p, m, r, g, b;
+ uchar *p, m;
p = s->a + pt.y*s->bpl + pt.x;
m = p[0];
p = s->i->cmap->cmap2rgb+3*m;
- r = p[0];
- g = p[1];
- b = p[2];
- return (r<<24)|(g<<16)|(b<<8)|0xFF;
+ return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|0xFF;
}
static ulong
@@ -149,88 +171,64 @@
static ulong
getpixel_r8g8b8(Sampler *s, Point pt)
 {- uchar *p, r, g, b;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x*3;
- b = p[0];
- g = p[1];
- r = p[2];
- return (r<<24)|(g<<16)|(b<<8)|0xFF;
+ return (p[2]<<24)|(p[1]<<16)|(p[0]<<8)|0xFF;
}
static ulong
getpixel_r8g8b8a8(Sampler *s, Point pt)
 {- uchar *p, r, g, b, a;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x*4;
- a = p[0];
- b = p[1];
- g = p[2];
- r = p[3];
- return (r<<24)|(g<<16)|(b<<8)|a;
+ return (p[3]<<24)|(p[2]<<16)|(p[1]<<8)|p[0];
}
static ulong
getpixel_a8r8g8b8(Sampler *s, Point pt)
 {- uchar *p, r, g, b, a;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x*4;
- b = p[0];
- g = p[1];
- r = p[2];
- a = p[3];
- return (r<<24)|(g<<16)|(b<<8)|a;
+ return (p[2]<<24)|(p[1]<<16)|(p[0]<<8)|p[3];
}
static ulong
getpixel_x8r8g8b8(Sampler *s, Point pt)
 {- uchar *p, r, g, b;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x*4;
- b = p[0];
- g = p[1];
- r = p[2];
- return (r<<24)|(g<<16)|(b<<8)|0xFF;
+ return (p[2]<<24)|(p[1]<<16)|(p[0]<<8)|0xFF;
}
static ulong
getpixel_b8g8r8(Sampler *s, Point pt)
 {- uchar *p, r, g, b;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x*3;
- r = p[0];
- g = p[1];
- b = p[2];
- return (r<<24)|(g<<16)|(b<<8)|0xFF;
+ return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|0xFF;
}
static ulong
getpixel_a8b8g8r8(Sampler *s, Point pt)
 {- uchar *p, r, g, b, a;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x*4;
- r = p[0];
- g = p[1];
- b = p[2];
- a = p[3];
- return (r<<24)|(g<<16)|(b<<8)|a;
+ return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
}
static ulong
getpixel_x8b8g8r8(Sampler *s, Point pt)
 {- uchar *p, r, g, b;
+ uchar *p;
p = s->a + pt.y*s->bpl + pt.x*4;
- r = p[0];
- g = p[1];
- b = p[2];
- return (r<<24)|(g<<16)|(b<<8)|0xFF;
+ return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|0xFF;
}
static ulong
@@ -391,12 +389,11 @@
 {uchar *p, r, g, b, m;
- p = blt->a + dp.y*blt->bpl + dp.x;
-
r = rgba>>24;
g = rgba>>16;
b = rgba>>8;
m = RGB2K(r,g,b);
+ p = blt->a + dp.y*blt->bpl + dp.x;
p[0] = m;
}
@@ -405,12 +402,11 @@
 {uchar *p, r, g, b, m;
- p = blt->a + dp.y*blt->bpl + dp.x;
-
r = rgba>>24;
g = rgba>>16;
b = rgba>>8;
m = blt->i->cmap->rgb2cmap[(r>>4)*256+(g>>4)*16+(b>>4)];
+ p = blt->a + dp.y*blt->bpl + dp.x;
p[0] = m;
}
@@ -420,7 +416,6 @@
uchar *p, r, g, b;
ushort v;
- p = blt->a + dp.y*blt->bpl + dp.x*2;
r = rgba>>24;
g = rgba>>16;
b = rgba>>8;
@@ -427,6 +422,7 @@
v = r>>(8-5);
v = (v<<5)|(g>>(8-5));
v = (v<<5)|(b>>(8-5));
+ p = blt->a + dp.y*blt->bpl + dp.x*2;
p[0] = v;
p[1] = v>>8;
}
@@ -437,7 +433,6 @@
uchar *p, r, g, b;
ushort v;
- p = blt->a + dp.y*blt->bpl + dp.x*2;
r = rgba>>24;
g = rgba>>16;
b = rgba>>8;
@@ -444,6 +439,7 @@
v = r>>(8-5);
v = (v<<6)|(g>>(8-6));
v = (v<<5)|(b>>(8-5));
+ p = blt->a + dp.y*blt->bpl + dp.x*2;
p[0] = v;
p[1] = v>>8;
}
@@ -589,7 +585,7 @@
}
static void *
-initsampfn(ulong chan)
+getsampfn(ulong chan)
 { 	switch(chan){case GREY1: return getpixel_k1;
@@ -611,7 +607,7 @@
}
static void *
-initblitfn(ulong chan)
+getblitfn(ulong chan)
 { 	switch(chan){case GREY1: return putpixel_k1;
@@ -637,10 +633,10 @@
 {if(p.x >= s->r.min.x && p.x < s->r.max.x
&& p.y >= s->r.min.y && p.y < s->r.max.y)
- return s->getpixel(s, p);
+ return s->fn(s, p);
 	else if(s->i->flags & Frepl){p = drawrepl(s->r, p);
- return s->getpixel(s, p);
+ return s->fn(s, p);
}
/* edge handler: constant */
return 0;
@@ -683,16 +679,44 @@
}
static ulong
-nearest(Sampler *s, Point p)
+correlate(Sampler *s, Point p)
 {- return sample1(s, p);
-}
+ Point sp;
+ int r, g, b, a, Σr, Σg, Σb, Σa;
+ long *kp, kv;
+ ulong v;
-static ulong (*sample)(Sampler*, Point) = bilinear;
+ kp = (long*)(s->k->data->bdata + s->k->zero);
+ Σr = Σg = Σb = Σa = 0;
+ for(sp.y = 0; sp.y < s->k->r.max.y; sp.y++)
+	for(sp.x = 0; sp.x < s->k->r.max.x; sp.x++){+ v = sample1(s, addpt(p, subpt(sp, s->kcp)));
+ r = v>>24 & 0xFF; r = int2fix(r);
+ g = v>>16 & 0xFF; g = int2fix(g);
+ b = v>>8 & 0xFF; b = int2fix(b);
+ a = v>>0 & 0xFF; a = int2fix(a);
+
+ kv = kp[sp.y*s->k->r.max.x + sp.x];
+ r = fixmul(r, kv);
+ g = fixmul(g, kv);
+ b = fixmul(b, kv);
+ a = fixmul(a, kv);
+
+ Σr += r; Σg += g; Σb += b; Σa += a;
+ }
+ r = clamp(fix2int(Σr), 0, 0xFF);
+ g = clamp(fix2int(Σg), 0, 0xFF);
+ b = clamp(fix2int(Σb), 0, 0xFF);
+ a = clamp(fix2int(Σa), 0, 0xFF);
+
+ return r<<24|g<<16|b<<8|a;
+}
+
int
-memaffinewarp(Memimage *d, Rectangle r, Memimage *s, Point sp0, Warp m)
+memaffinewarp(Memimage *d, Rectangle r, Memimage *s, Point sp0, Warp m, int smooth)
 {+ ulong (*sample)(Sampler*, Point) = sample1;
Sampler samp;
Blitter blit;
Point sp, dp, p2, p2₀;
@@ -708,17 +732,11 @@
if(rectclip(&samp.r, s->r) == 0)
return 0;
- samp.i = s;
- samp.a = s->data->bdata + s->zero;
- samp.bpl = sizeof(ulong)*s->width;
- samp.cmask = (1ULL << s->depth) - 1;
- samp.getpixel = initsampfn(s->chan);
+ if(smooth)
+ sample = bilinear;
- blit.i = d;
- blit.a = d->data->bdata + d->zero;
- blit.bpl = sizeof(ulong)*d->width;
- blit.cmask = (1ULL << d->depth) - 1;
- blit.putpixel = initblitfn(d->chan);
+ initsampler(&samp, s);
+ initblitter(&blit, d);
/*
* incremental affine warping technique from:
@@ -726,7 +744,7 @@
* Lee, S., Lee, GG., Jang, E.S., Kim, WY,
* Intelligent Computing. ICIC 2006. LNCS, vol 4113.
*/
-	p2 = p2₀ = fix_xform((Point){int2fix(r.min.x - dr.min.x), int2fix(r.min.y - dr.min.y)}, m);+	p2 = p2₀ = xform((Point){int2fix(r.min.x - dr.min.x) + (1<<6), int2fix(r.min.y - dr.min.y) + (1<<6)}, m); 	for(dp.y = r.min.y; dp.y < r.max.y; dp.y++){ 	for(dp.x = r.min.x; dp.x < r.max.x; dp.x++){samp.Δx = fixfrac(p2.x);
@@ -736,7 +754,7 @@
sp.y = sp0.y + fix2int(p2.y);
c = sample(&samp, sp);
- blit.putpixel(&blit, dp, c);
+ blit.fn(&blit, dp, c);
p2.x += m[0][0];
p2.y += m[1][0];
@@ -743,6 +761,89 @@
}
p2.x = p2₀.x += m[0][1];
p2.y = p2₀.y += m[1][1];
+ }
+ return 0;
+}
+
+static double
+coeffsum(double *m, int len)
+{+ double *e, Σ;
+
+ e = m + len;
+ Σ = 0;
+ while(m < e)
+ Σ += *m++;
+ return Σ;
+}
+
+Memimage *
+allocmemimagekernel(double *k, int dx, int dy, double denom)
+{+ Memimage *ik;
+ long *kern;
+ double t;
+ int x, y;
+
+ ik = allocmemimage(Rect(0,0,dx,dy), RGBA32); /* any 32-bit depth chan would do */
+	if(ik == nil){+		werrstr("could not allocate image kernel: %r");+ return nil;
+ }
+
+ if(denom == 0)
+ denom = coeffsum(k, dx*dy);
+ denom = denom? 1/denom: 1;
+
+ kern = (long*)(ik->data->bdata + ik->zero);
+ for(y = 0; y < dy; y++)
+	for(x = 0; x < dx; x++){+ t = k[y*dx + x];
+ t *= denom;
+ kern[y*dx + x] = flt2fix(t);
+ }
+
+ return ik;
+}
+
+int
+memimagecorrelate(Memimage *d, Rectangle r, Memimage *s, Point sp0, Memimage *k)
+{+ Sampler samp;
+ Blitter blit;
+ Point sp, dp;
+ Rectangle dr;
+ ulong c;
+
+ dr = d->clipr;
+ rectclip(&dr, d->r);
+ if(rectclip(&r, dr) == 0)
+ return 0;
+
+ samp.r = s->clipr;
+ if(rectclip(&samp.r, s->r) == 0)
+ return 0;
+
+ initsampler(&samp, s);
+ initblitter(&blit, d);
+
+	if(k == nil){+		werrstr("kernel image is nil");+ return -1;
+ }
+	if(k->depth != 32){+		werrstr("kernel image depth is not 32");+ return -1;
+ }
+ samp.k = k;
+ samp.kcp = Pt(Dx(k->r)/2, Dy(k->r)/2);
+
+ for(dp.y = r.min.y; dp.y < r.max.y; dp.y++)
+	for(dp.x = r.min.x; dp.x < r.max.x; dp.x++){+ sp.x = sp0.x + dp.x - dr.min.x;
+ sp.y = sp0.y + dp.y - dr.min.y;
+ c = correlate(&samp, sp);
+ blit.fn(&blit, dp, c);
}
return 0;
}
--- a/sys/src/libmemdraw/warptest.c
+++ b/sys/src/libmemdraw/warptest.c
@@ -85,7 +85,7 @@
void
usage(void)
 {- fprint(2, "usage: %s [-s sx sy] [-t tx ty] [-r θ]\n", argv0);
+ fprint(2, "usage: %s [-SRp] [-s sx sy] [-t tx ty] [-r θ]\n", argv0);
 	exits("usage");}
@@ -96,12 +96,13 @@
Warp w;
Rectangle dr, *wr;
double sx, sy, tx, ty, θ, c, s;
- int dorepl, parallel, nproc, i;
+ int smooth, dorepl, parallel, nproc, i;
char *nprocs;
sx = sy = 1;
tx = ty = 0;
θ = 0;
+ smooth = 0;
dorepl = 0;
parallel = 0;
 	ARGBEGIN{@@ -116,6 +117,9 @@
case 'r':
θ = strtod(EARGF(usage()), nil)*DEG;
break;
+ case 'S':
+ smooth++;
+ break;
case 'R':
dorepl++;
break;
@@ -175,7 +179,7 @@
case -1:
 				sysfatal("rfork: %r");case 0:
- if(memaffinewarp(dst, wr[i], src, src->r.min, w) < 0)
+ if(memaffinewarp(dst, wr[i], src, src->r.min, w, smooth) < 0)
fprint(2, "[%d] memaffinewarp: %r\n", getpid());
exits(nil);
}
@@ -185,23 +189,23 @@
free(wr);
 	}else{- if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+ if(memaffinewarp(dst, dr, src, src->r.min, w, smooth) < 0)
 			sysfatal("memaffinewarp: %r");// dr = rectaddpt(Rect(0,0,Dx(dst->r)/2,Dy(dst->r)/2), dst->r.min);
// dst->clipr = dr;
-// if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+// if(memaffinewarp(dst, dr, src, src->r.min, w, smooth) < 0)
 //			sysfatal("memaffinewarp: %r");// dr = rectaddpt(Rect(Dx(dst->r)/2+1,0,Dx(dst->r),Dy(dst->r)/2), dst->r.min);
// dst->clipr = dst->r;
-// if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+// if(memaffinewarp(dst, dr, src, src->r.min, w, smooth) < 0)
 //			sysfatal("memaffinewarp: %r");// dr = rectaddpt(Rect(0,Dy(dst->r)/2+1,Dx(dst->r)/2,Dy(dst->r)), dst->r.min);
-// if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+// if(memaffinewarp(dst, dr, src, src->r.min, w, smooth) < 0)
 //			sysfatal("memaffinewarp: %r");// dr = rectaddpt(Rect(Dx(dst->r)/2+1,Dy(dst->r)/2+1,Dx(dst->r),Dy(dst->r)), dst->r.min);
// dst->clipr = dr;
-// if(memaffinewarp(dst, dr, src, src->r.min, w) < 0)
+// if(memaffinewarp(dst, dr, src, src->r.min, w, smooth) < 0)
 //			sysfatal("memaffinewarp: %r");}
profend();
--
⑨