code: plan9front

Download patch

ref: c7dcc82b0be805717efbe77c98eaadf3ee1e31af
parent: 546f8cfeca6fca0b6b246c8dbf035027e3f15f8c
author: Ori Bernstein <ori@eigenstate.org>
date: Sat Sep 11 13:46:26 EDT 2021

git/query: fix spurious merge requests

Due to the way LCA is defined, a using a strict LCA
on a graph like this:

 <--a--b--c--d--e--f--g
     \               /
       +-----h-------

can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.

As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.

This replaces our LCA algorithm with one based on the
painting we do while finding a twixt, making it give
the resutls we want.
git/query: fix spurious merge requests

Due to the way LCA is defined, a using a strict LCA
on a graph like this:

 <--a--b--c--d--e--f--g
     \               /
       +-----h-------

can lead to spurious requests to merge. This happens
because 'lca(b, g)' would return 'a', since it can be
reached in one step from 'b', and 2 steps from 'g', while
reaching 'b' from 'a' would be a longer path.

As a result, we need to implement an lca variant that
returns the starting node if one is reachable from the
other, even if it's already found the technically correct
least common ancestor.

This replaces our LCA algorithm with one based on the
painting we do while finding a twixt.

--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -18,6 +18,8 @@
 typedef struct Objlist	Objlist;
 typedef struct Dtab	Dtab;
 typedef struct Dblock	Dblock;
+typedef struct Objq	Objq;
+typedef struct Qelt	Qelt;
 
 enum {
 	Pathmax		= 512,
@@ -151,6 +153,20 @@
 	int	sz;
 };
 
+struct Qelt {
+	Object	*o;
+	vlong	mtime;
+	int	color;
+	int	dist;
+};
+
+struct Objq {
+	Qelt	*heap;
+	int	nheap;
+	int	heapsz;
+	int	nkeep;
+};
+
 struct Dtab {
 	Object	*o;
 	uchar	*base;
@@ -301,3 +317,9 @@
 int	readphase(Conn *);
 int	writephase(Conn *);
 void	closeconn(Conn *);
+
+/* queues */
+void	qinit(Objq*);
+void	qclear(Objq*);
+void	qput(Objq*, Object*, int, int);
+int	qpop(Objq*, Qelt*);
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -15,10 +15,8 @@
 char	*commitid;
 int	shortlog;
 
-Object	**heap;
-int	nheap;
-int	heapsz;
 Objset	done;
+Objq	objq;
 Pfilt	*pathfilt;
 
 void
@@ -192,64 +190,10 @@
 }
 
 static void
-qput(Object *o)
-{
-	Object *p;
-	int i;
-
-	if(oshas(&done, o->hash))
-		return;
-	osadd(&done, o);
-	if(nheap == heapsz){
-		heapsz *= 2;
-		heap = earealloc(heap, heapsz, sizeof(Object*));
-	}
-	heap[nheap++] = o;
-	for(i = nheap - 1; i > 0; i = (i-1)/2){
-		o = heap[i];
-		p = heap[(i-1)/2];
-		if(o->commit->mtime < p->commit->mtime)
-			break;
-		heap[i] = p;
-		heap[(i-1)/2] = o;
-	}
-}
-
-static Object*
-qpop(void)
-{
-	Object *o, *t;
-	int i, l, r, m;
-
-	if(nheap == 0)
-		return nil;
-
-	i = 0;
-	o = heap[0];
-	t = heap[--nheap];
-	heap[0] = t;
-	while(1){
-		m = i;
-		l = 2*i+1;
-		r = 2*i+2;
-		if(l < nheap && heap[m]->commit->mtime < heap[l]->commit->mtime)
-			m = l;
-		if(r < nheap && heap[m]->commit->mtime < heap[r]->commit->mtime)
-			m = r;
-		else
-			break;
-		t = heap[m];
-		heap[m] = heap[i];
-		heap[i] = t;
-		i = m;
-	}
-	return o;
-}
-
-static void
 showcommits(char *c)
 {
 	Object *o, *p;
+	Qelt e;
 	int i;
 	Hash h;
 
@@ -259,18 +203,20 @@
 		sysfatal("resolve %s: %r", c);
 	if((o = readobject(h)) == nil)
 		sysfatal("load %H: %r", h);
-	heapsz = 8;
-	heap = eamalloc(heapsz, sizeof(Object*));
+	qinit(&objq);
 	osinit(&done);
-	qput(o);
-	while((o = qpop()) != nil){
-		show(o);
-		for(i = 0; i < o->commit->nparent; i++){
-			if((p = readobject(o->commit->parent[i])) == nil)
+	qput(&objq, o, 0, 0);
+	while(qpop(&objq, &e)){
+		show(e.o);
+		for(i = 0; i < e.o->commit->nparent; i++){
+			if(oshas(&done, e.o->commit->parent[i]))
+				continue;
+			if((p = readobject(e.o->commit->parent[i])) == nil)
 				sysfatal("load %H: %r", o->commit->parent[i]);
-			qput(p);
+			osadd(&done, p);
+			qput(&objq, p, 0, 0);
 		}
-		unref(o);
+		unref(e.o);
 	}
 }
 
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -158,6 +158,7 @@
 	char query[2048], repo[512];
 
 	ARGBEGIN{
+	case 'd':	chattygit++;	break;
 	case 'p':	fullpath++;	break;
 	case 'c':	changes++;	break;
 	case 'r':	reverse ^= 1;	break;
--- a/sys/src/cmd/git/ref.c
+++ b/sys/src/cmd/git/ref.c
@@ -5,9 +5,21 @@
 #include "git.h"
 
 typedef struct Eval	Eval;
-typedef struct XObject	XObject;
-typedef struct Objq	Objq;
+typedef struct Lcaq	Lcaq;
 
+struct Lcaq {
+	Objq;
+
+	Hash	*head;
+	Hash	*tail;
+	int	nhead;
+	int	ntail;
+
+	Object	*best;
+	int	dist;
+};
+
+
 enum {
 	Blank,
 	Keep,
@@ -22,19 +34,12 @@
 	int	stksz;
 };
 
-struct XObject {
-	Object	*obj;
-	Object	*mark;
-	XObject	*queue;
-	XObject	*next;
+static char *colors[] = {
+[Keep] "keep",
+[Drop] "drop",
+[Blank] "blank",
 };
 
-struct Objq {
-	Objq	*next;
-	Object	*o;
-	int	color;
-};
-
 static Object zcommit = {
 	.type=GCommit
 };
@@ -128,150 +133,98 @@
 	return 1;
 }
 
-XObject*
-hnode(XObject *ht[], Object *o)
+static int
+pickbest(Lcaq *q, Qelt *e, int color)
 {
-	XObject *h;
-	int	hh;
+	int i, best, exact;
 
-	hh = o->hash.h[0] & 0xff;
-	for(h = ht[hh]; h; h = h->next)
-		if(hasheq(&o->hash, &h->obj->hash))
-			return h;
-
-	h = emalloc(sizeof(*h));
-	h->obj = o;
-	h->mark = nil;
-	h->queue = nil;
-	h->next = ht[hh];
-	ht[hh] = h;
-	return h;
-}
-
-Object*
-ancestor(Object *a, Object *b)
-{
-	Object *o, *p, *r;
-	XObject *ht[256];
-	XObject *h, *q, *q1, *q2;
-	int i;
-
-	if(a == b)
-		return a;
-	if(a == nil || b == nil)
-		return nil;
-	r = nil;
-	memset(ht, 0, sizeof(ht));
-	q1 = nil;
-
-	h = hnode(ht, a);
-	h->mark = a;
-	h->queue = q1;
-	q1 = h;
-
-	h = hnode(ht, b);
-	h->mark = b;
-	h->queue = q1;
-	q1 = h;
-
-	while(1){
-		q2 = nil;
-		while(q = q1){
-			q1 = q->queue;
-			q->queue = nil;
-			o = q->obj;
-			for(i = 0; i < o->commit->nparent; i++){
-				p = readobject(o->commit->parent[i]);
-				if(p == nil)
-					goto err;
-				h = hnode(ht, p);
-				if(h->mark != nil){
-					if(h->mark != q->mark){
-						r = h->obj;
-						goto done;
-					}
-				} else {
-					h->mark = q->mark;
-					h->queue = q2;
-					q2 = h;
-				}
-			}
+	best = 0;
+	exact = 0;
+	if(color == Blank || e->color == color)
+		return 0;
+	if(e->dist < q->dist){
+		dprint(1, "found best (dist %d < %d): %H\n", e->dist, q->dist, e->o->hash);
+		best = 1;
+	}
+	for(i = 0; i < q->nhead; i++)
+		if(hasheq(&q->head[i], &e->o->hash)){
+			dprint(1, "found best (exact head): %H\n", e->o->hash);
+			best = 1;
+			exact = 1;
 		}
-		if(q2 == nil){
-err:
-			werrstr("no common ancestor");
-			break;
+	for(i = 0; i < q->ntail; i++)
+		if(hasheq(&q->tail[i], &e->o->hash)){
+			dprint(1, "found best (exact tail): %H\n", e->o->hash);
+			best = 1;
+			exact = 1;
 		}
-		q1 = q2;
+	if(best){
+		q->best = e->o;
+		q->dist = e->dist;
 	}
-done:
-	for(i=0; i<nelem(ht); i++){
-		while(h = ht[i]){
-			ht[i] = h->next;
-			free(h);
-		}
-	}
-	return r;
+	return exact;
 }
 
-int
-lca(Eval *ev)
-{
-	Object *a, *b, *o;
-
-	if(ev->nstk < 2){
-		werrstr("ancestor needs 2 objects");
-		return -1;
-	}
-	a = pop(ev);
-	b = pop(ev);
-	o = ancestor(a, b);
-	if(o == nil)
-		return -1;
-	push(ev, o);
-	return 0;
-}
-
 static int
-repaint(Objset *keep, Objset *drop, Object *o)
+repaint(Lcaq *lcaq, Objset *keep, Objset *drop, Object *o, int dist, int ancestor)
 {
+	Lcaq objq;
+	Qelt e;
 	Object *p;
 	int i;
 
-	if(!oshas(keep, o->hash) && !oshas(drop, o->hash)){
-		dprint(2, "repaint: blank => drop %H\n", o->hash);
-		osadd(drop, o);
-		return 0;
+	qinit(&objq);
+	if((o = readobject(o->hash)) == nil)
+		return -1;
+	qput(&objq, o, Drop, dist);
+	while(qpop(&objq, &e)){
+		o = e.o;
+		if(oshas(drop, o->hash))
+			continue;
+		if(ancestor && pickbest(lcaq, &e, Keep))
+			goto out;
+		if(!oshas(keep, o->hash)){
+			dprint(2, "repaint: blank => drop %H\n", o->hash);
+			osadd(drop, o);
+			continue;
+		}
+		for(i = 0; i < o->commit->nparent; i++){
+			if(oshas(drop, o->commit->parent[i]))
+				continue;
+			if((p = readobject(o->commit->parent[i])) == nil)
+				goto out;
+			if(p->type != GCommit){
+				fprint(2, "hash %H not commit\n", p->hash);
+				unref(p);
+			}
+			qput(&objq, p, Drop, e.dist+1);
+		}
+		unref(e.o);
 	}
-	if(oshas(keep, o->hash))
-		dprint(2, "repaint: keep => drop %H\n", o->hash);
-	osadd(drop, o);
-	for(i = 0; i < o->commit->nparent; i++){
-		if((p = readobject(o->commit->parent[i])) == nil)
-			return -1;
-		if(repaint(keep, drop, p) == -1)
-			return -1;
-		unref(p);
-	}
+out:
+	qclear(&objq);
 	return 0;
 }
 
-int
-findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres)
+static int
+paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, int ancestor)
 {
-	Objq *q, *e, *n, **p;
+	Qelt e;
+	Lcaq objq;
 	Objset keep, drop;
 	Object *o, *c;
 	int i, ncolor;
 
-	e = nil;
-	q = nil;
-	p = &q;
 	osinit(&keep);
 	osinit(&drop);
+	qinit(&objq);
+	objq.head = head;
+	objq.nhead = nhead;
+	objq.tail = tail;
+	objq.ntail = ntail;
+	objq.dist = 1<<30;
+
 	for(i = 0; i < nhead; i++){
-		if(hasheq(&head[i], &Zhash))
-			continue;
 		if((o = readobject(head[i])) == nil){
 			fprint(2, "warning: %H does not point at commit\n", o->hash);
 			werrstr("read head %H: %r", head[i]);
@@ -282,17 +235,11 @@
 			unref(o);
 			continue;
 		}
-		dprint(1, "twixt init: keep %H\n", o->hash);
-		e = emalloc(sizeof(Objq));
-		e->o = o;
-		e->color = Keep;
-		*p = e;
-		p = &e->next;
+		dprint(1, "init: keep %H\n", o->hash);
+		qput(&objq, o, Keep, 0);
 		unref(o);
 	}		
 	for(i = 0; i < ntail; i++){
-		if(hasheq(&tail[i], &Zhash))
-			continue;
 		if((o = readobject(tail[i])) == nil){
 			werrstr("read tail %H: %r", tail[i]);
 			return -1;
@@ -303,35 +250,33 @@
 			continue;
 		}
 		dprint(1, "init: drop %H\n", o->hash);
-		e = emalloc(sizeof(Objq));
-		e->o = o;
-		e->color = Drop;
-		*p = e;
-		p = &e->next;
+		qput(&objq, o, Drop, 0);
 		unref(o);
 	}
 
 	dprint(1, "finding twixt commits\n");
-	while(q != nil){
-		if(oshas(&drop, q->o->hash))
+	while(qpop(&objq, &e)){
+		if(oshas(&drop, e.o->hash))
 			ncolor = Drop;
-		else if(oshas(&keep, q->o->hash))
+		else if(oshas(&keep, e.o->hash))
 			ncolor = Keep;
 		else
 			ncolor = Blank;
-		if(ncolor == Drop || ncolor == Keep && q->color == Keep)
-			goto next;
-		if(ncolor == Keep && q->color == Drop){
-			if(repaint(&keep, &drop, q->o) == -1)
+		if(ancestor && pickbest(&objq, &e, ncolor))
+			goto exactlca;
+		if(ncolor == Keep && e.color == Keep || ncolor == Drop)
+			continue;
+		if(ncolor == Keep && e.color == Drop){
+			if(repaint(&objq, &keep, &drop, e.o, e.dist, ancestor) == -1)
 				goto error;
 		}else if (ncolor == Blank) {
-			dprint(2, "visit: %s %H\n", q->color == Keep ? "keep" : "drop", q->o->hash);
-			if(q->color == Keep)
-				osadd(&keep, q->o);
+			if(e.color == Keep)
+				osadd(&keep, e.o);
 			else
-				osadd(&drop, q->o);
-			for(i = 0; i < q->o->commit->nparent; i++){
-				if((c = readobject(q->o->commit->parent[i])) == nil)
+				osadd(&drop, e.o);
+			o = readobject(e.o->hash);
+			for(i = 0; i < o->commit->nparent; i++){
+				if((c = readobject(e.o->commit->parent[i])) == nil)
 					goto error;
 				if(c->type != GCommit){
 					fprint(2, "warning: %H does not point at commit\n", c->hash);
@@ -338,40 +283,82 @@
 					unref(c);
 					continue;
 				}
-				dprint(2, "enqueue: %s %H\n", q->color == Keep ? "keep" : "drop", c->hash);
-				n = emalloc(sizeof(Objq));
-				n->color = q->color;
-				n->next = nil;
-				n->o = c;
-				e->next = n;
-				e = n;
+				dprint(2, "\tenqueue: %s %H\n", colors[e.color], c->hash);
+				qput(&objq, c, e.color, e.dist+1);
 				unref(c);
 			}
-		}else{
-			sysfatal("oops");
+			unref(o);
 		}
-next:
-		n = q->next;
-		free(q);
-		q = n;
 	}
-	*res = eamalloc(keep.nobj, sizeof(Object*));
-	*nres = 0;
-	for(i = 0; i < keep.sz; i++){
-		if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){
-			(*res)[*nres] = keep.obj[i];
-			(*nres)++;
+exactlca:
+	if(ancestor){
+		dprint(1, "found ancestor\n");
+		if(objq.best == nil){
+			*nres = 0;
+			*res = nil;
+		}else{
+			*nres = 1;
+			*res = eamalloc(1, sizeof(Object*));
+			(*res)[0] = objq.best;
 		}
+	}else{
+		dprint(1, "found twixt\n");
+		*res = eamalloc(keep.nobj, sizeof(Object*));
+		*nres = 0;
+		for(i = 0; i < keep.sz; i++){
+			if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){
+				(*res)[*nres] = keep.obj[i];
+				(*nres)++;
+			}
+		}
 	}
 	osclear(&keep);
 	osclear(&drop);
 	return 0;
 error:
-	for(; q != nil; q = n) {
-		n = q->next;
-		free(q);
-	}
+	dprint(1, "twixt error: %r\n");
+	free(objq.heap);
 	return -1;
+}
+
+int
+findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres)
+{
+	return paint(head, nhead, tail, ntail, res, nres, 0);
+}
+
+Object*
+ancestor(Object *a, Object *b)
+{
+	Object **o, *r;
+	int n;
+
+	if(paint(&a->hash, 1, &b->hash, 1, &o, &n, 1) == -1 || n == 0)
+		return nil;
+	r = o[0];
+	free(o);
+	return ref(r);
+}
+
+int
+lca(Eval *ev)
+{
+	Object *a, *b, **o;
+	int n;
+
+	if(ev->nstk < 2){
+		werrstr("ancestor needs 2 objects");
+		return -1;
+	}
+	n = 0;
+	b = pop(ev);
+	a = pop(ev);
+	paint(&a->hash, 1, &b->hash, 1, &o, &n, 1);
+	if(n == 0)
+		return -1;
+	push(ev, *o);
+	free(o);
+	return 0;
 }
 
 static int
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -321,3 +321,74 @@
 	}
 	return pct;
 }
+
+void
+qinit(Objq *q)
+{
+	memset(q, 0, sizeof(Objq));
+	q->nheap = 0;
+	q->heapsz = 8;
+	q->heap = eamalloc(q->heapsz, sizeof(Qelt));
+}
+
+void
+qclear(Objq *q)
+{
+	free(q->heap);
+}
+
+void
+qput(Objq *q, Object *o, int color, int dist)
+{
+	Qelt t;
+	int i;
+
+	if(q->nheap == q->heapsz){
+		q->heapsz *= 2;
+		q->heap = earealloc(q->heap, q->heapsz, sizeof(Qelt));
+	}
+	q->heap[q->nheap].o = o;
+	q->heap[q->nheap].color = color;
+	q->heap[q->nheap].dist = dist;
+	q->heap[q->nheap].mtime = o->commit->mtime;
+	for(i = q->nheap; i > 0; i = (i-1)/2){
+		if(q->heap[i].mtime < q->heap[(i-1)/2].mtime)
+			break;
+		t = q->heap[i];
+		q->heap[i] = q->heap[(i-1)/2];
+		q->heap[(i-1)/2] = t;
+	}
+	q->nheap++;
+}
+
+int
+qpop(Objq *q, Qelt *e)
+{
+	int i, l, r, m;
+	Qelt t;
+
+	if(q->nheap == 0)
+		return 0;
+	*e = q->heap[0];
+	if(--q->nheap == 0)
+		return 1;
+
+	i = 0;
+	q->heap[0] = q->heap[q->nheap];
+	while(1){
+		m = i;
+		l = 2*i+1;
+		r = 2*i+2;
+		if(l < q->nheap && q->heap[m].mtime < q->heap[l].mtime)
+			m = l;
+		if(r < q->nheap && q->heap[m].mtime < q->heap[r].mtime)
+			m = r;
+		if(m == i)
+			break;
+		t = q->heap[m];
+		q->heap[m] = q->heap[i];
+		q->heap[i] = t;
+		i = m;
+	}
+	return 1;
+}