git: plan9front

Download patch

ref: a9795be0f1f20d9068ba373419311b1afb8925f5
parent: 2346ea488600e7e735c2275e5bcd310bbbf9810c
author: rodri <rgl@antares-labs.eu>
date: Mon Apr 7 20:44:59 EDT 2025

libgeometry: 40% more efficient versions of (det|adj|inv)m3

thanks to compgeom sensei David Eberly.

i also took the chance to get rid of the commented out
Cayley-Hamilton versions. if anybody wants them, they
are already on the tree history.

--- a/sys/src/libgeometry/matrix.c
+++ b/sys/src/libgeometry/matrix.c
@@ -7,7 +7,7 @@
 void
 identity(Matrix m)
 {
-	memset(m, 0, 3*3*sizeof(double));
+	memset(m, 0, sizeof m);
 	m[0][0] = m[1][1] = m[2][2] = 1;
 }
 
@@ -40,10 +40,12 @@
 	a[0][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
 	a[0][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
 	a[0][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
+
 	t0 = a[1][0]; t1 = a[1][1]; t2 = a[1][2];
 	a[1][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
 	a[1][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
 	a[1][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2];
+
 	t0 = a[2][0]; t1 = a[2][1]; t2 = a[2][2];
 	a[2][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0];
 	a[2][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1];
@@ -112,38 +114,18 @@
 	tmp[0][0] =  m[1][1]*m[2][2] - m[1][2]*m[2][1];
 	tmp[0][1] = -m[0][1]*m[2][2] + m[0][2]*m[2][1];
 	tmp[0][2] =  m[0][1]*m[1][2] - m[0][2]*m[1][1];
+
 	tmp[1][0] = -m[1][0]*m[2][2] + m[1][2]*m[2][0];
 	tmp[1][1] =  m[0][0]*m[2][2] - m[0][2]*m[2][0];
 	tmp[1][2] = -m[0][0]*m[1][2] + m[0][2]*m[1][0];
+
 	tmp[2][0] =  m[1][0]*m[2][1] - m[1][1]*m[2][0];
 	tmp[2][1] = -m[0][0]*m[2][1] + m[0][1]*m[2][0];
 	tmp[2][2] =  m[0][0]*m[1][1] - m[0][1]*m[1][0];
-	memmove(m, tmp, 3*3*sizeof(double));
+
+	memmove(m, tmp, sizeof tmp);
 }
 
-/* Cayley-Hamilton */
-//void
-//invertm(Matrix m)
-//{
-//	Matrix m², r;
-//	double det, trm, trm²;
-//
-//	det = detm(m);
-//	if(det == 0)
-//		return;
-//	trm = tracem(m);
-//	memmove(m², m, 3*3*sizeof(double));
-//	mulm(m², m²);
-//	trm² = tracem(m²);
-//	identity(r);
-//	smulm(r, (trm*trm - trm²)/2);
-//	smulm(m, trm);
-//	subm(r, m);
-//	addm(r, m²);
-//	smulm(r, 1/det);
-//	memmove(m, r, 3*3*sizeof(double));
-//}
-
 /* Cramer's */
 void
 invm(Matrix m)
@@ -172,7 +154,7 @@
 void
 identity3(Matrix3 m)
 {
-	memset(m, 0, 4*4*sizeof(double));
+	memset(m, 0, sizeof m);
 	m[0][0] = m[1][1] = m[2][2] = m[3][3] = 1;
 }
 
@@ -206,16 +188,19 @@
 	a[0][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
 	a[0][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
 	a[0][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+
 	t0 = a[1][0]; t1 = a[1][1]; t2 = a[1][2]; t3 = a[1][3];
 	a[1][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
 	a[1][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
 	a[1][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
 	a[1][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+
 	t0 = a[2][0]; t1 = a[2][1]; t2 = a[2][2]; t3 = a[2][3];
 	a[2][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
 	a[2][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
 	a[2][2] = t0*b[0][2] + t1*b[1][2] + t2*b[2][2] + t3*b[3][2];
 	a[2][3] = t0*b[0][3] + t1*b[1][3] + t2*b[2][3] + t3*b[3][3];
+
 	t0 = a[3][0]; t1 = a[3][1]; t2 = a[3][2]; t3 = a[3][3];
 	a[3][0] = t0*b[0][0] + t1*b[1][0] + t2*b[2][0] + t3*b[3][0];
 	a[3][1] = t0*b[0][1] + t1*b[1][1] + t2*b[2][1] + t3*b[3][1];
@@ -246,21 +231,30 @@
 		}
 }
 
+/*
+ * extracted from invm3(2)
+ */
 double
 detm3(Matrix3 m)
 {
-	return m[0][0]*(m[1][1]*(m[2][2]*m[3][3] - m[2][3]*m[3][2])+
-			m[1][2]*(m[2][3]*m[3][1] - m[2][1]*m[3][3])+
-			m[1][3]*(m[2][1]*m[3][2] - m[2][2]*m[3][1]))
-	      -m[0][1]*(m[1][0]*(m[2][2]*m[3][3] - m[2][3]*m[3][2])+
-			m[1][2]*(m[2][3]*m[3][0] - m[2][0]*m[3][3])+
-			m[1][3]*(m[2][0]*m[3][2] - m[2][2]*m[3][0]))
-	      +m[0][2]*(m[1][0]*(m[2][1]*m[3][3] - m[2][3]*m[3][1])+
-			m[1][1]*(m[2][3]*m[3][0] - m[2][0]*m[3][3])+
-			m[1][3]*(m[2][0]*m[3][1] - m[2][1]*m[3][0]))
-	      -m[0][3]*(m[1][0]*(m[2][1]*m[3][2] - m[2][2]*m[3][1])+
-			m[1][1]*(m[2][2]*m[3][0] - m[2][0]*m[3][2])+
-			m[1][2]*(m[2][0]*m[3][1] - m[2][1]*m[3][0]));
+	double s0, s1, s2, s3, s4, s5;
+	double c0, c1, c2, c3, c4, c5;
+
+	s0 = m[0][0]*m[1][1] - m[1][0]*m[0][1];
+	s1 = m[0][0]*m[1][2] - m[1][0]*m[0][2];
+	s2 = m[0][0]*m[1][3] - m[1][0]*m[0][3];
+	s3 = m[0][1]*m[1][2] - m[1][1]*m[0][2];
+	s4 = m[0][1]*m[1][3] - m[1][1]*m[0][3];
+	s5 = m[0][2]*m[1][3] - m[1][2]*m[0][3];
+
+	c5 = m[2][2]*m[3][3] - m[3][2]*m[2][3];
+	c4 = m[2][1]*m[3][3] - m[3][1]*m[2][3];
+	c3 = m[2][1]*m[3][2] - m[3][1]*m[2][2];
+	c2 = m[2][0]*m[3][3] - m[3][0]*m[2][3];
+	c1 = m[2][0]*m[3][2] - m[3][0]*m[2][2];
+	c0 = m[2][0]*m[3][1] - m[3][0]*m[2][1];
+
+	return s0*c5 - s1*c4 + s2*c3 + s3*c2 - s4*c1 + s5*c0;
 }
 
 double
@@ -288,102 +282,104 @@
 	return minorm3(m, row, col)*((row+col & 1) == 0? 1: -1);
 }
 
+/*
+ * extracted from invm3(2)
+ */
 void
 adjm3(Matrix3 m)
 {
+	double s0, s1, s2, s3, s4, s5;
+	double c0, c1, c2, c3, c4, c5;
 	Matrix3 tmp;
 
-	tmp[0][0]=m[1][1]*(m[2][2]*m[3][3] - m[2][3]*m[3][2])+
-		  m[2][1]*(m[1][3]*m[3][2] - m[1][2]*m[3][3])+
-		  m[3][1]*(m[1][2]*m[2][3] - m[1][3]*m[2][2]);
-	tmp[0][1]=m[0][1]*(m[2][3]*m[3][2] - m[2][2]*m[3][3])+
-		  m[2][1]*(m[0][2]*m[3][3] - m[0][3]*m[3][2])+
-		  m[3][1]*(m[0][3]*m[2][2] - m[0][2]*m[2][3]);
-	tmp[0][2]=m[0][1]*(m[1][2]*m[3][3] - m[1][3]*m[3][2])+
-		  m[1][1]*(m[0][3]*m[3][2] - m[0][2]*m[3][3])+
-		  m[3][1]*(m[0][2]*m[1][3] - m[0][3]*m[1][2]);
-	tmp[0][3]=m[0][1]*(m[1][3]*m[2][2] - m[1][2]*m[2][3])+
-		  m[1][1]*(m[0][2]*m[2][3] - m[0][3]*m[2][2])+
-		  m[2][1]*(m[0][3]*m[1][2] - m[0][2]*m[1][3]);
-	tmp[1][0]=m[1][0]*(m[2][3]*m[3][2] - m[2][2]*m[3][3])+
-		  m[2][0]*(m[1][2]*m[3][3] - m[1][3]*m[3][2])+
-		  m[3][0]*(m[1][3]*m[2][2] - m[1][2]*m[2][3]);
-	tmp[1][1]=m[0][0]*(m[2][2]*m[3][3] - m[2][3]*m[3][2])+
-		  m[2][0]*(m[0][3]*m[3][2] - m[0][2]*m[3][3])+
-		  m[3][0]*(m[0][2]*m[2][3] - m[0][3]*m[2][2]);
-	tmp[1][2]=m[0][0]*(m[1][3]*m[3][2] - m[1][2]*m[3][3])+
-		  m[1][0]*(m[0][2]*m[3][3] - m[0][3]*m[3][2])+
-		  m[3][0]*(m[0][3]*m[1][2] - m[0][2]*m[1][3]);
-	tmp[1][3]=m[0][0]*(m[1][2]*m[2][3] - m[1][3]*m[2][2])+
-		  m[1][0]*(m[0][3]*m[2][2] - m[0][2]*m[2][3])+
-		  m[2][0]*(m[0][2]*m[1][3] - m[0][3]*m[1][2]);
-	tmp[2][0]=m[1][0]*(m[2][1]*m[3][3] - m[2][3]*m[3][1])+
-		  m[2][0]*(m[1][3]*m[3][1] - m[1][1]*m[3][3])+
-		  m[3][0]*(m[1][1]*m[2][3] - m[1][3]*m[2][1]);
-	tmp[2][1]=m[0][0]*(m[2][3]*m[3][1] - m[2][1]*m[3][3])+
-		  m[2][0]*(m[0][1]*m[3][3] - m[0][3]*m[3][1])+
-		  m[3][0]*(m[0][3]*m[2][1] - m[0][1]*m[2][3]);
-	tmp[2][2]=m[0][0]*(m[1][1]*m[3][3] - m[1][3]*m[3][1])+
-		  m[1][0]*(m[0][3]*m[3][1] - m[0][1]*m[3][3])+
-		  m[3][0]*(m[0][1]*m[1][3] - m[0][3]*m[1][1]);
-	tmp[2][3]=m[0][0]*(m[1][3]*m[2][1] - m[1][1]*m[2][3])+
-		  m[1][0]*(m[0][1]*m[2][3] - m[0][3]*m[2][1])+
-		  m[2][0]*(m[0][3]*m[1][1] - m[0][1]*m[1][3]);
-	tmp[3][0]=m[1][0]*(m[2][2]*m[3][1] - m[2][1]*m[3][2])+
-		  m[2][0]*(m[1][1]*m[3][2] - m[1][2]*m[3][1])+
-		  m[3][0]*(m[1][2]*m[2][1] - m[1][1]*m[2][2]);
-	tmp[3][1]=m[0][0]*(m[2][1]*m[3][2] - m[2][2]*m[3][1])+
-		  m[2][0]*(m[0][2]*m[3][1] - m[0][1]*m[3][2])+
-		  m[3][0]*(m[0][1]*m[2][2] - m[0][2]*m[2][1]);
-	tmp[3][2]=m[0][0]*(m[1][2]*m[3][1] - m[1][1]*m[3][2])+
-		  m[1][0]*(m[0][1]*m[3][2] - m[0][2]*m[3][1])+
-		  m[3][0]*(m[0][2]*m[1][1] - m[0][1]*m[1][2]);
-	tmp[3][3]=m[0][0]*(m[1][1]*m[2][2] - m[1][2]*m[2][1])+
-		  m[1][0]*(m[0][2]*m[2][1] - m[0][1]*m[2][2])+
-		  m[2][0]*(m[0][1]*m[1][2] - m[0][2]*m[1][1]);
-	memmove(m, tmp, 4*4*sizeof(double));
-}
+	s0 = m[0][0]*m[1][1] - m[1][0]*m[0][1];
+	s1 = m[0][0]*m[1][2] - m[1][0]*m[0][2];
+	s2 = m[0][0]*m[1][3] - m[1][0]*m[0][3];
+	s3 = m[0][1]*m[1][2] - m[1][1]*m[0][2];
+	s4 = m[0][1]*m[1][3] - m[1][1]*m[0][3];
+	s5 = m[0][2]*m[1][3] - m[1][2]*m[0][3];
 
-/* Cayley-Hamilton */
-//void
-//invertm3(Matrix3 m)
-//{
-//	Matrix3 m², m³, r;
-//	double det, trm, trm², trm³;
-//
-//	det = detm3(m);
-//	if(det == 0)
-//		return;
-//	trm = tracem3(m);
-//	memmove(m³, m, 4*4*sizeof(double));
-//	mulm(m³, m³);
-//	mulm(m³, m);
-//	trm³ = tracem3(m³);
-//	memmove(m², m, 4*4*sizeof(double));
-//	mulm(m², m²);
-//	trm² = tracem3(m²);
-//	identity3(r);
-//	smulm3(r, (trm*trm*trm - 3*trm*trm² + 2*trm³)/6);
-//	smulm3(m, (trm*trm - trm²)/2);
-//	smulm3(m², trm);
-//	subm(r, m);
-//	addm(r, m²);
-//	subm(r, m³);
-//	smulm(r, 1/det);
-//	memmove(m, r, 4*4*sizeof(double));
-//}
+	c5 = m[2][2]*m[3][3] - m[3][2]*m[2][3];
+	c4 = m[2][1]*m[3][3] - m[3][1]*m[2][3];
+	c3 = m[2][1]*m[3][2] - m[3][1]*m[2][2];
+	c2 = m[2][0]*m[3][3] - m[3][0]*m[2][3];
+	c1 = m[2][0]*m[3][2] - m[3][0]*m[2][2];
+	c0 = m[2][0]*m[3][1] - m[3][0]*m[2][1];
 
-/* Cramer's */
+	tmp[0][0] = ( m[1][1]*c5 - m[1][2]*c4 + m[1][3]*c3);
+	tmp[0][1] = (-m[0][1]*c5 + m[0][2]*c4 - m[0][3]*c3);
+	tmp[0][2] = ( m[3][1]*s5 - m[3][2]*s4 + m[3][3]*s3);
+	tmp[0][3] = (-m[2][1]*s5 + m[2][2]*s4 - m[2][3]*s3);
+
+	tmp[1][0] = (-m[1][0]*c5 + m[1][2]*c2 - m[1][3]*c1);
+	tmp[1][1] = ( m[0][0]*c5 - m[0][2]*c2 + m[0][3]*c1);
+	tmp[1][2] = (-m[3][0]*s5 + m[3][2]*s2 - m[3][3]*s1);
+	tmp[1][3] = ( m[2][0]*s5 - m[2][2]*s2 + m[2][3]*s1);
+
+	tmp[2][0] = ( m[1][0]*c4 - m[1][1]*c2 + m[1][3]*c0);
+	tmp[2][1] = (-m[0][0]*c4 + m[0][1]*c2 - m[0][3]*c0);
+	tmp[2][2] = ( m[3][0]*s4 - m[3][1]*s2 + m[3][3]*s0);
+	tmp[2][3] = (-m[2][0]*s4 + m[2][1]*s2 - m[2][3]*s0);
+
+	tmp[3][0] = (-m[1][0]*c3 + m[1][1]*c1 - m[1][2]*c0);
+	tmp[3][1] = ( m[0][0]*c3 - m[0][1]*c1 + m[0][2]*c0);
+	tmp[3][2] = (-m[3][0]*s3 + m[3][1]*s1 - m[3][2]*s0);
+	tmp[3][3] = ( m[2][0]*s3 - m[2][1]*s1 + m[2][2]*s0);
+
+	memmove(m, tmp, sizeof tmp);
+}
+
+/*
+ * David Eberly, “The Laplace Expansion Theorem: Computing the Determinants and Inverses of Matrices”, June 2024, p. 9
+ */
 void
 invm3(Matrix3 m)
 {
-	double det;
+	double s0, s1, s2, s3, s4, s5;
+	double c0, c1, c2, c3, c4, c5;
+	double Δ;
+	Matrix3 tmp;
 
-	det = detm3(m);
-	if(det == 0)
-		return; /* singular matrices are not invertible */
-	adjm3(m);
-	smulm3(m, 1/det);
+	s0 = m[0][0]*m[1][1] - m[1][0]*m[0][1];
+	s1 = m[0][0]*m[1][2] - m[1][0]*m[0][2];
+	s2 = m[0][0]*m[1][3] - m[1][0]*m[0][3];
+	s3 = m[0][1]*m[1][2] - m[1][1]*m[0][2];
+	s4 = m[0][1]*m[1][3] - m[1][1]*m[0][3];
+	s5 = m[0][2]*m[1][3] - m[1][2]*m[0][3];
+
+	c5 = m[2][2]*m[3][3] - m[3][2]*m[2][3];
+	c4 = m[2][1]*m[3][3] - m[3][1]*m[2][3];
+	c3 = m[2][1]*m[3][2] - m[3][1]*m[2][2];
+	c2 = m[2][0]*m[3][3] - m[3][0]*m[2][3];
+	c1 = m[2][0]*m[3][2] - m[3][0]*m[2][2];
+	c0 = m[2][0]*m[3][1] - m[3][0]*m[2][1];
+
+	Δ = s0*c5 - s1*c4 + s2*c3 + s3*c2 - s4*c1 + s5*c0;
+	if(Δ == 0)
+		return;
+	Δ = 1/Δ;
+
+	tmp[0][0] = ( m[1][1]*c5 - m[1][2]*c4 + m[1][3]*c3)*Δ;
+	tmp[0][1] = (-m[0][1]*c5 + m[0][2]*c4 - m[0][3]*c3)*Δ;
+	tmp[0][2] = ( m[3][1]*s5 - m[3][2]*s4 + m[3][3]*s3)*Δ;
+	tmp[0][3] = (-m[2][1]*s5 + m[2][2]*s4 - m[2][3]*s3)*Δ;
+
+	tmp[1][0] = (-m[1][0]*c5 + m[1][2]*c2 - m[1][3]*c1)*Δ;
+	tmp[1][1] = ( m[0][0]*c5 - m[0][2]*c2 + m[0][3]*c1)*Δ;
+	tmp[1][2] = (-m[3][0]*s5 + m[3][2]*s2 - m[3][3]*s1)*Δ;
+	tmp[1][3] = ( m[2][0]*s5 - m[2][2]*s2 + m[2][3]*s1)*Δ;
+
+	tmp[2][0] = ( m[1][0]*c4 - m[1][1]*c2 + m[1][3]*c0)*Δ;
+	tmp[2][1] = (-m[0][0]*c4 + m[0][1]*c2 - m[0][3]*c0)*Δ;
+	tmp[2][2] = ( m[3][0]*s4 - m[3][1]*s2 + m[3][3]*s0)*Δ;
+	tmp[2][3] = (-m[2][0]*s4 + m[2][1]*s2 - m[2][3]*s0)*Δ;
+
+	tmp[3][0] = (-m[1][0]*c3 + m[1][1]*c1 - m[1][2]*c0)*Δ;
+	tmp[3][1] = ( m[0][0]*c3 - m[0][1]*c1 + m[0][2]*c0)*Δ;
+	tmp[3][2] = (-m[3][0]*s3 + m[3][1]*s1 - m[3][2]*s0)*Δ;
+	tmp[3][3] = ( m[2][0]*s3 - m[2][1]*s1 + m[2][2]*s0)*Δ;
+
+	memmove(m, tmp, sizeof tmp);
 }
 
 Point3
--