/** @file Matrix.cpp @author Morgan McGuire, http://graphics.cs.williams.edu */ #include "G3D/Matrix.h" #include "G3D/TextOutput.h" static inline G3D::Matrix::T negate(G3D::Matrix::T x) { return -x; } namespace G3D { int Matrix::debugNumCopyOps = 0; int Matrix::debugNumAllocOps = 0; void Matrix::serialize(TextOutput& t) const { t.writeSymbol("%"); t.writeNumber(rows()); t.writeSymbol("x"); t.writeNumber(cols()); t.pushIndent(); t.writeNewline(); t.writeSymbol("["); for (int r = 0; r < rows(); ++r) { for (int c = 0; c < cols(); ++c) { t.writeNumber(impl->get(r, c)); if (c < cols() - 1) { t.writeSymbol(","); } else { if (r < rows() - 1) { t.writeSymbol(";"); t.writeNewline(); } } } } t.writeSymbol("]"); t.popIndent(); t.writeNewline(); } std::string Matrix::toString(const std::string& name) const { std::string s; if (name != "") { s += format("%s = \n", name.c_str()); } s += "["; for (int r = 0; r < rows(); ++r) { for (int c = 0; c < cols(); ++c) { double v = impl->get(r, c); if (::fabs(v) < 0.00001) { // Don't print "negative zero" s += format("% 10.04g", 0.0); } else if (v == iRound(v)) { // Print integers nicely s += format("% 10.04g", v); } else { s += format("% 10.04f", v); } if (c < cols() - 1) { s += ","; } else if (r < rows() - 1) { s += ";\n "; } else { s += "]\n"; } } } return s; } #define INPLACE(OP)\ ImplRef A = impl;\ \ if (! A.unique()) {\ impl.reset(new Impl(A->R, A->C));\ }\ \ A->OP(B, *impl); Matrix& Matrix::operator*=(const T& B) { INPLACE(mul) return *this; } Matrix& Matrix::operator-=(const T& B) { INPLACE(sub) return *this; } Matrix& Matrix::operator+=(const T& B) { INPLACE(add) return *this; } Matrix& Matrix::operator/=(const T& B) { INPLACE(div) return *this; } Matrix& Matrix::operator*=(const Matrix& B) { // We can't optimize this one *this = *this * B; return *this; } Matrix& Matrix::operator-=(const Matrix& _B) { const Impl& B = *_B.impl; INPLACE(sub) return *this; } Matrix& Matrix::operator+=(const Matrix& _B) { const Impl& B = *_B.impl; INPLACE(add) return *this; } void Matrix::arrayMulInPlace(const Matrix& _B) { const Impl& B = *_B.impl; INPLACE(arrayMul) } void Matrix::arrayDivInPlace(const Matrix& _B) { const Impl& B = *_B.impl; INPLACE(arrayDiv) } #undef INPLACE Matrix Matrix::fromDiagonal(const Matrix& d) { debugAssert((d.rows() == 1) || (d.cols() == 1)); int n = d.numElements(); Matrix D = zero(n, n); for (int i = 0; i < n; ++i) { D.set(i, i, d.impl->data[i]); } return D; } void Matrix::set(int r, int c, T v) { if (! impl.unique()) { // Copy the data before mutating; this object is shared impl.reset(new Impl(*impl)); } impl->set(r, c, v); } void Matrix::setRow(int r, const Matrix& vec) { debugAssertM(vec.cols() == cols(), "A row must be set to a vector of the same size."); debugAssertM(vec.rows() == 1, "A row must be set to a row vector."); debugAssert(r >= 0); debugAssert(r < rows()); if (! impl.unique()) { // Copy the data before mutating; this object is shared impl.reset(new Impl(*impl)); } impl->setRow(r, vec.impl->data); } void Matrix::setCol(int c, const Matrix& vec) { debugAssertM(vec.rows() == rows(), "A column must be set to a vector of the same size."); debugAssertM(vec.cols() == 1, "A column must be set to a column vector."); debugAssert(c >= 0); debugAssert(c < cols()); if (! impl.unique()) { // Copy the data before mutating; this object is shared impl.reset(new Impl(*impl)); } impl->setCol(c, vec.impl->data); } Matrix::T Matrix::get(int r, int c) const { return impl->get(r, c); } Matrix Matrix::row(int r) const { debugAssert(r >= 0); debugAssert(r < rows()); Matrix out(1, cols()); out.impl->setRow(1, impl->elt[r]); return out; } Matrix Matrix::col(int c) const { debugAssert(c >= 0); debugAssert(c < cols()); Matrix out(rows(), 1); T* outData = out.impl->data; // Get a pointer to the first element in the column const T* inElt = &(impl->elt[0][c]); int R = rows(); int C = cols(); for (int r = 0; r < R; ++r) { outData[r] = *inElt; // Skip around to the next row inElt += C; } return out; } Matrix Matrix::zero(int R, int C) { Impl* A = new Impl(R, C); A->setZero(); return Matrix(A); } Matrix Matrix::one(int R, int C) { Impl* A = new Impl(R, C); for (int i = R * C - 1; i >= 0; --i) { A->data[i] = 1.0; } return Matrix(A); } Matrix Matrix::random(int R, int C) { Impl* A = new Impl(R, C); for (int i = R * C - 1; i >= 0; --i) { A->data[i] = G3D::uniformRandom(0.0, 1.0); } return Matrix(A); } Matrix Matrix::identity(int N) { Impl* m = new Impl(N, N); m->setZero(); for (int i = 0; i < N; ++i) { m->elt[i][i] = 1.0; } return Matrix(m); } // Implement an explicit-output unary method by trampolining to the impl #define TRAMPOLINE_EXPLICIT_1(method)\ void Matrix::method(Matrix& out) const {\ if ((out.impl == impl) && impl.unique()) {\ impl->method(*out.impl);\ } else {\ out = this->method();\ }\ } TRAMPOLINE_EXPLICIT_1(abs) TRAMPOLINE_EXPLICIT_1(negate) TRAMPOLINE_EXPLICIT_1(arrayLog) TRAMPOLINE_EXPLICIT_1(arrayExp) TRAMPOLINE_EXPLICIT_1(arrayCos) TRAMPOLINE_EXPLICIT_1(arraySin) void Matrix::mulRow(int r, const T& v) { debugAssert(r >= 0 && r < rows()); if (! impl.unique()) { impl.reset(new Impl(*impl)); } impl->mulRow(r, v); } void Matrix::transpose(Matrix& out) const { if ((out.impl == impl) && impl.unique() && (impl->R == impl->C)) { // In place impl->transpose(*out.impl); } else { out = this->transpose(); } } Matrix3 Matrix::toMatrix3() const { debugAssert(impl->R == 3); debugAssert(impl->C == 3); return Matrix3( impl->get(0,0), impl->get(0,1), impl->get(0,2), impl->get(1,0), impl->get(1,1), impl->get(1,2), impl->get(2,0), impl->get(2,1), impl->get(2,2)); } Matrix4 Matrix::toMatrix4() const { debugAssert(impl->R == 4); debugAssert(impl->C == 4); return Matrix4( impl->get(0,0), impl->get(0,1), impl->get(0,2), impl->get(0,3), impl->get(1,0), impl->get(1,1), impl->get(1,2), impl->get(1,3), impl->get(2,0), impl->get(2,1), impl->get(2,2), impl->get(2,3), impl->get(3,0), impl->get(3,1), impl->get(3,2), impl->get(3,3)); } Vector2 Matrix::toVector2() const { debugAssert(impl->R * impl->C == 2); if (impl->R > impl->C) { return Vector2(impl->get(0,0), impl->get(1,0)); } else { return Vector2(impl->get(0,0), impl->get(0,1)); } } Vector3 Matrix::toVector3() const { debugAssert(impl->R * impl->C == 3); if (impl->R > impl->C) { return Vector3(impl->get(0,0), impl->get(1,0), impl->get(2, 0)); } else { return Vector3(impl->get(0,0), impl->get(0,1), impl->get(0, 2)); } } Vector4 Matrix::toVector4() const { debugAssert( ((impl->R == 4) && (impl->C == 1)) || ((impl->R == 1) && (impl->C == 4))); if (impl->R > impl->C) { return Vector4(impl->get(0,0), impl->get(1,0), impl->get(2, 0), impl->get(3,0)); } else { return Vector4(impl->get(0,0), impl->get(0,1), impl->get(0, 2), impl->get(0,3)); } } void Matrix::swapRows(int r0, int r1) { debugAssert(r0 >= 0 && r0 < rows()); debugAssert(r1 >= 0 && r1 < rows()); if (r0 == r1) { return; } if (! impl.unique()) { impl.reset(new Impl(*impl)); } impl->swapRows(r0, r1); } void Matrix::swapAndNegateCols(int c0, int c1) { debugAssert(c0 >= 0 && c0 < cols()); debugAssert(c1 >= 0 && c1 < cols()); if (c0 == c1) { return; } if (! impl.unique()) { impl.reset(new Impl(*impl)); } impl->swapAndNegateCols(c0, c1); } Matrix Matrix::subMatrix(int r1, int r2, int c1, int c2) const { debugAssert(r2>=r1); debugAssert(c2>=c1); debugAssert(c2=0); debugAssert(c1>=0); Matrix X(r2 - r1 + 1, c2 - c1 + 1); for (int r = 0; r < X.rows(); ++r) { for (int c = 0; c < X.cols(); ++c) { X.set(r, c, get(r + r1, c + c1)); } } return X; } bool Matrix::anyNonZero() const { return impl->anyNonZero(); } bool Matrix::allNonZero() const { return impl->allNonZero(); } void Matrix::svd(Matrix& U, Array& d, Matrix& V, bool sort) const { debugAssert(rows() >= cols()); debugAssertM(&U != &V, "Arguments to SVD must be different matrices"); debugAssertM(&U != this, "Arguments to SVD must be different matrices"); debugAssertM(&V != this, "Arguments to SVD must be different matrices"); int R = rows(); int C = cols(); // Make sure we don't overwrite a shared matrix if (! V.impl.unique()) { V = Matrix::zero(C, C); } else { V.impl->setSize(C, C); } if (&U != this || ! impl.unique()) { // Make a copy of this for in-place SVD U.impl.reset(new Impl(*impl)); } d.resize(C); const char* ret = svdCore(U.impl->elt, R, C, d.getCArray(), V.impl->elt); debugAssertM(ret == NULL, ret); (void)ret; if (sort) { // Sort the singular values from greatest to least Array rank; rank.resize(C); for (int c = 0; c < C; ++c) { rank[c].col = c; rank[c].value = d[c]; } rank.sort(SORT_INCREASING); Matrix Uold = U; Matrix Vold = V; U = Matrix(U.rows(), U.cols()); V = Matrix(V.rows(), V.cols()); // Now permute U, d, and V appropriately for (int c0 = 0; c0 < C; ++c0) { const int c1 = rank[c0].col; d[c0] = rank[c0].value; U.setCol(c0, Uold.col(c1)); V.setCol(c0, Vold.col(c1)); } } } #define COMPARE_SCALAR(OP)\ Matrix Matrix::operator OP (const T& scalar) const {\ int R = rows();\ int C = cols();\ int N = R * C;\ Matrix out = Matrix::zero(R, C);\ \ const T* raw = impl->data;\ T* outRaw = out.impl->data;\ for (int i = 0; i < N; ++i) {\ outRaw[i] = raw[i] OP scalar;\ }\ \ return out;\ } COMPARE_SCALAR(<) COMPARE_SCALAR(<=) COMPARE_SCALAR(>) COMPARE_SCALAR(>=) COMPARE_SCALAR(==) COMPARE_SCALAR(!=) #undef COMPARE_SCALAR double Matrix::normSquared() const { int R = rows(); int C = cols(); int N = R * C; double sum = 0.0; const T* raw = impl->data; for (int i = 0; i < N; ++i) { sum += square(raw[i]); } return sum; } double Matrix::norm() const { return sqrt(normSquared()); } /////////////////////////////////////////////////////////// Matrix::Impl::Impl(const Matrix3& M) : elt(NULL), data(NULL), R(0), C(0), dataSize(0){ setSize(3, 3); for (int r = 0; r < 3; ++r) { for (int c = 0; c < 3; ++c) { set(r, c, M[r][c]); } } } Matrix::Impl::Impl(const Matrix4& M): elt(NULL), data(NULL), R(0), C(0), dataSize(0) { setSize(4, 4); for (int r = 0; r < 4; ++r) { for (int c = 0; c < 4; ++c) { set(r, c, M[r][c]); } } } void Matrix::Impl::setSize(int newRows, int newCols) { if ((R == newRows) && (C == newCols)) { // Nothing to do return; } int newSize = newRows * newCols; R = newRows; C = newCols; // Only allocate if we need more space // or the size difference is ridiculous if ((newSize > dataSize) || (newSize < dataSize / 4)) { System::alignedFree(data); data = (float*)System::alignedMalloc(R * C * sizeof(T), 16); ++Matrix::debugNumAllocOps; dataSize = newSize; } // Construct the row pointers //delete[] elt; System::free(elt); elt = (T**)System::malloc(R * sizeof(T*));// new T*[R]; for (int r = 0; r < R; ++ r) { elt[r] = data + r * C; } } Matrix::Impl::~Impl() { //delete[] elt; System::free(elt); System::alignedFree(data); } Matrix::Impl& Matrix::Impl::operator=(const Impl& m) { setSize(m.R, m.C); System::memcpy(data, m.data, R * C * sizeof(T)); ++Matrix::debugNumCopyOps; return *this; } void Matrix::Impl::setZero() { System::memset(data, 0, R * C * sizeof(T)); } void Matrix::Impl::swapRows(int r0, int r1) { T* R0 = elt[r0]; T* R1 = elt[r1]; for (int c = 0; c < C; ++c) { T temp = R0[c]; R0[c] = R1[c]; R1[c] = temp; } } void Matrix::Impl::swapAndNegateCols(int c0, int c1) { for (int r = 0; r < R; ++r) { T* row = elt[r]; const T temp = -row[c0]; row[c0] = -row[c1]; row[c1] = temp; } } void Matrix::Impl::mulRow(int r, const T& v) { T* row = elt[r]; for (int c = 0; c < C; ++c) { row[c] *= v; } } void Matrix::Impl::mul(const Impl& B, Impl& out) const { const Impl& A = *this; debugAssertM( (this != &out) && (&B != &out), "Output argument to mul cannot be the same as an input argument."); debugAssert(A.C == B.R); debugAssert(A.R == out.R); debugAssert(B.C == out.C); for (int r = 0; r < out.R; ++r) { for (int c = 0; c < out.C; ++c) { T sum = 0.0; for (int i = 0; i < A.C; ++i) { sum += A.get(r, i) * B.get(i, c); } out.set(r, c, sum); } } } // We're about to define several similar methods, // so use a macro to share implementations. This // must be a macro because the difference between // the macros is the operation in the inner loop. #define IMPLEMENT_ARRAY_2(method, OP)\ void Matrix::Impl::method(const Impl& B, Impl& out) const {\ const Impl& A = *this;\ \ debugAssert(A.C == B.C);\ debugAssert(A.R == B.R);\ debugAssert(A.C == out.C);\ debugAssert(A.R == out.R);\ \ for (int i = R * C - 1; i >= 0; --i) {\ out.data[i] = A.data[i] OP B.data[i];\ }\ } #define IMPLEMENT_ARRAY_1(method, f)\ void Matrix::Impl::method(Impl& out) const {\ const Impl& A = *this;\ \ debugAssert(A.C == out.C);\ debugAssert(A.R == out.R);\ \ for (int i = R * C - 1; i >= 0; --i) {\ out.data[i] = f(A.data[i]);\ }\ } #define IMPLEMENT_ARRAY_SCALAR(method, OP)\ void Matrix::Impl::method(Matrix::T B, Impl& out) const {\ const Impl& A = *this;\ \ debugAssert(A.C == out.C);\ debugAssert(A.R == out.R);\ \ for (int i = R * C - 1; i >= 0; --i) {\ out.data[i] = A.data[i] OP B;\ }\ } IMPLEMENT_ARRAY_2(add, +) IMPLEMENT_ARRAY_2(sub, -) IMPLEMENT_ARRAY_2(arrayMul, *) IMPLEMENT_ARRAY_2(arrayDiv, /) IMPLEMENT_ARRAY_SCALAR(add, +) IMPLEMENT_ARRAY_SCALAR(sub, -) IMPLEMENT_ARRAY_SCALAR(mul, *) IMPLEMENT_ARRAY_SCALAR(div, /) IMPLEMENT_ARRAY_1(abs, ::fabs) IMPLEMENT_ARRAY_1(negate, ::negate) IMPLEMENT_ARRAY_1(arrayLog, ::log) IMPLEMENT_ARRAY_1(arraySqrt, ::sqrt) IMPLEMENT_ARRAY_1(arrayExp, ::exp) IMPLEMENT_ARRAY_1(arrayCos, ::cos) IMPLEMENT_ARRAY_1(arraySin, ::sin) #undef IMPLEMENT_ARRAY_SCALAR #undef IMPLEMENT_ARRAY_1 #undef IMPLEMENT_ARRAY_2 // lsub is special because the argument order is reversed void Matrix::Impl::lsub(Matrix::T B, Impl& out) const { const Impl& A = *this; debugAssert(A.C == out.C); debugAssert(A.R == out.R); for (int i = R * C - 1; i >= 0; --i) { out.data[i] = B - A.data[i]; } } void Matrix::Impl::inverseViaAdjoint(Impl& out) const { debugAssert(&out != this); // Inverse = adjoint / determinant adjoint(out); // Don't call the determinant method when we already have an // adjoint matrix; there's a faster way of computing it: the dot // product of the first row and the adjoint's first col. double det = 0.0; for (int r = R - 1; r >= 0; --r) { det += elt[0][r] * out.elt[r][0]; } out.div(Matrix::T(det), out); } void Matrix::Impl::transpose(Impl& out) const { debugAssert(out.R == C); debugAssert(out.C == R); if (&out == this) { // Square matrix in place for (int r = 0; r < R; ++r) { for (int c = r + 1; c < C; ++c) { T temp = get(r, c); out.set(r, c, get(c, r)); out.set(c, r, temp); } } } else { for (int r = 0; r < R; ++r) { for (int c = 0; c < C; ++c) { out.set(c, r, get(r, c)); } } } } void Matrix::Impl::adjoint(Impl& out) const { cofactor(out); // Transpose is safe to perform in place out.transpose(out); } void Matrix::Impl::cofactor(Impl& out) const { debugAssert(&out != this); for(int r = 0; r < R; ++r) { for(int c = 0; c < C; ++c) { out.set(r, c, cofactor(r, c)); } } } Matrix::T Matrix::Impl::cofactor(int r, int c) const { // Strang p. 217 float s = isEven(r + c) ? 1.0f : -1.0f; return s * determinant(r, c); } Matrix::T Matrix::Impl::determinant(int nr, int nc) const { debugAssert(R > 0); debugAssert(C > 0); Impl A(R - 1, C - 1); withoutRowAndCol(nr, nc, A); return A.determinant(); } void Matrix::Impl::setRow(int r, const T* vals) { debugAssert(r >= 0); System::memcpy(elt[r], vals, sizeof(T) * C); } void Matrix::Impl::setCol(int c, const T* vals) { for (int r = 0; r < R; ++r) { elt[r][c] = vals[r]; } } Matrix::T Matrix::Impl::determinant() const { debugAssert(R == C); // Compute using cofactors switch(R) { case 0: return 0; case 1: // Determinant of a 1x1 is the element return elt[0][0]; case 2: // Determinant of a 2x2 is ad-bc return elt[0][0] * elt[1][1] - elt[0][1] * elt[1][0]; case 3: { // Determinant of an nxn matrix is the dot product of the first // row with the first row of cofactors. The base cases of this // method get called a lot, so we spell out the implementation // for the 3x3 case. float cofactor00 = elt[1][1] * elt[2][2] - elt[1][2] * elt[2][1]; float cofactor10 = elt[1][2] * elt[2][0] - elt[1][0] * elt[2][2]; float cofactor20 = elt[1][0] * elt[2][1] - elt[1][1] * elt[2][0]; return Matrix::T( elt[0][0] * cofactor00 + elt[0][1] * cofactor10 + elt[0][2] * cofactor20); } default: { // Determinant of an n x n matrix is the dot product of the first // row with the first row of cofactors T det = 0; for (int c = 0; c < C; ++c) { det += elt[0][c] * cofactor(0, c); } return det; } } } void Matrix::Impl::withoutRowAndCol(int excludeRow, int excludeCol, Impl& out) const { debugAssert(out.R == R - 1); debugAssert(out.C == C - 1); for (int r = 0; r < out.R; ++r) { for (int c = 0; c < out.C; ++c) { out.elt[r][c] = elt[r + ((r >= excludeRow) ? 1 : 0)][c + ((c >= excludeCol) ? 1 : 0)]; } } } Matrix Matrix::pseudoInverse(float tolerance) const { if ((cols() == 1) || (rows() == 1)) { return vectorPseudoInverse(); } else if ((cols() <= 4) || (rows() <= 4)) { return partitionPseudoInverse(); } else { return svdPseudoInverse(tolerance); } } /* Public function for testing purposes only. Use pseudoInverse(), as it contains optimizations for nonsingular matrices with at least one small (<5) dimension. */ // See http://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse Matrix Matrix::svdPseudoInverse(float tolerance) const { if (cols() > rows()) { return transpose().svdPseudoInverse(tolerance).transpose(); } // Matrices from SVD Matrix U, V; // Diagonal elements Array d; svd(U, d, V); if (rows() == 1) { d.resize(1, false); } if (tolerance < 0) { // TODO: Should be eps(d[0]), which is the largest diagonal tolerance = G3D::max(rows(), cols()) * 0.0001f; } Matrix X; int r = 0; for (int i = 0; i < d.size(); ++i) { if (d[i] > tolerance) { d[i] = Matrix::T(1) / d[i]; ++r; } } if (r == 0) { // There were no non-zero elements X = zero(cols(), rows()); } else { // Use the first r columns // Test code (the rest is below) /* d.resize(r); Matrix testU = U.subMatrix(0, U.rows() - 1, 0, r - 1); Matrix testV = V.subMatrix(0, V.rows() - 1, 0, r - 1); Matrix testX = testV * Matrix::fromDiagonal(d) * testU.transpose(); X = testX; */ // We want to do this: // // d.resize(r); // U = U.subMatrix(0, U.rows() - 1, 0, r - 1); // X = V * Matrix::fromDiagonal(d) * U.transpose(); // // but creating a large diagonal matrix and then // multiplying by it is wasteful. So we instead // explicitly perform A = (D * U')' = U * D, and // then multiply X = V * A'. Matrix A = Matrix(U.rows(), r); const T* dPtr = d.getCArray(); for (int i = 0; i < A.rows(); ++i) { const T* Urow = U.impl->elt[i]; T* Arow = A.impl->elt[i]; const int Acols = A.cols(); for (int j = 0; j < Acols; ++j) { // A(i,j) = U(i,:) * D(:,j) // This is non-zero only at j = i because D is diagonal // A(i,j) = U(i,j) * D(j,j) Arow[j] = Urow[j] * dPtr[j]; } } // // Compute X = V.subMatrix(0, V.rows() - 1, 0, r - 1) * A.transpose() // // Avoid the explicit subMatrix call, and by storing A' instead of A, avoid // both the transpose and the memory incoherence of striding across memory // in big steps. alwaysAssertM(A.cols() == r, "Internal dimension mismatch during pseudoInverse()"); alwaysAssertM(V.cols() >= r, "Internal dimension mismatch during pseudoInverse()"); X = Matrix(V.rows(), A.rows()); T** Xelt = X.impl->elt; for (int i = 0; i < X.rows(); ++i) { const T* Vrow = V.impl->elt[i]; for (int j = 0; j < X.cols(); ++j) { const T* Arow = A.impl->elt[j]; T sum = 0; for (int k = 0; k < r; ++k) { sum += Vrow[k] * Arow[k]; } Xelt[i][j] = sum; } } /* // Test that results are the same after optimizations: Matrix diff = X - testX; T n = diff.norm(); debugAssert(n < 0.0001); */ } return X; } // Computes pseudoinverse for a vector Matrix Matrix::vectorPseudoInverse() const { // If vector A has nonzero elements: transpose A, then divide each elt. by the squared norm // If A is zero vector: transpose A double x = 0.0; if (anyNonZero()) { x = 1.0 / normSquared(); } Matrix A(cols(), rows()); T** Aelt = A.impl->elt; for (int r = 0; r < rows(); ++r) { const T* MyRow = impl->elt[r]; for (int c = 0; c < cols(); ++c) { Aelt[c][r] = T(MyRow[c] * x); } } return Matrix(A); } Matrix Matrix::rowPartPseudoInverse() const{ int m = rows(); int n = cols(); alwaysAssertM((m<=n),"Row-partitioned block matrix pseudoinverse requires Relt; T** Belt = B.impl->elt; for (int i = 0; i < m; ++i) { const T* Arow = Aelt[i]; for (int j = 0; j < m; ++j) { const T* Brow = Aelt[j]; T sum = 0; for (int k = 0; k < n; ++k) { sum += Arow[k] * Brow[k]; } Belt[i][j] = sum; } } // B has size m x m switch (m) { case 2: return row2PseudoInverse(B); case 3: return row3PseudoInverse(B); case 4: return row4PseudoInverse(B); default: alwaysAssertM(false, "G3D internal error: Should have used the vector or general case!"); return Matrix(); } } Matrix Matrix::colPartPseudoInverse() const{ int m = rows(); int n = cols(); alwaysAssertM((m>=n),"Column-partitioned block matrix pseudoinverse requires R>C"); // TODO: Put each of the individual cases in its own helper function // TODO: Push the B computation down into the individual cases // B = A' * A Matrix A = *this; Matrix B = Matrix(n, n); T** Aelt = A.impl->elt; T** Belt = B.impl->elt; for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { T sum = 0; for (int k = 0; k < m; ++k) { sum += Aelt[k][i] * Aelt[k][j]; } Belt[i][j] = sum; } } // B has size n x n switch (n) { case 2: return col2PseudoInverse(B); case 3: return col3PseudoInverse(B); case 4: return col4PseudoInverse(B); default: alwaysAssertM(false, "G3D internal error: Should have used the vector or general case!"); return Matrix(); } } Matrix Matrix::col2PseudoInverse(const Matrix& B) const { Matrix A = *this; int m = rows(); int n = cols(); (void)n; // Row-major 2x2 matrix const float B2[2][2] = {{B.get(0,0), B.get(0,1)}, {B.get(1,0), B.get(1,1)}}; float det = (B2[0][0]*B2[1][1]) - (B2[0][1]*B2[1][0]); if (fuzzyEq(det, T(0))) { // Matrix was singular; the block matrix pseudo-inverse can't // handle that, so fall back to the old case return svdPseudoInverse(); } else { // invert using formula at http://www.netsoc.tcd.ie/~jgilbert/maths_site/applets/algebra/matrix_inversion.html // Multiply by Binv * A' Matrix X(cols(), rows()); T** Xelt = X.impl->elt; T** Aelt = A.impl->elt; float binv00 = B2[1][1]/det, binv01 = -B2[1][0]/det; float binv10 = -B2[0][1]/det, binv11 = B2[0][0]/det; for (int j = 0; j < m; ++j) { const T* Arow = Aelt[j]; float a0 = Arow[0]; float a1 = Arow[1]; Xelt[0][j] = binv00 * a0 + binv01 * a1; Xelt[1][j] = binv10 * a0 + binv11 * a1; } return X; } } Matrix Matrix::col3PseudoInverse(const Matrix& B) const { Matrix A = *this; int m = rows(); int n = cols(); Matrix3 B3 = B.toMatrix3(); if (fuzzyEq(B3.determinant(), (T)0.0)) { // Matrix was singular; the block matrix pseudo-inverse can't // handle that, so fall back to the old case return svdPseudoInverse(); } else { Matrix3 B3inv = B3.inverse(); // Multiply by Binv * A' Matrix X(cols(), rows()); T** Xelt = X.impl->elt; T** Aelt = A.impl->elt; for (int i = 0; i < n; ++i) { T* Xrow = Xelt[i]; for (int j = 0; j < m; ++j) { const T* Arow = Aelt[j]; T sum = 0; const float* Binvrow = B3inv[i]; for (int k = 0; k < n; ++k) { sum += Binvrow[k] * Arow[k]; } Xrow[j] = sum; } } return X; } } Matrix Matrix::col4PseudoInverse(const Matrix& B) const { Matrix A = *this; int m = rows(); int n = cols(); Matrix4 B4 = B.toMatrix4(); if (fuzzyEq(B4.determinant(), (T)0.0)) { // Matrix was singular; the block matrix pseudo-inverse can't // handle that, so fall back to the old case return svdPseudoInverse(); } else { Matrix4 B4inv = B4.inverse(); // Multiply by Binv * A' Matrix X(cols(), rows()); T** Xelt = X.impl->elt; T** Aelt = A.impl->elt; for (int i = 0; i < n; ++i) { T* Xrow = Xelt[i]; for (int j = 0; j < m; ++j) { const T* Arow = Aelt[j]; T sum = 0; const float* Binvrow = B4inv[i]; for (int k = 0; k < n; ++k) { sum += Binvrow[k] * Arow[k]; } Xrow[j] = sum; } } return X; } } Matrix Matrix::row2PseudoInverse(const Matrix& B) const { Matrix A = *this; int m = rows(); int n = cols(); (void)m; // Row-major 2x2 matrix const float B2[2][2] = {{B.get(0,0), B.get(0,1)}, {B.get(1,0), B.get(1,1)}}; float det = (B2[0][0]*B2[1][1]) - (B2[0][1]*B2[1][0]); if (fuzzyEq(det, T(0))) { // Matrix was singular; the block matrix pseudo-inverse can't // handle that, so fall back to the old case return svdPseudoInverse(); } else { // invert using formula at http://www.netsoc.tcd.ie/~jgilbert/maths_site/applets/algebra/matrix_inversion.html // Multiply by Binv * A' Matrix X(cols(), rows()); T** Xelt = X.impl->elt; T** Aelt = A.impl->elt; float binv00 = B2[1][1]/det, binv01 = -B2[1][0]/det; float binv10 = -B2[0][1]/det, binv11 = B2[0][0]/det; for (int j = 0; j < n; ++j) { Xelt[j][0] = Aelt[0][j] * binv00 + Aelt[1][j] * binv10; Xelt[j][1] = Aelt[0][j] * binv01 + Aelt[1][j] * binv11; } return X; } } Matrix Matrix::row3PseudoInverse(const Matrix& B) const { Matrix A = *this; int m = rows(); int n = cols(); Matrix3 B3 = B.toMatrix3(); if (fuzzyEq(B3.determinant(), (T)0.0)) { // Matrix was singular; the block matrix pseudo-inverse can't // handle that, so fall back to the old case return svdPseudoInverse(); } else { Matrix3 B3inv = B3.inverse(); // Multiply by Binv * A' Matrix X(cols(), rows()); T** Xelt = X.impl->elt; T** Aelt = A.impl->elt; for (int i = 0; i < n; ++i) { T* Xrow = Xelt[i]; for (int j = 0; j < m; ++j) { T sum = 0; for (int k = 0; k < m; ++k) { sum += Aelt[k][i] * B3inv[j][k]; } Xrow[j] = sum; } } return X; } } Matrix Matrix::row4PseudoInverse(const Matrix& B) const { Matrix A = *this; int m = rows(); int n = cols(); Matrix4 B4 = B.toMatrix4(); if (fuzzyEq(B4.determinant(), (T)0.0)) { // Matrix was singular; the block matrix pseudo-inverse can't // handle that, so fall back to the old case return svdPseudoInverse(); } else { Matrix4 B4inv = B4.inverse(); // Multiply by Binv * A' Matrix X(cols(), rows()); T** Xelt = X.impl->elt; T** Aelt = A.impl->elt; for (int i = 0; i < n; ++i) { T* Xrow = Xelt[i]; for (int j = 0; j < m; ++j) { T sum = 0; for (int k = 0; k < m; ++k) { sum += Aelt[k][i] * B4inv[j][k]; } Xrow[j] = sum; } } return X; } } // Uses the block matrix pseudoinverse to compute the pseudoinverse of a full-rank mxn matrix with m >= n // http://en.wikipedia.org/wiki/Block_matrix_pseudoinverse Matrix Matrix::partitionPseudoInverse() const { // Logic: // A^-1 = (A'A)^-1 A' // A has few (n) columns, so A'A is small (n x n) and fast to invert int m = rows(); int n = cols(); if (m < n) { // TODO: optimize by pushing through the transpose //return transpose().partitionPseudoInverse().transpose(); return rowPartPseudoInverse(); } else { return colPartPseudoInverse(); } } void Matrix::Impl::inverseInPlaceGaussJordan() { debugAssertM(R == C, format( "Cannot perform Gauss-Jordan inverse on a non-square matrix." " (Argument was %dx%d)", R, C)); // Exchange to float elements # define SWAP(x, y) {float temp = x; x = y; y = temp;} // The integer arrays pivot, rowIndex, and colIndex are // used for bookkeeping on the pivoting static Array colIndex, rowIndex, pivot; int col = 0, row = 0; colIndex.resize(R); rowIndex.resize(R); pivot.resize(R); static const int NO_PIVOT = -1; // Initialize the pivot array to default values. for (int i = 0; i < R; ++i) { pivot[i] = NO_PIVOT; } // This is the main loop over the columns to be reduced // Loop over the columns. for (int c = 0; c < R; ++c) { // Find the largest element and use that as a pivot float largestMagnitude = 0.0; // This is the outer loop of the search for a pivot element for (int r = 0; r < R; ++r) { // Unless we've already found the pivot, keep going if (pivot[r] != 0) { // Find the largest pivot for (int k = 0; k < R; ++k) { if (pivot[k] == NO_PIVOT) { const float mag = fabs(elt[r][k]); if (mag >= largestMagnitude) { largestMagnitude = mag; row = r; col = k; } } } } } pivot[col] += 1; // Interchange columns so that the pivot element is on the diagonal (we'll have to undo this // at the end) if (row != col) { for (int k = 0; k < R; ++k) { SWAP(elt[row][k], elt[col][k]) } } // The pivot is now at [row, col] rowIndex[c] = row; colIndex[c] = col; double piv = elt[col][col]; debugAssertM(piv != 0.0, "Matrix is singular"); // Divide everything by the pivot (avoid computing the division // multiple times). const double pivotInverse = 1.0 / piv; elt[col][col] = 1.0; for (int k = 0; k < R; ++k) { elt[col][k] *= Matrix::T(pivotInverse); } // Reduce all rows for (int r = 0; r < R; ++r) { // Skip over the pivot row if (r != col) { double oldValue = elt[r][col]; elt[r][col] = 0.0; for (int k = 0; k < R; ++k) { elt[r][k] -= Matrix::T(elt[col][k] * oldValue); } } } } // Put the columns back in the correct locations for (int i = R - 1; i >= 0; --i) { if (rowIndex[i] != colIndex[i]) { for (int k = 0; k < R; ++k) { SWAP(elt[k][rowIndex[i]], elt[k][colIndex[i]]); } } } # undef SWAP } bool Matrix::Impl::anyNonZero() const { int N = R * C; for (int i = 0; i < N; ++i) { if (data[i] != 0.0) { return true; } } return false; } bool Matrix::Impl::allNonZero() const { int N = R * C; for (int i = 0; i < N; ++i) { if (data[i] == 0.0) { return false; } } return true; } /** Helper for svdCore */ static double pythag(double a, double b) { double at = fabs(a), bt = fabs(b), ct, result; if (at > bt) { ct = bt / at; result = at * sqrt(1.0 + square(ct)); } else if (bt > 0.0) { ct = at / bt; result = bt * sqrt(1.0 + square(ct)); } else { result = 0.0; } return result; } #define SIGN(a, b) ((b) >= 0.0 ? fabs(a) : -fabs(a)) const char* Matrix::svdCore(float** U, int rows, int cols, float* D, float** V) { const int MAX_ITERATIONS = 30; int flag, i, its, j, jj, k, l = 0, nm = 0; double c, f, h, s, x, y, z; double anorm = 0.0, g = 0.0, scale = 0.0; // Temp row vector double* rv1; debugAssertM(rows >= cols, "Must have more rows than columns"); rv1 = (double*)System::alignedMalloc(cols * sizeof(double), 16); debugAssert(rv1); // Householder reduction to bidiagonal form for (i = 0; i < cols; ++i) { // Left-hand reduction l = i + 1; rv1[i] = scale * g; g = s = scale = 0.0; if (i < rows) { for (k = i; k < rows; ++k) { scale += fabs((double)U[k][i]); } if (scale) { for (k = i; k < rows; ++k) { U[k][i] = (float)((double)U[k][i]/scale); s += ((double)U[k][i] * (double)U[k][i]); } f = (double)U[i][i]; g = -sign(f)*(sqrt(s)); h = f * g - s; U[i][i] = (float)(f - g); if (i != cols - 1) { for (j = l; j < cols; ++j) { for (s = 0.0, k = i; k < rows; ++k) { s += ((double)U[k][i] * (double)U[k][j]); } f = s / h; for (k = i; k < rows; ++k) { U[k][j] += (float)(f * (double)U[k][i]); } } } for (k = i; k < rows; ++k) { U[k][i] = (float)((double)U[k][i]*scale); } } } D[i] = (float)(scale * g); // right-hand reduction g = s = scale = 0.0; if (i < rows && i != cols - 1) { for (k = l; k < cols; ++k) { scale += fabs((double)U[i][k]); } if (scale) { for (k = l; k < cols; ++k) { U[i][k] = (float)((double)U[i][k]/scale); s += ((double)U[i][k] * (double)U[i][k]); } f = (double)U[i][l]; g = -SIGN(sqrt(s), f); h = f * g - s; U[i][l] = (float)(f - g); for (k = l; k < cols; ++k) { rv1[k] = (double)U[i][k] / h; } if (i != rows - 1) { for (j = l; j < rows; ++j) { for (s = 0.0, k = l; k < cols; ++k) { s += ((double)U[j][k] * (double)U[i][k]); } for (k = l; k < cols; ++k) { U[j][k] += (float)(s * rv1[k]); } } } for (k = l; k < cols; ++k) { U[i][k] = (float)((double)U[i][k]*scale); } } } anorm = max(anorm, fabs((double)D[i]) + fabs(rv1[i])); } // accumulate the right-hand transformation for (i = cols - 1; i >= 0; --i) { if (i < cols - 1) { if (g) { for (j = l; j < cols; ++j) { V[j][i] = (float)(((double)U[i][j] / (double)U[i][l]) / g); } // double division to avoid underflow for (j = l; j < cols; ++j) { for (s = 0.0, k = l; k < cols; ++k) { s += ((double)U[i][k] * (double)V[k][j]); } for (k = l; k < cols; ++k) { V[k][j] += (float)(s * (double)V[k][i]); } } } for (j = l; j < cols; ++j) { V[i][j] = V[j][i] = 0.0; } } V[i][i] = 1.0; g = rv1[i]; l = i; } // accumulate the left-hand transformation for (i = cols - 1; i >= 0; --i) { l = i + 1; g = (double)D[i]; if (i < cols - 1) { for (j = l; j < cols; ++j) { U[i][j] = 0.0; } } if (g) { g = 1.0 / g; if (i != cols - 1) { for (j = l; j < cols; ++j) { for (s = 0.0, k = l; k < rows; ++k) { s += ((double)U[k][i] * (double)U[k][j]); } f = (s / (double)U[i][i]) * g; for (k = i; k < rows; ++k) { U[k][j] += (float)(f * (double)U[k][i]); } } } for (j = i; j < rows; ++j) { U[j][i] = (float)((double)U[j][i]*g); } } else { for (j = i; j < rows; ++j) { U[j][i] = 0.0; } } ++U[i][i]; } // diagonalize the bidiagonal form for (k = cols - 1; k >= 0; --k) { // loop over singular values for (its = 0; its < MAX_ITERATIONS; ++its) { // loop over allowed iterations flag = 1; for (l = k; l >= 0; --l) { // test for splitting nm = l - 1; if (fabs(rv1[l]) + anorm == anorm) { flag = 0; break; } if (fabs((double)D[nm]) + anorm == anorm) { break; } } if (flag) { c = 0.0; s = 1.0; for (i = l; i <= k; ++i) { f = s * rv1[i]; if (fabs(f) + anorm != anorm) { g = (double)D[i]; h = pythag(f, g); D[i] = (float)h; h = 1.0 / h; c = g * h; s = (- f * h); for (j = 0; j < rows; ++j) { y = (double)U[j][nm]; z = (double)U[j][i]; U[j][nm] = (float)(y * c + z * s); U[j][i] = (float)(z * c - y * s); } } } } z = (double)D[k]; if (l == k) { // convergence if (z < 0.0) { // make singular value nonnegative D[k] = (float)(-z); for (j = 0; j < cols; ++j) { V[j][k] = (-V[j][k]); } } break; } if (its >= MAX_ITERATIONS) { free(rv1); rv1 = NULL; return "Failed to converge."; } // shift from bottom 2 x 2 minor x = (double)D[l]; nm = k - 1; y = (double)D[nm]; g = rv1[nm]; h = rv1[k]; f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); g = pythag(f, 1.0); f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x; // next QR transformation c = s = 1.0; for (j = l; j <= nm; ++j) { i = j + 1; g = rv1[i]; y = (double)D[i]; h = s * g; g = c * g; z = pythag(f, h); rv1[j] = z; c = f / z; s = h / z; f = x * c + g * s; g = g * c - x * s; h = y * s; y = y * c; for (jj = 0; jj < cols; ++jj) { x = (double)V[jj][j]; z = (double)V[jj][i]; V[jj][j] = (float)(x * c + z * s); V[jj][i] = (float)(z * c - x * s); } z = pythag(f, h); D[j] = (float)z; if (z) { z = 1.0 / z; c = f * z; s = h * z; } f = (c * g) + (s * y); x = (c * y) - (s * g); for (jj = 0; jj < rows; ++jj) { y = (double)U[jj][j]; z = (double)U[jj][i]; U[jj][j] = (float)(y * c + z * s); U[jj][i] = (float)(z * c - y * s); } } rv1[l] = 0.0; rv1[k] = f; D[k] = (float)x; } } System::alignedFree(rv1); rv1 = NULL; return NULL; } #undef SIGN }