stan-dev · syclik · Aug 25, 2015 · Jul 22, 2015 · Jul 23, 2015 · Jul 25, 2015
diff --git a/stan/math/rev/mat/fun/cholesky_decompose.hpp b/stan/math/rev/mat/fun/cholesky_decompose.hpp
@@ -0,0 +1,156 @@
+#ifndef STAN_MATH_REV_MAT_FUN_CHOLESKY_DECOMPOSE_HPP
+#define STAN_MATH_REV_MAT_FUN_CHOLESKY_DECOMPOSE_HPP
+
+#include <stan/math/prim/mat/fun/Eigen.hpp>
+#include <stan/math/prim/mat/fun/typedefs.hpp>
+#include <stan/math/prim/mat/fun/cholesky_decompose.hpp>
+#include <stan/math/rev/scal/fun/value_of_rec.hpp>
+#include <stan/math/rev/scal/fun/value_of.hpp>
+#include <stan/math/rev/core.hpp>
+#include <stan/math/prim/mat/fun/value_of_rec.hpp>
+#include <stan/math/prim/mat/err/check_pos_definite.hpp>
+#include <stan/math/prim/mat/err/check_square.hpp>
+#include <stan/math/prim/mat/err/check_symmetric.hpp>
+
+namespace stan {
+  namespace math {
+
+    class cholesky_decompose_v_vari : public vari {
+    public:
+      int M_;  // A.rows() = A.cols()
+      vari** variRefA_;  // varis of A's lower triangle, packed by row
+      vari** variRefL_;  // varis of L's lower triangle, same packing
+
+      /* Constructor for the cholesky_decompose vari.
+       *
+       * Stores the varis of the lower triangle of A (j <= i)
+       * in variRefA_, packed row by row.
+       * Instantiates one vari per lower-triangular entry of
+       * L_A and stores it in variRefL_ in the same order.
+       *
+       * The varis in variRefL_ aren't on the chainable
+       * autodiff stack, they are only used for storage
+       * and computation. They are created here;
+       * cholesky_decompose only wires them into the
+       * matrix it returns, along with a dummy vari for the
+       * upper triangular part.
+       *
+       * @param A matrix to decompose, only j <= i is read
+       * @param L_A values of the cholesky factor of A
+       * */
+      cholesky_decompose_v_vari(const Eigen::Matrix<var, -1, -1>& A,
+                                const Eigen::Matrix<double, -1, -1>& L_A)
+        : vari(0.0),
+          M_(A.rows()),
+          variRefA_(ChainableStack::memalloc_.alloc_array<vari*>
+                    (A.rows() * (A.rows() + 1) / 2)),
+          variRefL_(ChainableStack::memalloc_.alloc_array<vari*>
+                    (A.rows() * (A.rows() + 1) / 2)) {
+            size_t pos = 0;
+        for (size_type i = 0; i < M_; ++i) {
+          for (size_type j = 0; j <= i; ++j) {
+            variRefA_[pos] = A.coeffRef(i, j).vi_;
+            variRefL_[pos] = new vari(L_A.coeffRef(i, j), false);
+            ++pos;
+          }
+        }
+      }
+
+      /* Reverse mode differentiation.
+       * Algorithm reference:
+       *
+       * Mike Giles. An extended collection of matrix
+       * derivative results for forward and reverse mode AD.
+       * Jan. 2008.
+       *
+       * Note the algorithm as laid out in Giles is
+       * row-major, so the Eigen matrices are explicitly
+       * RowMajor, whereas Eigen defaults to ColumnMajor.
+       * Only lower-triangular entries are written and read.
+       * The sweep starts with the adjoint of
+       * A(M_ - 1, M_ - 1), hence pos is decremented after
+       * the fill loop to start at M_ * (M_ + 1) / 2 - 1.
+       * */
+      virtual void chain() {
+        using Eigen::Matrix;
+        using Eigen::RowMajor;
+        Matrix<double, -1, -1, RowMajor> adjL(M_, M_);
+        Matrix<double, -1, -1, RowMajor> LA(M_, M_);
+        Matrix<double, -1, -1, RowMajor> adjA(M_, M_);
+        size_t pos = 0;
+        for (size_type i = 0; i < M_; ++i) {
+          for (size_type j = 0; j <= i; ++j) {
+            adjL.coeffRef(i, j) = variRefL_[pos]->adj_;
+            LA.coeffRef(i, j) = variRefL_[pos]->val_;
+            ++pos;
+          }
+        }
+
+        --pos;  // step back onto the last packed entry
+        for (int i = M_ - 1; i >= 0; --i) {
+          for (int j = i; j >= 0; --j) {
+            if (i == j) {
+              adjA.coeffRef(i, j) = 0.5 * adjL.coeff(i, j)
+                / LA.coeff(i, j);
+            } else {
+              adjA.coeffRef(i, j) = adjL.coeff(i, j)
+                / LA.coeff(j, j);
+              adjL.coeffRef(j, j) -= adjL.coeff(i, j)
+                * LA.coeff(i, j) / LA.coeff(j, j);
+            }
+            for (int k = j - 1; k >=0; --k) {
+              adjL.coeffRef(i, k) -= adjA.coeff(i, j)
+                * LA.coeff(j, k);
+              adjL.coeffRef(j, k) -= adjA.coeff(i, j)
+                * LA.coeff(i, k);
+            }
+            variRefA_[pos--]->adj_ += adjA.coeffRef(i, j);  // into A(i, j)
+          }
+        }
+      }
+    };
+
+    /* Reverse mode specialization of cholesky decomposition.
+     *
+     * Internally calls llt rather than using
+     * stan::math::cholesky_decompose in order
+     * to use selfadjointView<Lower> optimization.
+     * Declared inline because it is defined in a header.
+     * Entries above the diagonal of the result all point
+     * at one shared dummy vari holding 0.
+     *
+     * @param A matrix to decompose; checked to be square,
+     *   symmetric and positive definite
+     * @return L cholesky factor of A
+     */
+    inline Eigen::Matrix<var, -1, -1>
+    cholesky_decompose(const Eigen::Matrix<var, -1, -1> &A) {
+      stan::math::check_square("cholesky_decompose", "A", A);
+      stan::math::check_symmetric("cholesky_decompose", "A", A);
+
+      Eigen::Matrix<double, -1, -1> L_A(value_of_rec(A));
+      Eigen::LLT<Eigen::MatrixXd> L_factor
+        = L_A.selfadjointView<Eigen::Lower>().llt();
+      check_pos_definite("cholesky_decompose", "A", L_factor);
+      L_A = L_factor.matrixL();
+
+      // NOTE: neither allocation below is a memory leak; varis are
+      // cleaned up with the arena allocator. baseVari evaluates the
+      // adjoints. dummy must outlive this call because the returned
+      // vars keep pointing at it, so it cannot be a local object.
+      cholesky_decompose_v_vari *baseVari
+        = new cholesky_decompose_v_vari(A, L_A);
+      vari *dummy = new vari(0.0, false);
+      Eigen::Matrix<var, -1, -1> L(A.rows(), A.cols());
+      size_t pos = 0;
+      for (size_type i = 0; i < L.cols(); ++i) {
+        for (size_type j = 0; j <= i; ++j)
+          L.coeffRef(i, j).vi_ = baseVari->variRefL_[pos++];
+        for (size_type k = (i + 1); k < L.cols(); ++k)
+          L.coeffRef(i, k).vi_ = dummy;
+      }
+      return L;
+    }
+  }
+}
+#endif
diff --git a/test/unit/math/mix/mat/fun/cholesky_decompose_test.cpp b/test/unit/math/mix/mat/fun/cholesky_decompose_test.cpp
@@ -4,6 +4,9 @@
 #include <gtest/gtest.h>
 #include <stan/math/fwd/scal/fun/value_of.hpp>
 #include <stan/math/rev/scal/fun/value_of.hpp>
+#include <stan/math/prim/mat/fun/value_of_rec.hpp>
+#include <stan/math/rev/mat/fun/dot_self.hpp>
+#include <stan/math/fwd/mat/fun/dot_self.hpp>
 #include <stan/math/fwd/scal/fun/value_of_rec.hpp>
 #include <stan/math/rev/scal/fun/value_of_rec.hpp>
 #include <stan/math/rev/core.hpp>
@@ -17,10 +20,9 @@
 #include <stan/math/fwd/scal/fun/exp.hpp>
 #include <stan/math/fwd/scal/fun/fabs.hpp>
 #include <stan/math/rev/scal/fun/fabs.hpp>
+#include <stan/math/prim/mat/prob/multi_normal_cholesky_log.hpp>
 #include <stan/math/prim/mat/fun/cholesky_decompose.hpp>
 #include <stan/math/prim/mat/fun/cov_matrix_constrain.hpp>
-#include <stan/math/rev/mat/functor/gradient.hpp>
-#include <stan/math/prim/mat/functor/finite_diff_gradient.hpp>
 #include <stan/math/mix/mat/functor/hessian.hpp>
 #include <stan/math/prim/mat/functor/finite_diff_hessian.hpp>
 #include <stan/math/mix/mat/functor/grad_hessian.hpp>
@@ -41,39 +43,6 @@ struct chol_functor {
   }
 };
 
-void test_gradients(int size) {
-  std::vector<std::vector<chol_functor> > functowns;
-  std::vector<std::vector<Eigen::Matrix<double, -1, 1> > > grads_ad;
-  std::vector<std::vector<Eigen::Matrix<double, -1, 1> > > grads_fd;
-  Eigen::Matrix<double, -1, -1> evals_ad(size,size);
-  Eigen::Matrix<double, -1, -1> evals_fd(size,size);
-  functowns.resize(size);
-  grads_ad.resize(size);
-  grads_fd.resize(size);
-
-  for (int i = 0; i < size; ++i)
-    for (int j = 0; j < size; ++j) {
-      functowns[i].push_back(chol_functor(i, j, size));
-      grads_fd[i].push_back(Eigen::Matrix<double, -1, 1>(size));
-      grads_ad[i].push_back(Eigen::Matrix<double, -1, 1>(size));
-    }
-
-  int numels = size + size * (size - 1) / 2;
-  Eigen::Matrix<double, -1, 1> x(numels);
-  for (int i = 0; i < numels; ++i)
-    x(i) = i / 10.0;
-
-  for (size_t i = 0; i < static_cast<size_t>(size); ++i)
-    for (size_t j = 0; j < static_cast<size_t>(size); ++j) {
-      stan::math::gradient(functowns[i][j], x, evals_ad(i,j), grads_ad[i][j]);
-      stan::math::finite_diff_gradient(functowns[i][j], x,
-                                       evals_fd(i,j), grads_fd[i][j]);
-      for (int k = 0; k < numels; ++k) 
-        EXPECT_NEAR(grads_fd[i][j](k), grads_ad[i][j](k), 1e-11);
-      EXPECT_FLOAT_EQ(evals_fd(i, j), evals_ad(i, j));
-    }
-}
-
 void test_hessians(int size) {
   std::vector<std::vector<chol_functor> > functowns;
   std::vector<std::vector<Eigen::Matrix<double, -1, 1> > > grads_ad;
@@ -100,7 +69,7 @@ void test_hessians(int size) {
   int numels = size + size * (size - 1) / 2;
   Eigen::Matrix<double, -1, 1> x(numels);
   for (int i = 0; i < numels; ++i)
-    x(i) = i / 10.0;
+    x(i) = i / 20.0;
 
   for (size_t i = 0; i < static_cast<size_t>(size); ++i)
     for (size_t j = 0; j < static_cast<size_t>(size); ++j) {
@@ -199,10 +168,6 @@ TEST(AgradMixMatrixCholeskyDecompose, exception_mat_ffv) {
   EXPECT_THROW(stan::math::cholesky_decompose(m), std::domain_error);
 }
 
-TEST(AgradMixMatrixCholeskyDecompose, mat_1st_deriv) {
-  test_gradients(3);
-}
-
 TEST(AgradMixMatrixCholeskyDecompose, mat_2nd_deriv) {
   test_hessians(3);
 }