Skip to content
2 changes: 1 addition & 1 deletion lib/stan_math
Submodule stan_math updated 1445 files
11 changes: 11 additions & 0 deletions src/stan/services/experimental/advi/defaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ namespace services {
namespace experimental {
namespace advi {

/**
 * Default rank of the covariance approximation used by low-rank ADVI.
 */
struct rank {
  /** Default rank of the approximation (rank 1). */
  static int default_value() { return 1; }

  /** Human-readable description of this configuration argument. */
  static std::string description() {
    return "Rank of the covariance approximation when using low-rank ADVI.";
  }
};

/**
* Number of samples for Monte Carlo estimate of gradients.
*/
Expand Down
90 changes: 90 additions & 0 deletions src/stan/services/experimental/advi/lowrank.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#ifndef STAN_SERVICES_EXPERIMENTAL_ADVI_LOWRANK_HPP
#define STAN_SERVICES_EXPERIMENTAL_ADVI_LOWRANK_HPP

#include <stan/callbacks/interrupt.hpp>
#include <stan/callbacks/logger.hpp>
#include <stan/callbacks/writer.hpp>
#include <stan/services/util/experimental_message.hpp>
#include <stan/services/util/initialize.hpp>
#include <stan/services/util/create_rng.hpp>
#include <stan/io/var_context.hpp>
#include <stan/variational/advi.hpp>
#include <boost/random/additive_combine.hpp>
#include <string>
#include <vector>

namespace stan {
namespace services {
namespace experimental {
namespace advi {

/**
 * Runs low-rank ADVI.
 *
 * @tparam Model A model implementation
 * @param[in] model Input model to test (with data already instantiated)
 * @param[in] init var context for initialization
 * @param[in] random_seed random seed for the random number generator
 * @param[in] chain chain id to advance the random number generator
 * @param[in] init_radius radius to initialize
 * @param[in] grad_samples number of samples for Monte Carlo estimate
 *   of gradients
 * @param[in] elbo_samples number of samples for Monte Carlo estimate
 *   of ELBO
 * @param[in] max_iterations maximum number of iterations
 * @param[in] tol_rel_obj convergence tolerance on the relative norm
 *   of the objective
 * @param[in] rank the rank of the approximation
 * @param[in] eta stepsize scaling parameter for variational inference
 * @param[in] adapt_engaged adaptation engaged?
 * @param[in] adapt_iterations number of iterations for eta adaptation
 * @param[in] eval_elbo evaluate ELBO every Nth iteration
 * @param[in] output_samples number of posterior samples to draw and
 *   save
 * @param[in,out] interrupt callback to be called every iteration
 *   (currently unused here; kept for signature consistency with the
 *   other ADVI service entry points)
 * @param[in,out] logger Logger for messages
 * @param[in,out] init_writer Writer callback for unconstrained inits
 * @param[in,out] parameter_writer output for parameter values
 * @param[in,out] diagnostic_writer output for diagnostic values
 * @return error_codes::OK if successful
 */
template <class Model>
int lowrank(Model& model, const stan::io::var_context& init,
            unsigned int random_seed, unsigned int chain, double init_radius,
            int grad_samples, int elbo_samples, int max_iterations,
            double tol_rel_obj, int rank, double eta, bool adapt_engaged,
            int adapt_iterations, int eval_elbo, int output_samples,
            callbacks::interrupt& interrupt, callbacks::logger& logger,
            callbacks::writer& init_writer, callbacks::writer& parameter_writer,
            callbacks::writer& diagnostic_writer) {
  // ADVI is experimental; warn the user up front.
  util::experimental_message(logger);

  boost::ecuyer1988 rng = util::create_rng(random_seed, chain);

  // Initialize unconstrained parameters (writes the inits via init_writer).
  std::vector<double> cont_vector = util::initialize(
      model, init, rng, init_radius, true, logger, init_writer);

  // Header row: ADVI-specific diagnostics followed by the model's
  // constrained parameter names.
  std::vector<std::string> names;
  names.push_back("lp__");
  names.push_back("log_p__");
  names.push_back("log_g__");
  model.constrained_param_names(names, true, true);
  parameter_writer(names);

  // View the initialization as an Eigen vector without copying.
  Eigen::VectorXd cont_params
      = Eigen::Map<Eigen::VectorXd>(&cont_vector[0], cont_vector.size(), 1);

  stan::variational::advi_lowrank<Model, boost::ecuyer1988> cmd_advi(
      model, cont_params, rng, rank, grad_samples, elbo_samples, eval_elbo,
      output_samples);
  cmd_advi.run(eta, adapt_engaged, adapt_iterations, tol_rel_obj,
               max_iterations, logger, parameter_writer, diagnostic_writer);

  return 0;  // error_codes::OK
}
} // namespace advi
} // namespace experimental
} // namespace services
} // namespace stan
#endif
104 changes: 93 additions & 11 deletions src/stan/variational/advi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stan/variational/print_progress.hpp>
#include <stan/variational/families/normal_fullrank.hpp>
#include <stan/variational/families/normal_meanfield.hpp>
#include <stan/variational/families/normal_lowrank.hpp>
#include <boost/circular_buffer.hpp>
#include <boost/lexical_cast.hpp>
#include <algorithm>
Expand All @@ -30,14 +31,15 @@ namespace variational {
*
* Implements "black box" variational inference using stochastic gradient
* ascent to maximize the Evidence Lower Bound for a given model
* and variational family.
* and variational family. This base class encapsulates the mean-field,
* low-rank, and full-rank variational posterior classes.
*
* @tparam Model class of model
* @tparam Q class of variational distribution
* @tparam BaseRNG class of random number generator
*/
template <class Model, class Q, class BaseRNG>
class advi {
class advi_base {
public:
/**
* Constructor
Expand All @@ -54,9 +56,9 @@ class advi {
* @throw std::runtime_error if eval_elbo is not positive
* @throw std::runtime_error if n_posterior_samples is not positive
*/
advi(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo,
int n_posterior_samples)
advi_base(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo,
int n_posterior_samples)
: model_(m),
cont_params_(cont_params),
rng_(rng),
Expand Down Expand Up @@ -192,10 +194,10 @@ class advi {
}

// Variational family to store gradients
Q elbo_grad = Q(model_.num_params_r());
Q elbo_grad = init_variational(model_.num_params_r());

// Adaptive step-size sequence
Q history_grad_squared = Q(model_.num_params_r());
Q history_grad_squared = init_variational(model_.num_params_r());
double tau = 1.0;
double pre_factor = 0.9;
double post_factor = 0.1;
Expand Down Expand Up @@ -287,7 +289,7 @@ class advi {
history_grad_squared.set_to_zero();
}
++eta_sequence_index;
variational = Q(cont_params_);
variational = init_variational(cont_params_);
}
return eta_best;
}
Expand Down Expand Up @@ -317,10 +319,10 @@ class advi {
stan::math::check_positive(function, "Maximum iterations", max_iterations);

// Gradient parameters
Q elbo_grad = Q(model_.num_params_r());
Q elbo_grad = init_variational(model_.num_params_r());

// Stepsize sequence parameters
Q history_grad_squared = Q(model_.num_params_r());
Q history_grad_squared = init_variational(model_.num_params_r());
double tau = 1.0;
double pre_factor = 0.9;
double post_factor = 0.1;
Expand Down Expand Up @@ -462,7 +464,7 @@ class advi {
diagnostic_writer("iter,time_in_seconds,ELBO");

// Initialize variational approximation
Q variational = Q(cont_params_);
Q variational = init_variational(cont_params_);

if (adapt_engaged) {
eta = adapt_eta(variational, adapt_iterations, logger);
Expand Down Expand Up @@ -562,6 +564,86 @@ class advi {
int n_monte_carlo_elbo_;
int eval_elbo_;
int n_posterior_samples_;

private:
virtual Q init_variational(Eigen::VectorXd& cont_params) const = 0;
virtual Q init_variational(size_t dimension) const = 0;
};

/**
 * The ADVI implementation used for mean-field and full-rank approximations.
 *
 * @tparam Model Class of model
 * @tparam Q Class of variational distribution, either mean-field or full-rank.
 * @tparam BaseRNG Class of random number generator
 */
template <class Model, class Q, class BaseRNG>
class advi : public advi_base<Model, Q, BaseRNG> {
 public:
  /**
   * Constructor; forwards all arguments to advi_base unchanged.
   */
  advi(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
       int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo,
       int n_posterior_samples)
      : advi_base<Model, Q, BaseRNG>(m, cont_params, rng, n_monte_carlo_grad,
                                     n_monte_carlo_elbo, eval_elbo,
                                     n_posterior_samples) {}

 private:
  /** Construct a variational family centered at the given parameters. */
  Q init_variational(Eigen::VectorXd& cont_params) const override {
    return Q(cont_params);
  }

  /** Construct a variational family of the given dimension. */
  Q init_variational(size_t dimension) const override { return Q(dimension); }
};

/**
 * The ADVI implementation used only for low-rank approximations.
 *
 * @tparam Model Class of model.
 * @tparam BaseRNG Class of random number generator.
 */
template <class Model, class BaseRNG>
class advi_lowrank
    : public advi_base<Model, stan::variational::normal_lowrank, BaseRNG> {
 public:
  /**
   * Constructor
   *
   * @param[in] m stan model
   * @param[in] cont_params initialization of continuous parameters
   * @param[in,out] rng random number generator
   * @param[in] rank rank of approximation
   * @param[in] n_monte_carlo_grad number of samples for gradient computation
   * @param[in] n_monte_carlo_elbo number of samples for ELBO computation
   * @param[in] eval_elbo evaluate ELBO at every "eval_elbo" iters
   * @param[in] n_posterior_samples number of samples to draw from posterior
   * @throw std::runtime_error if rank is not positive
   * @throw std::runtime_error if n_monte_carlo_grad is not positive
   * @throw std::runtime_error if n_monte_carlo_elbo is not positive
   * @throw std::runtime_error if eval_elbo is not positive
   * @throw std::runtime_error if n_posterior_samples is not positive
   */
  advi_lowrank(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
               size_t rank, int n_monte_carlo_grad, int n_monte_carlo_elbo,
               int eval_elbo, int n_posterior_samples)
      : advi_base<Model, stan::variational::normal_lowrank, BaseRNG>(
            m, cont_params, rng, n_monte_carlo_grad, n_monte_carlo_elbo,
            eval_elbo, n_posterior_samples),
        rank_(rank) {
    static const char* function = "stan::variational::advi_lowrank";
    math::check_positive(function, "Approximation rank", rank_);
  }

 protected:
  // Rank of the low-rank covariance factor; validated positive in the ctor.
  size_t rank_;

 private:
  /** Construct a rank-`rank_` family centered at the given parameters. */
  stan::variational::normal_lowrank init_variational(
      Eigen::VectorXd& cont_params) const override {
    return stan::variational::normal_lowrank(cont_params, rank_);
  }

  /** Construct a rank-`rank_` family of the given dimension. */
  stan::variational::normal_lowrank init_variational(
      size_t dimension) const override {
    return stan::variational::normal_lowrank(dimension, rank_);
  }
};
} // namespace variational
} // namespace stan
Expand Down
66 changes: 10 additions & 56 deletions src/stan/variational/base_family.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

namespace stan {
namespace variational {

class base_family {
public:
// Constructors
Expand All @@ -22,63 +21,18 @@ class base_family {
// Distribution-based operations
virtual const Eigen::VectorXd& mean() const = 0;
virtual double entropy() const = 0;
virtual Eigen::VectorXd transform(const Eigen::VectorXd& eta) const = 0;
/**
* Assign a draw from this mean field approximation to the
* specified vector using the specified random number generator.
*
* @tparam BaseRNG Class of random number generator.
* @param[in] rng Base random number generator.
* @param[out] eta Vector to which the draw is assigned; dimension has to be
* the same as the dimension of variational q.
* @throws std::range_error If the index is out of range.
*/

Eigen::VectorXd transform(const Eigen::VectorXd& eta) const;

template <class BaseRNG>
void sample(BaseRNG& rng, Eigen::VectorXd& eta) const {
// Draw from standard normal and transform to real-coordinate space
for (int d = 0; d < dimension(); ++d)
eta(d) = stan::math::normal_rng(0, 1, rng);
eta = transform(eta);
}
/**
* Draw a posterior sample from the normal distribution,
* and return its log normal density. The constants are dropped.
*
* @param[in] rng Base random number generator.
* @param[out] eta Vector to which the draw is assigned; dimension has to be
* the same as the dimension of variational q. eta will be transformed into
* variational posteriors.
* @param[out] log_g The log density in the variational approximation;
* The constant term is dropped.
* @throws std::range_error If the index is out of range.
*/
Eigen::VectorXd sample(BaseRNG& rng, Eigen::VectorXd& eta) const;

double calc_log_g(Eigen::VectorXd& eta) const;

template <class BaseRNG>
void sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta, double& log_g) const {
// Draw from the approximation
for (int d = 0; d < dimension(); ++d) {
eta(d) = stan::math::normal_rng(0, 1, rng);
}
// Compute the log density before transformation
log_g = calc_log_g(eta);
// Transform to real-coordinate space
eta = transform(eta);
}
/**
* Compute the unnormalized log unit normal density wrt eta. All constants are
* dropped.
*
* @param[in] eta Vector; dimension has to be the same as the dimension
* of variational q.
* @return The unnormalized log density in the variational approximation;
* @throws std::range_error If the index is out of range.
*/
double calc_log_g(const Eigen::VectorXd& eta) const {
double log_g = 0;
for (int d = 0; d < dimension(); ++d) {
log_g += -stan::math::square(eta(d)) * 0.5;
}
return log_g;
}
Eigen::VectorXd sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta,
double& log_g) const;

template <class M, class BaseRNG>
void calc_grad(base_family& elbo_grad, M& m, Eigen::VectorXd& cont_params,
int n_monte_carlo_grad, BaseRNG& rng,
Expand Down
Loading