ADMB Documentation  -a65f1c97
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hmc_functions.cpp
Go to the documentation of this file.
1 // Added these from Johnoel's code for the RNG below. Hopefully play nice
2 // with everything else
3 #include <iostream>
4 #include <vector>
5 #include <cstdlib>
6 #include <cmath>
7 #include <fstream>
8 #include <sstream>
9 #include <stack>
10 #include <queue>
11 
12 #ifndef __OPENCC__
13 #include <random>
14 #endif
15 
16 //#include "nuts_da.h"
17 
18 using std::cout;
19 using std::endl;
20 using std::vector;
21 using std::ifstream;
22 using std::istringstream;
23 using std::pow;
24 using std::stack;
25 using std::queue;
26 
27 #include "admodel.h"
28 #ifndef OPT_LIB
29 #include <cassert>
30 #endif
31 
32 
33 
34 int function_minimizer::compute_next_window(int i, int warmup, int w1, int aws, int w3){
35  int anw;
36  anw = i+aws;
37  if(anw == (warmup-w3) )
38  return(anw);
39  // Check that the next anw is not too long. This will be the anw for the
40  // next time this is computed.
41  int nwb = anw+2*aws;
42  if(nwb >= warmup-w3){
43  // if(i != warmup-w3){
44  // cout << "Extending last slow window from" <<
45  // anw << "to" << warmup-w3 << endl;
46  // }
47  anw = warmup-w3;
48  }
49  return(anw);
50 }
51 
52 
53 bool function_minimizer::slow_phase(int i, int warmup, int w1, int w3)
54 {
55  // After w1, before start of w3
56  bool x1 = i>= w1; // after initial fast window
57  bool x2 = i<= (warmup-w3); // but before last fast window
58  bool x3 = i < warmup; // definitely not during sampling
59  return(x1 & x2 & x3);
60 }
61 
62 // Strip out the model name given full path
63 std::string function_minimizer::get_filename(const char* f) {
64  std::string s(f);
65  size_t pos = s.find_last_of("/\\");
66  std::string filename_exe = s.substr(pos + 1);
67  size_t dot_pos = s.find_last_of(".");
68  std::string filename = s.substr(pos + 1, dot_pos - pos - 1);
69  return filename;
70 }
71 
72 // This function is the heart of NUTS. It builds a single trajectory whose
73 // length depends on input j. It keeps doubling in direction v until
74 // finished or a U-turn occurs.
75 void function_minimizer::build_tree(int nvar, dvector& gr, dmatrix& chd, double eps, dvector& p,
76  dvector& y, dvector& gr2, double logu, int v, int j, double H0,
77  dvector& _thetaprime, dvector& _thetaplus, dvector& _thetaminus,
78  dvector& _rplus, dvector& _rminus,
79  double& _alphaprime, int& _nalphaprime, bool& _sprime,
80  int& _nprime, int& _nfevals, bool& _divergent,
81  const random_number_generator& rng,
82  dvector& gr2_end, dvector& _grprime, dvector& _gr2prime, double& _nllprime,
83  double& _Hprime, independent_variables& _parsaveprime) {
84 
85  if (j==0) {
86  // Take a single step in direction v from points p,y, which are updated
87  // internally by reference and thus represent the new point.
88  double nll= leapfrog(nvar, gr, chd, eps*v, p, y, gr2);
89  // These are the NLL and gradients at the last point
90  // evaluated, saved via reference, so I don't have to
91  // recalculate them when starting a new trajectory.
92  gr2_end=gr2;
93  // The new Hamiltonian value. ADMB returns negative log density so
94  // correct it
95  double Ham=-(nll+0.5*norm2(p));
96 
97  // Check for divergence. Either numerical (nll is nan) or a big
98  // difference in H. Screws up all the calculations so catch it here.
99  _divergent = (std::isnan(Ham) || logu > 1000+Ham);
100  if(_divergent){
101  _sprime=0;
102  _alphaprime=0; // these will be NaN otherwise
103  _nprime=0;
104  } else {
105  // No divergence
106  _nprime = logu < Ham;
107  _sprime=1;
108  _alphaprime = min(1.0, exp(Ham-H0));
109  // Update the tree elements, which are returned by reference in
110  // leapfrog.
111  _thetaprime = y;
112  _thetaminus = y;
113  _rminus = p;
114  _thetaplus = y;
115  _rplus = p;
116  _grprime=gr; _gr2prime=gr2; _nllprime=nll; _Hprime=Ham;
117  initial_params::copy_all_values(_parsaveprime,1.0);
118  }
119  _nalphaprime=1;
120  _nfevals++;
121  } else { // j > 1
122  // Build first half of tree.
123  build_tree(nvar, gr, chd, eps, p, y, gr2, logu, v, j-1,
124  H0, _thetaprime, _thetaplus, _thetaminus, _rplus, _rminus,
125  _alphaprime, _nalphaprime, _sprime,
126  _nprime, _nfevals, _divergent, rng,
127  gr2_end, _grprime, _gr2prime, _nllprime, _Hprime, _parsaveprime);
128  // If valid trajectory, build second half.
129  if (_sprime == 1) {
130  // Save copies of the global ones due to rerunning build_tree below
131  // which will overwrite some of the global variables we need to
132  // save. These are the ' versions of the paper, e.g., sprime'.
133  dvector thetaprime0(1,nvar);
134  independent_variables parsaveprime0(1,nvar);
135  dvector gr2prime0(1,nvar); dvector grprime0(1,nvar);
136  double nllprime0, Hprime0;
137  dvector thetaplus0(1,nvar);
138  dvector thetaminus0(1,nvar);
139  dvector rplus0(1,nvar);
140  dvector rminus0(1,nvar);
141  thetaprime0=_thetaprime;
142  parsaveprime0=_parsaveprime;
143  grprime0=_grprime;
144  gr2prime0=_gr2prime;
145  nllprime0=_nllprime;
146  Hprime0=_Hprime;
147  thetaplus0=_thetaplus;
148  thetaminus0=_thetaminus;
149  rplus0=_rplus;
150  rminus0=_rminus;
151  int nprime0 = _nprime;
152  double alphaprime0 = _alphaprime;
153  int nalphaprime0 = _nalphaprime;
154 
155  // Make subtree to the left
156  if (v == -1) {
157  build_tree(nvar, gr, chd, eps, _rminus, _thetaminus, gr2, logu, v, j-1,
158  H0, _thetaprime, _thetaplus, _thetaminus, _rplus, _rminus,
159  _alphaprime, _nalphaprime, _sprime,
160  _nprime, _nfevals, _divergent, rng,
161  gr2_end, _grprime, _gr2prime, _nllprime, _Hprime, _parsaveprime);
162  // Update the leftmost point
163  rminus0=_rminus;
164  thetaminus0=_thetaminus;
165  // Rest rightmost tree
166  _thetaplus=thetaplus0;
167  _rplus=rplus0;
168  } else {
169  // Make subtree to the right
170  build_tree(nvar, gr, chd, eps, _rplus, _thetaplus, gr2, logu, v, j-1,
171  H0, _thetaprime, _thetaplus, _thetaminus, _rplus, _rminus,
172  _alphaprime, _nalphaprime, _sprime,
173  _nprime, _nfevals, _divergent, rng,
174  gr2_end, _grprime, _gr2prime, _nllprime, _Hprime, _parsaveprime);
175  // Update the rightmost point
176  rplus0=_rplus;
177  thetaplus0=_thetaplus;
178  // Reset leftmost tree
179  _thetaminus=thetaminus0;
180  _rminus=rminus0;
181  }
182 
183  // This is (n'+n''). Can be zero so need to be careful??
184  int nprime_temp = nprime0 + _nprime;
185  if(std::isnan(static_cast<double>(nprime_temp))) nprime_temp=0;
186  // Choose whether to keep the proposal thetaprime.
187  double rr=randu(rng); // runif(1)
188  if (nprime_temp != 0 && rr < double(_nprime)/double(nprime_temp)) {
189  // Update theta prime to be the new proposal for this tree so far.
190  // _thetaprime already updated globally above so do nothing
191  } else {
192  // Reject it for the proposal from the last doubling.
193  _thetaprime = thetaprime0;
194  _parsaveprime=parsaveprime0;
195  _grprime=grprime0;
196  _gr2prime=gr2prime0;
197  _nllprime=nllprime0;
198  _Hprime=Hprime0;
199  }
200 
201  // Update the global reference variables
202  _alphaprime = alphaprime0 + _alphaprime;
203  _nalphaprime = nalphaprime0 + _nalphaprime;
204  // s' from the first execution above is 1 by definition (inside this
205  // if statement), while _sprime is s''. So need to reset s':
206  bool b = stop_criterion(nvar, thetaminus0, thetaplus0, rminus0, rplus0);
207  _sprime = _sprime && b;
208  _nprime = nprime_temp;
209  } // end building second trajectory
210  } // end recursion branch (j>0)
211 } // end function
212 
213 
214 bool function_minimizer::stop_criterion(int nvar, dvector& thetaminus, dvector& thetaplus,
215  dvector& rminus, dvector& rplus)
216 {
217  dvector thetavec(1, nvar);
218  thetavec=thetaplus-thetaminus;
219  // Manual implementation of inner_product, equivalent to this
220  // criterion = (thetavec*rminus+thetavec*rminus>=0) *
221  // (thetavec*rplus+thetavec*rplus>=0);
222  double x1=0;
223  double x2=0;
224  for(int i=1; i<=nvar; i++){
225  x1+=thetavec(i)*rminus(i);
226  x2+=thetavec(i)*rplus(i);
227  }
228  // TRUE if both are TRUE, FALSE if at least one.
229  bool criterion = (x1 >=0) && (x2 >=0);
230  return criterion;
231 }
232 
233 
234 double function_minimizer::adapt_eps(int ii, int iseps, double eps, double alpha,
235  double& adapt_delta, double& mu,
236  dvector& epsvec, dvector& epsbar,
237  dvector& Hbar){
238  double gamma=0.05; double t0=20; double kappa=0.75;
239  int m=ii+1;
240  // If divergence, there is 0 acceptance probability so alpha=0.
241  if(std::isnan(alpha)) alpha=0;
242  Hbar(m)= (1-1/(iseps+t0))*Hbar(m-1) + (adapt_delta-alpha)/(iseps+t0);
243  double logeps=mu-sqrt(iseps)*Hbar(m)/gamma;
244  epsvec(m)=exp(logeps);
245  double logepsbar= pow(iseps, -kappa)*logeps+(1-pow(iseps,-kappa))*log(epsbar(m-1));
246  epsbar(m)=exp(logepsbar);
247  return(epsvec(m));
248 }
249 
259 {
260  //initial_params::xinit(x);
261  double f=0.0;
262  if (mcmc2_flag==0 && lapprox)
263  {
264  cerr << "HMC not implemented for random effects models" << endl;
265  ad_exit(1);
266  }
267  else
268  {
269  dvariable vf=0.0;
270  dvar_vector vx=dvar_vector(y);
271  vf=initial_params::reset(vx);
273  userfunction();
274  dvar_vector d(1,nvar);
276  vf-=sum(log(d));
278  f=value(vf);
279  gradcalc(nvar,g);
280  }
281  return f;
282 }
283 
284 void function_minimizer::print_mcmc_progress(int is, int nmcmc, int nwarmup, int chain, int refresh)
285 {
286  // Modified from Stan: sample::progress.hpp; 9/9/2016
287  if(refresh>0){
288  if (is==1 || is == nmcmc || is % refresh ==0 ){
289  int width=1+(int)std::ceil(std::log10(static_cast<double>(nmcmc)));
290  cout << "Chain " << chain << ": " << "Iteration: " << std::setw(width) << is
291  << "/" << nmcmc << " [" << std::setw(3)
292  << int(100.0 * (double(is) / double(nmcmc) ))
293  << "%]" << (is <= nwarmup ? " (Warmup)" : " (Sampling)") << endl;
294  }
295  }
296 }
297 
298 void function_minimizer::print_mcmc_timing(double time_warmup, double time_total, int chain) {
299  double time_sampling=time_total-time_warmup;
300  std::string title= "Elapsed Time: ";
301  std::string title2="Chain " + std::to_string(chain) + ": ";
302  std::string u; // units
303  // Depending on how long it ran convert to sec/min/hour/days so
304  // the outputs are interpretable
305  if(time_total<=60){
306  u=" seconds";
307  } else if(time_total > 60 && time_total <=60*60){
308  time_total/=60; time_sampling/=60; time_warmup/=60;
309  u=" minutes";
310  } else if(time_total > (60*60) && time_total <= (360*24)){
311  time_total/=(60*60); time_sampling/=(60*60); time_warmup/=(60*60);
312  u=" hours";
313  } else {
314  time_total/=(24*60*60); time_sampling/=(24*60*60); time_warmup/=(24*60*60);
315  u=" days";
316  }
317  cout << title2 << title; printf("%5.2f", time_warmup); cout << u << " (Warmup | ";
318  printf("%.0f", 100*(time_warmup/time_total)); cout << "%)" << endl;
319  cout << title2 << std::string(title.size(), ' '); printf("%5.2f", time_sampling);
320  cout << u << " (Sampling | " ;
321  printf("%.0f", 100*(time_sampling/time_total)); cout <<"%)" << endl;
322  cout << title2 << std::string(title.size(), ' '); printf("%5.2f", time_total);
323  cout << u << " (Total | 100%)";
324  cout << endl;
325 }
326 
327 // This function holds the position (y) and momentum (p) vectors fixed and
328 // takes a single step of size eps. This process repeats until a reasonable
329 // eps is found. Thus need to make sure y and p are constant and only eps
330 // changes.
332  bool verbose_adapt_mass, bool verbose_find_epsilon,
333  int chain)
334 {
335  double eps=1; // initial eps
336  independent_variables z(1,nvar); // rotated bounded parameters
337  dvector p2(1,nvar); // updated momentum
338  dvector y2(1,nvar); // updated position
339  dvector gr(1,nvar); // gradients
340  dvector gr2(1,nvar); // updated rotated gradient
341 
342  // Calculate initial Hamiltonian value
343  double pprob1=0.5*norm2(p);
344  z=rotate_pars(chd,y);
345  double nllbegin=get_hybrid_monte_carlo_value(nvar,z,gr);
346  dvector gr2begin=rotate_gradient(gr,chd); // rotated gradient
347  double H1=nllbegin+pprob1;
348 
349  // Calculate H after a single step of size eps
350  double nll2=leapfrog(nvar, gr, chd, eps, p2, y2, gr2);
351  double pprob2=0.5*norm2(p2);
352  double H2=nll2+pprob2;
353  double alpha=exp(H1-H2);
354  // Determine whether eps=1 is too big or too small, i.e. whether to halve
355  // or double. If a=1, then eps keeps doubling until alpha passes 0.5;
356  // otherwise it halves until that happens.
357  double a;
358  if(alpha < 0.5 || std::isnan(alpha)){
359  // If divergence occurs or eps too big, halve it
360  a=-1;
361  } else {
362  // If stepsize too small, double it
363  a=1;
364  }
365 
366  for(int k=2; k<50; k++){
367  if(verbose_find_epsilon){
368  cout << "Chain " << chain << ": Find epsilson: iteration=" << k-1 << "; eps=" << eps << "; NLL1=" << nllbegin << "; p1=" << pprob1 << "; H1=" << H1 <<
369  "; NLL2=" << nll2 << "; p2=" << pprob2 <<"; H2=" << H2<< "; alpha=" << alpha << endl;
370  }
371  // Reinitialize position and momentum at each step.
372  p2=p;
373  y2=y;
374  gr2=gr2begin;
375 
376  // Make one leapfrog step and check acceptance ratio
377  nll2=leapfrog(nvar, gr, chd, eps, p2, y2, gr2);
378  pprob2=0.5*norm2(p2);
379  H2=nll2+pprob2;
380  alpha=exp(H1-H2);
381 
382  // Check if the 1/2 threshold has been crossed
383  if(!std::isnan(alpha) && pow(alpha,a) < pow(2,-a)){
384  if(verbose_find_epsilon){
385  cout << "Chain " << chain << ": Final epsilson: iteration=" << k << "; eps=" << eps << "; NLL1=" << nllbegin << "; p1=" << pprob1 << "; H1=" << H1 <<
386  "; NLL2=" << nll2 << "; p2=" << pprob2 <<"; H2=" << H2<< "; alpha=" << alpha << endl;
387  }
388  if(verbose_adapt_mass) {cout << "Chain " << chain << ": Found reasonable step size of " << eps << endl;}
389  return(eps);
390  } else {
391  // Otherwise either halve or double eps and do another iteration
392  eps=pow(2,a)*eps;
393  }
394  }
395  cerr << "Chain " << chain <<": Final epsilson: iteration=" << 50 << "; eps=" << eps << "; NLL1=" << nllbegin << "; p1=" << pprob1 << "; H1=" << H1 <<
396  "; NLL2=" << nll2 << "; p2=" << pprob2 <<"; H2=" << H2<< "; alpha=" << alpha << endl;
397  cerr << "Chain " << chain << ": Could not find reasonable initial step size. Is something wrong with model/initial value?" << endl;
398  ad_exit(1);
399  return(eps); // dummy to avoid compile warnings
400 } // end of function
401 
402 
408 double function_minimizer::leapfrog(int nvar, dvector& gr, dmatrix& chd, double eps, dvector& p, dvector& x,
409  dvector& gr2)
410 {
411  independent_variables y(1,nvar); // bounded parameters
412  dvector phalf;
413  // Update momentum by half step
414  phalf=p-eps/2*gr2;
415  // Update parameters by full step
416  x+=eps*phalf;
417  // Transform parameters via mass matrix to get new gradient
418  y=rotate_pars(chd,x);
419  // Get NLL and set updated gradient in gr by reference
420  double nll=get_hybrid_monte_carlo_value(nvar,y,gr);
421  // Update gradient via mass matrix
422  gr2=rotate_gradient(gr, chd);
423  // Last half step for momentum
424  p=phalf-eps/2*gr2;
425  return(nll);
426 }
427 
428 // This function reads in the hessian file to get the MLE values at the
429 // end. This is needed when the user doesn't pass an initial vector with
430 // -mcpin b/c the model is not necessarily run with -est. With adnuts it is
431 // not by default so need a default starting value.
433  int debug = 0;//flag to print debugging info to terminal
434  adstring tmpstring = "admodel.hes";
435  uistream cif((char*)tmpstring);
436  if (!cif) {
437  cerr << "Error reading admodel.hes file to get MLE values. Try re-optimizing model." << endl;
438  ad_exit(1);
439  }
440  int tmp_nvar = 0;
441  cif >> tmp_nvar;
442  if (nvar !=tmp_nvar) {
443 // cerr << "The number of variables in admodel.hes " << tmp_nvar << " does not match " << nvar <<
444 // ". Try re-optimizing model." << endl;
445 // ad_exit(1);
446  cout<<"NOTE: the number of active parameters ("<<nvar<<") does not equal the dimension in admodel.hes ("<<tmp_nvar<<")."<<endl;
447  cout<<" This could mean admodel.hes is old or random effects are in use."<<endl;
448  }
449  dmatrix hess(1,tmp_nvar,1,tmp_nvar);
450  cif >> hess;
451  if (!cif) {
452  cerr << "Error reading the Hessian matrix from admodel.hes. Try re-optimizing model." << endl;
453  ad_exit(1);
454  } else {
455  if (debug) cout<<"the hessian matrix"<<endl<<hess<<endl;
456  }
457  int oldHbf;
458  cif >> oldHbf;
459  if (!cif) {
460  cerr << "Error reading the hybrid flag from admodel.hes. Try re-optimizing model." << endl;
461  ad_exit(1);
462  }
463  dvector sscale(1,tmp_nvar);
464  cif >> sscale;
465  if (!cif) {
466  cerr << "Error reading the transformation scales from admodel.hes. Try re-optimizing model." << endl;
467  ad_exit(1);
468  } else {
469  if (debug) cout<<"sscale = "<<sscale<<endl;
470  }
471  // Read in the MLEs finally
472  int temp=0;
473  cif >> temp;
474  if (debug) cout<<"flag = "<<temp<<endl;
475  // temp is a unique flag to make sure the mle values were written (that
476  // admodel.hes is not too old)
477  if(temp != -987 || !cif){
478  cerr << "Error reading the check value from admodel.hes. Try re-optimizing model." << endl;
479  ad_exit(1);
480  }
481  cif >> mle;
482  if (debug) cout<<"mle = "<<mle<<endl;
483  if(!cif){
484  cerr << "Error reading the bounded MLE values from admodel.hes. Try re-optimizing model." << endl;
485  ad_exit(1);
486  }
487 }
488 
489 // Function written by Dave to help speed up some of the MCMC
490 // calculations. The code has chd*x which rotates the space but
491 // this is often a vector or at least a lower triangular
492 // matrix. Thus we can make it more efficient. Can go from x to
493 // y, or y to x, depending on if you pass m=chd or m=inverse(chd)
494 /*
495 @param
496 
497  */
499 {
500  if (x.indexmin() != m.colmin() || x.indexmax() != m.colmax())
501  {
502  cerr << " Incompatible array bounds in "
503  "dvector rotate_pars(const dmaxtrix& m, const dvector& x)\n";
504  ad_exit(21);
505  }
506 
507  dvector tmp(m.rowmin(),m.rowmax());
508  int mmin=m.rowmin();
509  int mmax=m.rowmax();
510  int xmin=x.indexmin();
511 
512  // If the metric is a dense matrix, chd will be lower diagonal so just
513  // loop over those. If doing adapt_mass then chd is still passed
514  // through as a matrix (poor programming) but only the digonal will be
515  // non-zero. Hence we can skip the off-diagonals and speed up the
516  // calculation.
517  if(diagonal_metric_flag==0){
518  for (int i=mmin; i<=mmax; i++)
519  {
520  tmp[i]=0;
521  double * pm= (double *) &(m(i,xmin));
522  double * px= (double *) &(x(xmin));
523  double tt=0.0;
524  for (int j=xmin; j<=i; j++)
525  {
526  tt+= *pm++ * *px++;
527  }
528  tmp[i]=tt;
529  }
530  } else if(diagonal_metric_flag==1){
531  // Only the diagonals are nonzero so skip the offdiagonals completely
532  for (int i=mmin; i<=mmax; i++)
533  {
534  double * pm= (double *) &(m(i,i));
535  double * px= (double *) &(x(i));
536  tmp[i]= *pm * *px;
537  }
538  } else {
539  cerr << "Invalid value for diagonal_metric_flag in rotate_pars" << endl;
540  ad_exit(21);
541  }
542  return(tmp);
543 }
544 
545 // See help for rotate_pars above, it's the same except rotating a gradient
546 // vector rather than a par vector (although math is different)
548 {
549  if (x.indexmin() != m.colmin() || x.indexmax() != m.colmax())
550  {
551  cerr << " Incompatible array bounds in "
552  "dvector rotate_gradient(const dvector& x, const dmatrix& m)\n";
553  ad_exit(21);
554  }
555  int mmin=m.colmin();
556  int mmax=m.colmax();
557  dvector tmp(mmin,mmax);
558  if(diagonal_metric_flag==0){
559  for (int j = mmin; j <= mmax; ++j)
560  {
561  dvector column = extract_column(m, j);
562  double* pm = (double*)&column(j);
563  double* px = (double*)&x(j);
564  double sum = *px * *pm;
565  for (int i=j; i < mmax; ++i)
566  {
567  sum += *(++px) * *(++pm);
568  }
569  tmp[j] = sum;
570  }
571  } else if(diagonal_metric_flag==1)
572  {
573  for (int i=mmin; i<=mmax; i++)
574  {
575  double * pm= (double *) &(m(i,i));
576  double * px= (double *) &(x(i));
577  tmp[i] = *pm * *px;
578  }
579  }else {
580  cerr << "Invalid value for diagonal_metric_flag in rotate_gradient" << endl;
581  ad_exit(21);
582  }
583  return(tmp);
584 }
585 
598 void function_minimizer::add_sample_diag(const int nvar, int& n, dvector& m, dvector& m2,
599  const independent_variables& q) {
600  n++;
601  //convert q to dvector ( better way to do this?)
602  dvector aq(1,nvar);
603  aq=q;
604  dvector delta=aq - m;
605  m += delta / n;
606  m2 += elem_prod(aq-m, delta);
607 }
608 
621 void function_minimizer::add_sample_dense(const int nvar, int& n, dvector& m, dmatrix& m2,
622  const independent_variables& q) {
623  n++;
624  //convert q to dvector ( better way to do this?)
625  dvector aq(1,nvar);
626  aq=q;
627  dvector delta=aq - m;
628  m += delta / n;
629  m2 += outer_prod(aq-m, delta);
630 }
631 
642  dmatrix& chd, dmatrix& chdinv){
643 
644  // Save copy before modifying it
645  dmatrix chd0(1,nvar,1,nvar);
646  chd0=chd;
647  bool success = true;
648  if(diagonal_metric_flag==0){
649  // will fail if not positive definite
650  success = choleski_decomp_hmc(metric, chd); // cholesky decomp of mass matrix
651  if(success){
652  chdinv=inv(chd);
653  } else {
654  // wasn't positive definite so don't update it (better to
655  // do diagonal?)), reset to original
656  chd=chd0;
657  }
658  } else {
659  // If diagonal, chd is just sqrt of diagonals and inverse the reciprocal
660  chd.initialize(); chdinv.initialize();
661  for(int i=1;i<=nvar;i++){
662  if(metric(i,i)<0){
663  cerr << "Element " << i << " of diagonal mass matrix was <0... setting to 1 instead" << endl;
664  chd(i,i)=1;
665  } else{
666  chd(i,i)=sqrt(metric(i,i));
667  }
668  chdinv(i,i)=1/chd(i,i);
669  }
670  }
671  return(success);
672 }
673 
674 
675 /* This function is a copy of choleski_decomp from dmat15.cpp,
676  except tweaked for use with adaptive dense stepsize in HMC. The
677  main difference is that instead of exiting on error it gives a
678  more informative error, also it returns a flag for whether it
679  succeeded, and updates L by reference. -Cole 3/2020
680 
681  @param metric A covariance matrix as estimated from warmup samples
682  @param L A cholesky decomposed matrix
683  @return A boolean whether algorithm succeeded, and L by reference
684 
685  */
687  // kludge to deal with constantness
688  dmatrix & M= * (dmatrix *) &metric;
689  if (M.colsize() != M.rowsize())
690  {
691  cerr << "Error in choleski_decomp_hmc. Mass matrix not square" << endl;
692  ad_exit(1);
693  }
694  int rowsave=M.rowmin();
695  int colsave=M.colmin();
696  M.rowshift(1);
697  M.colshift(1);
698  int n=M.rowmax();
700  // dmatrix L(1,n,1,n);
701  // #ifndef SAFE_INITIALIZE
702  L.initialize();
703  // #endif
704 
705  int i,j,k;
706  double tmp;
707  bool success=true;
708  adstring tmpstring = "Mass matrix not positive definite when updating dense mass matrix adaptation.";
709  if (M(1,1)<=0) {success=false; return success;}
710  // I didn't touch the actual algorithm
711  L(1,1)=sqrt(M(1,1));
712  for (i=2;i<=n;i++) {
713  L(i,1)=M(i,1)/L(1,1);
714  }
715  for (i=2;i<=n;i++) {
716  for (j=2;j<=i-1;j++) {
717  tmp=M(i,j);
718  for (k=1;k<=j-1;k++) {
719  tmp-=L(i,k)*L(j,k);
720  }
721  L(i,j)=tmp/L(j,j);
722  }
723  tmp=M(i,i);
724  for (k=1;k<=i-1;k++) {
725  tmp-=L(i,k)*L(i,k);
726  }
727  if (tmp<=0) {success=false; return success;}
728  L(i,i)=sqrt(tmp);
729  }
730  L.rowshift(rowsave);
731  L.colshift(colsave);
732  return success;
733 }
laplace_approximation_calculator * lapprox
Definition: admodel.h:1862
dvector log10(const dvector &vec)
Returns dvector with the common (base-10) logarithm of vec.
Definition: dvect6.cpp:273
d3_array elem_prod(const d3_array &a, const d3_array &b)
Returns d3_array results with computed elements product of a(i, j, k) * b(i, j, k).
Definition: d3arr2a.cpp:92
dvector rotate_pars(const dvector &m, const dvector &x)
void read_mle_hmc(int nvar, dvector &mle)
#define x
Vector of double precision numbers.
Definition: dvector.h:50
bool stop_criterion(int nvar, dvector &thetaminus, dvector &thetaplus, dvector &rminus, dvector &rplus)
int indexmin() const
Get minimum valid index.
Definition: dvector.h:199
double sum(const d3_array &darray)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: d3arr.cpp:21
int compute_next_window(int i, int warmup, int w1, int aws, int w3)
void print_mcmc_progress(int is, int nmcmc, int nwarmup, int chain, int refresh)
exitptr ad_exit
Definition: gradstrc.cpp:53
bool choleski_decomp_hmc(const dmatrix &metric, dmatrix &L)
static dvariable reset(const dvar_vector &x)
Definition: model.cpp:345
ADMB variable vector.
Definition: fvar.hpp:2172
double leapfrog(int nvar, dvector &gr, dmatrix &chd, double eps, dvector &p, dvector &x, dvector &gr2)
Function to take a single HMC leapfrog step, given current position and momentum variables.
unsigned int colsize() const
Definition: fvar.hpp:2948
dvector rotate_gradient(const dvector &x, const dmatrix &m)
void gradcalc(int nvar, const dvector &g)
Definition: sgradclc.cpp:77
static int debug
double find_reasonable_stepsize(int nvar, dvector y, dvector p, dmatrix &chd, bool verbose_adapt_mass, bool verbose_find_epsilon, int chain)
dvector extract_column(const dmatrix &matrix, int j)
Extract copy of jth column vector from matrix m.
Definition: dmat6.cpp:34
static int stddev_vscale(const dvar_vector &d, const dvar_vector &x)
Definition: model.cpp:191
bool calculate_chd_and_inverse(int nvar, const dmatrix &metric, dmatrix &chd, dmatrix &chdinv)
Calculate the Cholesky decomposition and its inverse given a mass matrix.
double randu(const random_number_generator &rng)
Uniform random number generator.
Definition: rngen.cpp:198
Description not yet available.
Definition: fvar.hpp:7951
prnstream & endl(prnstream &)
Description not yet available.
Definition: fvar.hpp:1937
int rowmax() const
Definition: fvar.hpp:2929
d3_array sqrt(const d3_array &arr3)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: d3arr2c.cpp:11
double get_hybrid_monte_carlo_value(int nvar, const independent_variables &y, dvector &g)
Written by Dave, commented by Cole starting 8/31/2016 Description not yet available.
#define min(a, b)
Definition: cbivnorm.cpp:188
int indexmax() const
Get maximum valid index.
Definition: dvector.h:204
static objective_function_value * pobjfun
Definition: admodel.h:2394
Description not yet available.
d3_array exp(const d3_array &arr3)
Returns d3_array results with computed exp from elements in arr3.
Definition: d3arr2a.cpp:28
#define M
Definition: rngen.cpp:57
int colmin(void) const
Definition: fvar.hpp:2939
dmatrix outer_prod(const dvector &v1, const dvector &v2)
Description not yet available.
Definition: dmat23.cpp:17
Description not yet available.
Definition: fvar.hpp:2819
double norm2(const d3_array &a)
Return sum of squared elements in a.
Definition: d3arr2a.cpp:167
static void copy_all_values(const dvector &x, const int &ii)
Definition: model3.cpp:9
double adapt_eps(int ii, int iseps, double eps, double alpha, double &adapt_delta, double &mu, dvector &epsvec, dvector &epsbar, dvector &Hbar)
void print_mcmc_timing(double time_warmup, double time_total, int chain)
bool slow_phase(int is, int warmup, int w1, int w3)
void rowshift(int min)
Changes the range of valid indices for the rows.
Definition: dmat9.cpp:48
size_t pos(const adstring &substr, const adstring &s)
Definition: string3.cpp:56
double eps
Definition: ftweak.cpp:13
void colshift(int min)
Description not yet available.
Definition: dmat9.cpp:68
unsigned int rowsize() const
Definition: fvar.hpp:2934
Description not yet available.
Definition: fvar.hpp:3516
int diagonal_metric_flag
Definition: admodel.h:1887
dvector column(const dmatrix &matrix, int j)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: dmat6.cpp:13
dvector value(const df1_one_vector &v)
Definition: df11fun.cpp:69
virtual void userfunction(void)=0
std::string get_filename(const char *f)
void add_sample_dense(const int nvar, int &is2, dvector &m, dmatrix &m2, const independent_variables &q)
Calculate running covariance using Welford&#39;s &quot;online&quot; algorithm.
void initialize(void)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: dmat7.cpp:12
int rowmin() const
Definition: fvar.hpp:2925
void build_tree(int nvar, dvector &gr, dmatrix &chd, double eps, dvector &p, dvector &y, dvector &gr2, double logu, int v, int j, double H0, dvector &_thetaprime, dvector &_thetaplus, dvector &_thetaminus, dvector &_rplus, dvector &_rminus, double &_alphaprime, int &_nalphaprime, bool &_sprime, int &_nprime, int &_nfevals, bool &_divergent, const random_number_generator &rng, dvector &gr2_end, dvector &_grprime, dvector &_gr2prime, double &_nllprime, double &_Hprime, independent_variables &_parsaveprime)
Fundamental data type for reverse mode automatic differentiation.
Definition: fvar.hpp:1518
df1_one_variable inv(const df1_one_variable &x)
Definition: df11fun.cpp:384
d3_array log(const d3_array &arr3)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: d3arr2a.cpp:13
void add_sample_diag(const int nvar, int &n, dvector &m, dvector &m2, const independent_variables &q)
Calculate running covariance using Welford&#39;s &quot;online&quot; algorithm.
d3_array pow(const d3_array &m, int e)
Description not yet available.
Definition: d3arr6.cpp:17
int colmax(void) const
Definition: fvar.hpp:2943