ADMB Documentation  -a65f1c97
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
nuts.cpp
Go to the documentation of this file.
1 
2 // $Id$
10 #include "admodel.h"
11 #ifndef OPT_LIB
12 #include <cassert>
13 #endif
14 #include<ctime>
15 void read_empirical_covariance_matrix(int nvar, const dmatrix& S, const adstring& prog_name);
16 void read_hessian_matrix_and_scale1(int nvar, const dmatrix& _SS, double s, int mcmc2_flag);
17 
51 void function_minimizer::nuts_mcmc_routine(int nmcmc,int iseed0,double dscale,
52  int restart_flag) {
53  if (nmcmc<=0) {
54  cerr << endl << "Error: Negative iterations for MCMC not meaningful" << endl;
55  ad_exit(1);
56  }
57  // I haven't figured out what to do with RE yet, so leaving as is for
58  // now. It will throw an error later on. -Cole
59  uostream * pofs_psave=NULL;
63  // int nvar_re=initial_params::nvarcalc();
64  int nvar=initial_params::nvarcalc(); // get the number of active parameters
65  if (mcmc2_flag==0){
67  nvar=initial_params::nvarcalc(); // get the number of active parameters
68  }
70  independent_variables parsave(1,nvar);
72  int old_Hybrid_bounded_flag=-1;
73  int on,nopt = 0;
74 
76  // Step size. If not specified, will be adapted. If specified must be >0
77  // and will not be adapted.
78  double eps=0.1;
79  double _eps=-1.0;
80  int useDA=1; // whether to adapt step size
81  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-hyeps",nopt))>-1) {
82  if (!nopt){ // not specified means to adapt, using function below to find reasonable one
83  cerr << "Warning: No step size given after -hyeps, ignoring" << endl;
84  useDA=1;
85  } else { // read in specified value and do not adapt
86  istringstream ist(ad_comm::argv[on+1]);
87  ist >> _eps;
88  if (_eps<=0) {
89  cerr << "Error: step size (-hyeps argument) needs positive number";
90  ad_exit(1);
91  } else {
92  eps=_eps;
93  useDA=0;
94  }
95  }
96  }
97 
98  // Run duration. Can specify warmup and duration, or warmup and
99  // iter. Sampling period will stop after exceeding <duration> hours. If
100  // duration ends before warmup is finished there will be no samples.
101  double duration=0;
102  bool use_duration=0; // whether to use this or iter
103  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-duration",nopt))>-1) {
104  double _duration=0;
105  use_duration=1;
106  istringstream ist(ad_comm::argv[on+1]);
107  ist >> _duration;
108  if (_duration <0) {
109  cerr << "Error: duration must be > 0" << endl;
110  ad_exit(1);
111  } else {
112  // input is in minutes, duration is in seconds so convert
113  duration=_duration*60;
114  }
115  }
116  // chain number -- for console display purposes only, useful when running
117  // in parallel
118  int chain=1;
119  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-chain",nopt))>-1) {
120  int iii=atoi(ad_comm::argv[on+1]);
121  if (iii <1) {
122  cerr << "Error: chain must be >= 1" << endl;
123  ad_exit(1);
124  } else {
125  chain=iii;
126  }
127  }
128  // console refresh rate
129  int refresh=1;
130  if(nmcmc>10) refresh = (int)floor(nmcmc/10);
131  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-refresh",nopt))>-1) {
132  int iii=atoi(ad_comm::argv[on+1]);
133  if (iii < -1) {
134  cerr << iii << endl;
135  cerr << "Error: refresh must be >= -1. Use -1 for no refresh or positive integer for rate." << endl;
136  ad_exit(1);
137  } else {
138  refresh=iii;
139  }
140  }
141 
142  // Number of leapfrog steps.
143  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-hynstep",nopt))>-1) {
144  cerr << "Error: hynstep argument not allowed with NUTS " << endl;
145  ad_exit(1);
146  }
147  // Number of warmup samples if using adaptation of step size. Defaults to
148  // half of iterations.
149  int warmup= (int)nmcmc/2;
150  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-warmup",nopt))>-1) {
151  int iii=atoi(ad_comm::argv[on+1]);
152  if (iii <=0 || iii > nmcmc) {
153  cerr << "Error: warmup must be 0 < warmup < nmcmc" << endl;
154  ad_exit(1);
155  } else {
156  warmup=iii;
157  }
158  }
159  // Target acceptance rate for step size adaptation. Must be
160  // 0<adapt_delta<1. Defaults to 0.8.
161  double adapt_delta=0.8;
162  if ((on=option_match(ad_comm::argc,ad_comm::argv,"-adapt_delta",nopt))>-1) {
163  istringstream ist(ad_comm::argv[on+1]);
164  double _adapt_delta;
165  ist >> _adapt_delta;
166  if (_adapt_delta < 0 || _adapt_delta > 1 ) {
167  cerr << "Error: adapt_delta must be between 0 and 1" << endl;
168  ad_exit(1);
169  } else {
170  adapt_delta=_adapt_delta;
171  }
172  }
173  // Initial buffer window size for metric adaptation
174  int adapt_window=50;
175  if ((on=option_match(ad_comm::argc,ad_comm::argv,"-adapt_window",nopt))>-1) {
176  istringstream ist(ad_comm::argv[on+1]);
177  int _adapt_window;
178  ist >> _adapt_window;
179  if (_adapt_window < 1) {
180  cerr << "Error: adapt_window invalid" << endl;
181  ad_exit(1);
182  } else {
183  adapt_window=_adapt_window;
184  }
185  }
186  // Initial buffer window before adaptation of metric starts (first fast phase)
187  int adapt_term_buffer=75;
188  if ((on=option_match(ad_comm::argc,ad_comm::argv,"-adapt_term_buffer",nopt))>-1) {
189  istringstream ist(ad_comm::argv[on+1]);
190  int _adapt_term_buffer;
191  ist >> _adapt_term_buffer;
192  if (_adapt_term_buffer < 1 ) {
193  cerr << "Error: adapt_term_buffer invalid" << endl;
194  ad_exit(1);
195  } else {
196  adapt_term_buffer=_adapt_term_buffer;
197  }
198  }
199  // Initial buffer window before adaptation of metric starts (first fast phase)
200  int adapt_init_buffer=50;
201  if ((on=option_match(ad_comm::argc,ad_comm::argv,"-adapt_init_buffer",nopt))>-1) {
202  istringstream ist(ad_comm::argv[on+1]);
203  int _adapt_init_buffer;
204  ist >> _adapt_init_buffer;
205  if (_adapt_init_buffer < 1 ) {
206  cerr << "Error: adapt_init_buffer invalid" << endl;
207  ad_exit(1);
208  } else {
209  adapt_init_buffer=_adapt_init_buffer;
210  }
211  }
212 
213  // Max treedpeth is the number of times a NUTS trajectory will double in
214  // length before stopping. Thus length <= 2^max_treedepth+1
215  int max_treedepth=12;
216  if ((on=option_match(ad_comm::argc,ad_comm::argv,"-max_treedepth",nopt))>-1) {
217  istringstream ist(ad_comm::argv[on+1]);
218  int _max_treedepth;
219  ist >> _max_treedepth;
220  if (_max_treedepth < 0) {
221  cerr << "Error: max_treedepth must be > 0" << endl;
222  ad_exit(1);
223  } else {
224  max_treedepth=_max_treedepth;
225  }
226  }
227  // Use diagnoal covariance (identity mass matrix)
228  int diag_option=0;
229  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcdiag"))>-1) {
230  diag_option=1;
231  // cout << "Setting covariance matrix to diagonal with entries" << endl;
232  }
233  // Whether to do diagonal adaptation of the mass matrix
234  int adapt_mass=0;
235  diagonal_metric_flag=0; // global flag used in functions to do more efficient calculations
236  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-adapt_mass"))>-1) {
238  adapt_mass=1;
239  }
240  // Whether to do dense adaptation of the mass matrix
241  int adapt_mass_dense=0;
242  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-adapt_mass_dense"))>-1) {
243  if(adapt_mass==1){
244  cerr << "You can only specify one of adapt_mass and adapt_mass_dense" << endl;
245  ad_exit(1);
246  }
248  adapt_mass_dense=1;
249  }
250  // Whether to print mass matrix adaptation steps to console
251  int verbose_adapt_mass=0;
252  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-verbose_adapt_mass"))>-1) {
253  verbose_adapt_mass=1;
254  }
255  // Whether to print diagnostic info inside find_reasonable_stepsize function
256  int verbose_find_epsilon=0;
257  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-verbose_find_epsilon"))>-1) {
258  verbose_find_epsilon=1;
259  }
260  // Whether to print diagnostic information
261  int diagnostic_flag=0;
262  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-nutsdiagnostic"))>-1) {
263  diagnostic_flag=1;
264  }
265  // Restart chain from previous run?
266  int mcrestart_flag=option_match(ad_comm::argc,ad_comm::argv,"-mcr");
267  if(mcrestart_flag > -1){
268  cerr << endl << "Error: -mcr option not implemented for HMC" << endl;
269  ad_exit(1);
270  }
271  // Not sure what mcec is? Use empirical covariance?
272  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcec"))>-1) {
273  cerr << endl << "Error: -mcec option not yet implemented with HMC" << endl;
274  ad_exit(1);
275  }
276  // How much to thin, for now fixed at 1 and done externally.
277  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcsave"))>-1) {
278  cerr << "Option -mcsave does not currently work with HMC -- every iteration is saved" << endl;
279  ad_exit(1);
280  }
281  // Prepare the mass matrix for use. For now assuming mass matrix passed
282  // on the unconstrained scale using hybrid bounds, which is detectable
283  // becuase the hbf flag is 1 in the .cov file. In that case, there is no
284  // need to rescale. If unit mass matrix specified with -mcdiag then do
285  // not rescale. If the user didnt overwrite admodel.cov, then it needs to
286  // be rescaled since the scales in admodel.cov are with hbf=0. To rescale
287  // means we need to know the MLE in bounded space, read in as mle
288  // above. This prevents re-estimating the model each time. Then the new
289  // scales can be calculated and the matrix converted to bounded, then
290  // back to unbounded in hybrid space.
291  //
292  // Actually, I decided to force the user to rerun the model with -hbf 1
293  // so that the scales are correct. This code wasnt working quite right
294  // but leaving here in case someone wants to try adding this back later.
295  dmatrix S(1,nvar,1,nvar);
296  dvector old_scale(1,nvar);
297  bool rescale_covar=0;
298  if (diag_option){ // set covariance to be diagonal
299  S.initialize();
300  for (int i=1;i<=nvar;i++) {
301  S(i,i)=1;
302  }
303  } else {
304  // Need to grab old_scale values still
305  read_covariance_matrix(S,nvar,old_Hybrid_bounded_flag,old_scale);
306  // See above for why we need to check for this. Sometimes need to
307  // rescale, sometimes not.
308  if(old_Hybrid_bounded_flag != 1) {
309  rescale_covar=1;
310  }
311  }
312  if(rescale_covar){
313  // If no covar was pushed by the user, then the MLE one is used
314  // about. But since that admodel.cov file was written with the hbf=0
315  // transformations, it is wrong now that hbf=1. So convert to covar in
316  // transformed space using the old scales, and then back to the
317  // untransformed space using the new scales.
318 
319  // For now turning this off. Might be easier and more reliable
320  // to force user to rerun the model with the right hbf.
321  cerr << "Error: To use -nuts a Hessian using the hybrid transformations is needed." <<
322  endl << "...Rerun model with '-hbf' and try again" << endl;
323  ad_exit(1);
324  // cout << "Rescaling covariance matrix b/c scales don't match" << endl;
325  // cout << "old scale=" << old_scale << endl;
326  // cout << "current scale=" << current_scale << endl;
327  // // cout << "S before=" << S << endl;
328  // // Rescale the covariance matrix
329  // for (int i=1;i<=nvar;i++) {
330  // for (int j=1;j<=nvar;j++) {
331  // S(i,j)*=old_scale(i)*old_scale(j);
332  // S(i,j)/=current_scale(i)*current_scale(j);
333  // }
334  // }
335  }
336  dmatrix chd(1,nvar,1,nvar);
337  dmatrix chdinv(1,nvar,1,nvar);
338  // update chd and chdinv by reference
339  bool success;
340  success=calculate_chd_and_inverse(nvar, S, chd, chdinv);
341  if(!success){
342  cerr << "Cannot start algorithm, something is wrong with the mass matrix" << endl;
343  ad_exit(1);
344  }
345 
347 
351  // User-specified initial values
352  nopt=0;
353  independent_variables z0(1,nvar); // inits in bounded space
355  if ( (on=option_match(ad_comm::argc,ad_comm::argv,"-mcpin",nopt))>-1) {
356  if (nopt) {
357  cifstream cif((char *)ad_comm::argv[on+1]);
358  if (!cif) {
359  cerr << "Error trying to open mcmc par input file "
360  << ad_comm::argv[on+1] << endl;
361  ad_exit(1);
362  }
363  cif >> z0;
364  if (!cif) {
365  cerr << "Error reading from mcmc par input file "
366  << ad_comm::argv[on+1] << endl;
367  ad_exit(1);
368  }
369  } else {
370  cerr << "Illegal option with -mcpin" << endl;
371  }
372  } else {
373  // If user didnt specify one, use MLE values. Store saved MLEs from
374  // admodel.hes file in bounded space into initial parameters z0
375  read_mle_hmc(nvar, z0);
376  }
378 // Setup binary psv file to write samples to
379  pofs_psave=
380  new uostream((char*)(ad_comm::adprogram_name + adstring(".psv")));
381  if (!pofs_psave|| !(*pofs_psave)) {
382  cerr << "Error trying to open file" <<
384  ad_exit(1);
385  }
386  // These files holds the rotated (x), unbounded (y) and bounded (z)
387  // samples (not warmup). Can read this in to get empirical covariance on
388  // the unbounded scale and then put back into admodel.cov. Or look at how
389  // mass matrix is doing. For now turned off rotated and bounded since
390  // don't need those immediately
391  // ofstream rotated("rotated.csv", ios::trunc);
392  // ofstream bounded("bounded.csv", ios::trunc);
393  ofstream unbounded("unbounded.csv", ios::trunc);
394  // Save nvar first. If added mcrestart (mcrb) functionality later need to
395  // fix this line.
396  (*pofs_psave) << nvar;
397  // Setup random number generator, based on seed passed or hardcoded
398  int iseed = iseed0 ? iseed0 : rand();
399  random_number_generator rng(iseed);
400  // Run timings
401  double time_warmup=0;
402  double time_total=0;
403  std::clock_t start = clock();
404  time_t now = time(0);
405  tm* localtm = localtime(&now);
406  std::string m=get_filename((char*)ad_comm::adprogram_name);
407  cout << endl << endl << "Chain " << chain << ": Starting NUTS for model '" << m <<
408  "' at " << asctime(localtm);
409  if(use_duration==1){
410  cout << "Chain " << chain << ": Model will run for " << duration/60 <<
411  " minutes or until " << nmcmc << " total iterations" << endl;
412  }
413  // Check that the adaptation settings make sense given warmup
414  // and if using adaptation
415  int aws = adapt_window; // adapt window size
416  // Adapt next window... the next iteration at which the adaptation takes place
417  int anw = compute_next_window(adapt_init_buffer, warmup, adapt_init_buffer, aws, adapt_term_buffer);
418  if(adapt_mass || adapt_mass_dense){
419  if(adapt_init_buffer + adapt_window + adapt_term_buffer >= warmup) {
420  cerr << "Chain " << chain << ": Warning: Turning off mass matrix adaptation because warmup<= " <<
421  adapt_init_buffer+adapt_window + adapt_term_buffer << endl;
422  cerr << "Chain " << chain << ": Warning: Either increase warmup, adjust adaptation inputs, or turn off adaptation" << endl;
423  adapt_mass=0; adapt_mass_dense=0;
424  }
425  }
426  if( adapt_mass){
427  cout << "Chain " << chain << ": Using diagonal mass matrix adaptation" << endl;
428  } else if(adapt_mass_dense) {
429  cout << "Chain " << chain << ": Using dense mass matrix adaptation" << endl;
430  } else {
431  cout << "Chain " << chain << ": Not using mass matrix adaptation" << endl;
432  }
433 
434  if(verbose_adapt_mass==1){
435  dvector tmp=diagonal(S);
436  cout << "Chain " << chain << ": Initial margial variances: min=" << min(tmp) << " and max=" << max(tmp) << endl;
437  }
438 
439  if(diag_option) cout << "Chain " << chain <<": Initializing with unit diagonal mass matrix" << endl;
440  // write sampler parameters in format used by Shinystan
441  dvector epsvec(1,nmcmc+1), epsbar(1,nmcmc+1), Hbar(1,nmcmc+1);
442  epsvec.initialize(); epsbar.initialize(); Hbar.initialize();
443  double mu;
444  ofstream adaptation("adaptation.csv", ios::trunc);
445  adaptation << "accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,lp__" << endl;
447  // --------------------------------------------------
448 
450  // was specified by user, but need y0 and x0. First, pass z0
451  // through the model. Since this wasn't necessarily the last
452  // vector evaluated, need to propogate it through ADMB
453  // internally so can use xinit().
455  independent_variables y0(1,nvar); // inits in unbounded space
456  dvector current_scale(1,nvar);
457  gradient_structure::set_YES_DERIVATIVES(); // don't know what this does
458  // This copies the unbounded parameters into y0
460  // Don't know what this does or if necessary
461  dvector pen_vector(1,nvar);
462  initial_params::reset(dvar_vector(y0),pen_vector);
464  initial_params::stddev_scale(current_scale,y0);
466 
468  // Declare and initialize the variables needed for the algorithm
469  // Get NLL and gradient in unbounded space for initial value y0.
470  dvector gr(1,nvar); // gradients in unbounded space
471  dvector gr2(1,nvar); // gradients in rotated space
472 
473 
474  dvariable jac=0.0;
475  dvariable userNLL=0.0;
476 
477  // Get the Jacobian and user NLL at initial value.
478  dvar_vector vx=dvar_vector(y0);
479  jac=initial_params::reset(vx);
481  userfunction();
482  dvar_vector dtemp(1,nvar);
484  jac=sum(log(dtemp)); // get Jacobian adjustment from bounded pars
485  userNLL=*objective_function_value::pobjfun; // NLL defined by user
486  // For some reason if I comment this out then the next call to
487  // get_hybrid_mc will return gradients of 0. No idea why but
488  // leave this here.
489  gradcalc(nvar,gr);
490 
491  std::clock_t start0 = clock();
492  double nll=get_hybrid_monte_carlo_value(nvar,y0,gr);
493  double time_gradient = static_cast<double>(std::clock()-start0) / CLOCKS_PER_SEC;
494  gr2=rotate_gradient(gr, chd);
495  // Can now inverse rotate y0 to be x0 (algorithm space)
496  independent_variables x0(1,nvar); // inits in algorithm space
497  x0=rotate_pars(chdinv,y0);
498  // Now have z0, y0, x0, objective fn value, gradients in
499  // unbounded (y) and rotated (x) space all at the intial value.
500 
501  cout << "Chain " << chain << ": Initial negative log density= " << userNLL <<
502  ", Jacobian= " << jac << ", Total= " << userNLL-jac << endl;
503  cout << "Chain " << chain << ": Gradient eval took " << time_gradient <<
504  " sec. " << nmcmc << " iter w/ 100 steps would take " ;
505  time_gradient*= (nmcmc*100);
506  if(time_gradient<=60){
507  printf("%.2f", time_gradient); cout << " seconds." << endl;
508  } else if(time_gradient <=60*60){
509  printf("%.2f", time_gradient/60); cout << " minutes." << endl;
510  } else if(time_gradient <= (60*60*24)){
511  printf("%.2f", time_gradient/(60*60)); cout << " hours." << endl;
512  } else {
513  printf("%.2f", time_gradient/(24*60*60)); cout << " days." << endl;
514  }
515 
516  // Now initialize these into the algorithm arguments needed.
517  independent_variables theta(1,nvar);
518  // kind of a misnomer here: theta is in "x" or algorithm space
519  // (before applying the mass matrix rotation, and before the
520  // bounding functions are applied)
521  theta=x0;
522  independent_variables z(1,nvar);
523  independent_variables ynew(1,nvar); // local copy of y
524  z=z0; ynew=y0;
525  dvector p(1,nvar); // momentum vector
526  double alphasum=0; // running sum for calculating final accept ratio
527  // Global variables to track the extreme left and rightmost nodes of the
528  // entire tree. This gets overwritten due to passing things by reference
529  // in buildtree
530  dvector thetaminus_end(1,nvar); dvector thetaplus_end(1,nvar);
531  dvector rminus_end(1,nvar); dvector rplus_end(1,nvar);
532  dvector gr2minus_end(1,nvar); dvector gr2plus_end(1,nvar);
533  // References to left most and right most theta and momentum for current
534  // subtree inside of buildtree. The ones proceeded with _ are passed
535  // through buildtree by reference. Inside build_tree, _thetaplus is left
536  // the same if moving in the left direction. Thus these are the global
537  // left and right points.
538  dvector _thetaminus(1,nvar); dvector _thetaplus(1,nvar);
539  dvector _rminus(1,nvar); dvector _rplus(1,nvar);
540  // Hold temporary copies of these as new points are chosen to
541  // avoid recalculations at end of trajetory
542  dvector thetaprime(1,nvar); dvector _thetaprime(1,nvar);
543  dvector grprime(1,nvar); dvector _grprime(1,nvar);
544  dvector gr2prime(1,nvar); dvector _gr2prime(1,nvar);
545  double Hprime, _Hprime;
546  double nllprime, _nllprime;
547  independent_variables parsaveprime(1,nvar);
548  parsaveprime=z0;
549  independent_variables _parsaveprime(1,nvar);
550 
551  // These are used inside NUTS by reference
552  int _nalphaprime, _nprime, _nfevals;
553  double _alphaprime;
554  bool _sprime, _divergent;
555  int ndivergent=0; // # divergences post-warmup
556  int nsamples=0; // total samples, not always nmcmc if duration option used
557  // Declare some local variables used below.
558  double H0, H, logu, value, rn, alpha;
559  int n, j, v;
560  bool s,b;
561  // Mass matrix adapatation algorithm variables for diagonal
562  dvector am(1,nvar);
563  dvector am2(1,nvar);
564  // and dense
565  dvector adm(1,nvar);
566  dmatrix adm2(1,nvar,1,nvar);
567  int is2=0; int is3=0;
568  int iseps=0;
569 
570  dmatrix metric(1,nvar,1,nvar); // holds updated metric
571  if(diagnostic_flag){
572  cout << "Initial chd=" << chd << endl;
573  cout << "Initial negative log density=" << nll << endl;
574  cout << "Initial gr in unbounded space= " << gr << endl;
575  cout << "Initial gr in rotated space= " << gr2 << endl;
576  cout << "Initial bounded parameters=" << z0 << endl;
577  cout << "Initial unbounded parameters=" << y0 << endl;
578  cout << "Initial rotated, unbounded parameters=" << x0 << endl;
579  }
580  // Start of MCMC chain
581  for (int is=1;is<=nmcmc;is++) {
582  // Randomize momentum for next iteration, update H, and reset the tree
583  // elements.
584  p.fill_randn(rng);
585  _rminus=p; _rplus=p;
586  // Note: theta, nll, gr, and gr2 are caried over from end of last loop
587  _thetaprime=theta; _thetaminus=theta; _thetaplus=theta;
588  H0=-nll-0.5*norm2(p); // initial Hamiltonian value
589  logu=H0+log(randu(rng)); // slice variable
590  if(useDA && is==1){
591  // Setup dual averaging components to adapt step size
592  eps=find_reasonable_stepsize(nvar,theta,p,chd,true, verbose_find_epsilon, chain);
593  mu=log(eps);
594  epsvec(1)=eps; epsbar(1)=eps; Hbar(1)=0;
595  }
596  // Generate single NUTS trajectory by repeatedly doubling build_tree
597  _nprime=0; _divergent=0; _nfevals=0;
598  n=1; s=1; j=0;
599  // Reset global ones to initial point of this trajectory
600  thetaminus_end=theta; thetaplus_end=theta; thetaprime=theta;
601  rminus_end=p; rplus_end=p;
602  gr2minus_end=gr2; gr2plus_end=gr2; gr2prime=gr2;
603  nllprime=nll; grprime=gr;
604  Hprime=H0;
605  while (s) {
606  value = randu(rng); // runif(1)
607  v = 2 * (value < 0.5) - 1;
608  // Add a trajectory of length 2^j, built to the left or right of
609  // edges of the current entire tree.
610  if (v == 1) {
611  // Build a tree to the right from thetaplus. The leftmost
612  // point in the new subtree, which gets overwritten in
613  // both the global _end variable, but also the _ ref
614  // version. Need to reset to the rightmost point, since
615  // this may not have been last one executed and thus the
616  // gradients are wrong
617  gr2=gr2plus_end;
618  build_tree(nvar, gr, chd, eps, rplus_end, thetaplus_end, gr2, logu, v, j,
619  H0, _thetaprime, _thetaplus, _thetaminus, _rplus, _rminus,
620  _alphaprime, _nalphaprime, _sprime,
621  _nprime, _nfevals, _divergent, rng,
622  gr2plus_end, _grprime, _gr2prime, _nllprime, _Hprime, _parsaveprime);
623  // Moved right, so update extreme right tree. Done by
624  // reference in gr2plus_end, which is the gradient at the
625  // rightmost point of the global trajectory.
626  thetaplus_end=_thetaplus; rplus_end=_rplus;
627  } else {
628  // Same but to the left from thetaminus
629  gr2=gr2minus_end;
630  build_tree(nvar, gr, chd, eps, rminus_end, thetaminus_end, gr2, logu, v, j,
631  H0, _thetaprime, _thetaplus, _thetaminus, _rplus, _rminus,
632  _alphaprime, _nalphaprime, _sprime,
633  _nprime, _nfevals, _divergent, rng,
634  gr2minus_end, _grprime, _gr2prime, _nllprime, _Hprime, _parsaveprime);
635  thetaminus_end=_thetaminus; rminus_end=_rminus;
636  }
637  // _thetaprime is the proposed point from that sample (drawn
638  // uniformly), but still need to detemine whether to accept it at
639  // each doubling. The last accepted point becomes the next sample. If
640  // none are accepted, the same point is repeated twice.
641  rn = randu(rng); // Runif(1)
642  if (_sprime == 1 && rn < double(_nprime)/double(n)){
643  thetaprime=_thetaprime; // rotated, unbounded
644  gr2prime=_gr2prime; //
645  nllprime=_nllprime;
646  Hprime=_Hprime;
647  parsaveprime=_parsaveprime;
648  ynew=rotate_pars(chd,thetaprime); // unbounded
649  }
650 
651  // Test if a u-turn occured across the whole subtree j. Previously we
652  // only tested sub-subtrees.
653  b= stop_criterion(nvar, thetaminus_end, thetaplus_end, rminus_end, rplus_end);
654  s = _sprime && b;
655  // Increment valid points and depth
656  n += _nprime; ++j;
657  if(j>=max_treedepth) break;
658  } // end of single NUTS trajectory
659  // Instead of rerunning the model again (costly), I pass
660  // versions of the important parts by reference above. This
661  // is difference in ADMB 12.0 and 12.2 versions of this
662  // algorithm. Note these are also the intial values in the
663  // next iteration.
664  nll=nllprime; H=Hprime;
665  gr2=gr2prime; theta=thetaprime;
666  parsave=parsaveprime;
667  // Write the samples for this iteration to file
668  for(int i=1;i<nvar;i++) unbounded << ynew(i) << ", ";
669  unbounded << ynew(nvar) << endl;
670  (*pofs_psave) << parsave; // save all bounded draws to psv file
671  // Calculate approximate acceptance probability
672  alpha=0;
673  if(_nalphaprime>0) alpha=double(_alphaprime)/double(_nalphaprime);
674  if(is > warmup){
675  if(_divergent==1) ndivergent++; // Increment divergences only after warmup
676  // running sum of alpha to calculate average acceptance rate below
677  alphasum+=alpha;
678  }
679 
680  // Mass matrix adaptation.
681  bool slow=slow_phase(is, warmup, adapt_init_buffer, adapt_term_buffer);
682  // If in slow phase, do adaptation of mass matrix
683  if( (adapt_mass || adapt_mass_dense) & slow){
684  if(is== adapt_init_buffer){ // start of slow adaptation window
685  // Initialize algorithm: do I want to reinitialize after each update also?
686  am.initialize(); am2.initialize();
687  adm.initialize(); adm2.initialize();
688  is2=0; is3=0;
689  } else {
690  // Update metric with newest sample in unboudned space (ynew)
691  if(adapt_mass) add_sample_diag(nvar, is2, am, am2, ynew);
692  if(adapt_mass_dense) add_sample_dense(nvar, is3, adm, adm2, ynew);
693  if(is==anw){ // end of slow window
694  // Update the mass matrix and chd
695  if(adapt_mass){
696  metric.initialize();
697  dvector tmp=am2/(is2-1);
698  for(int i=1; i<=nvar; i++) metric(i,i) = tmp(i);
699  if(verbose_adapt_mass==1){
700  cout << "Chain " << chain << ": Iteration: " << is << ": Estimated diagonal variances, min=" << min(tmp) << " and max=" << max(tmp) << endl;
701  }
702  } else {
703  // dense metric
704  metric=adm2/(is3-1);
705  if(verbose_adapt_mass==1){
706  dvector tmp=diagonal(metric);
707  cout << "Chain " << chain <<
708  ": Iteration " << is << ": Estimated dense variances, min=" << min(tmp) << " and max=" << max(tmp) << endl;
709  }
710  }
711  // Note: Stan does this regularization of the metric to
712  // avoid Cholesky errors. I decided it against doing
713  // that and rather just catch the error and instead do
714  // a diagonal one. The next time it will have twice as
715  // many samples to estimate the metric. My reasoning
716  // was it will prevent a downward spiral where a
717  // poorly-estimated M will cause fewer effective
718  // samples, leading to worse M, etc. This will warn the
719  // user and continue on with the diagonal metric, thus
720  // being more stable. This is the adapted Stan
721  // regularization code:
722  //
723  // dmatrix Mtemp(1,nvar, 1,nvar);
724  // Mtemp.initialize();
725  // for(int i=1; i<=nvar; i++) Mtemp(i,i)=1e-3 * (5.0 / (is3 + 5.0));
726  // metric = (is3 / (is3 + 5.0)) * metric + Mtemp;
727  // Update chd and chdinv by reference, since metric changed
728  success=calculate_chd_and_inverse(nvar, metric, chd, chdinv);
729  if(!success && adapt_mass_dense){
730  // if it fails try a diagonal one which should be more reliable
731  cout << "Chain " << chain << ": Choleski decomposition of dense mass matrix failed. Trying diagonal update instead." << endl;
733  success=calculate_chd_and_inverse(nvar, metric, chd, chdinv);
735  }
736  if(!success && adapt_mass){
737  cout << "Chain " << chain << ": Error updating diagonal mass matrix. Fix model, turn off adaptation, or increase adapt_window" << endl;
738  ad_exit(1);
739  }
740  // Since chd changed need to refresh values and gradients
741  // in x space
742  nll=get_hybrid_monte_carlo_value(nvar,ynew,gr);
743  theta=rotate_pars(chdinv,ynew);
744  gr2=rotate_gradient(gr, chd);
745  // Calculate the next end window. If this overlaps into the final fast
746  // period, it will be stretched to that point (warmup-adapt_term_buffer)
747  aws *=2;
748  anw = compute_next_window(is, warmup, adapt_init_buffer, aws, adapt_term_buffer);
749  // Refind a reasonable step size since it can be really
750  // different after changing M and reset algorithm
751  // parameters (Turned off for 13.0 b/c I thought it worked better this way)
752  // if(useDA)
753  // eps=find_reasonable_stepsize(nvar,theta,p,chd, verbose_adapt_mass, verbose_find_epsilon, chain);
754  mu=log(epsvec(is));
755  Hbar(is)=0; epsvec(is)=epsbar(is);
756  // this is supposed to restart the eps adaptation counter
757  // but I think it works better without
758  // iseps=0;
759  } // end of slow window
760  } // end up updating mass matrix
761  } // end adaptation of mass matrix
762  // Adaptation of step size (eps).
763  if(useDA){
764  if(is <= warmup){
765  iseps++; // how many iterations since start of last adapt window
766  // cout << "i=" << is << ", iseps=" << iseps << ", alpha=" << alpha <<
767  // ", mu=" << mu << ", epsvec(is)=" << epsvec(is) << ", epsbar(is)=" << epsbar(is) <<
768  // ", Hbar(is)=" << Hbar(is) << endl;
769  eps=adapt_eps(is, iseps, eps, alpha, adapt_delta, mu, epsvec, epsbar, Hbar);
770  } else {
771  eps=epsbar(warmup);
772  }
773  }
774  adaptation << alpha << "," << eps <<"," << j <<","
775  << _nfevals <<"," << _divergent <<"," << -H << "," << -nll << endl;
776  print_mcmc_progress(is, nmcmc, warmup, chain, refresh);
777  if(is ==warmup) time_warmup = static_cast<double>(std::clock()-start) / CLOCKS_PER_SEC;
778  time_total = static_cast<double>(std::clock()-start) / CLOCKS_PER_SEC;
779  nsamples=is;
780  if(use_duration==1 && time_total > duration){
781  // If duration option used, break loop after <duration> minutes.
782  cout << "Chain " << chain << ":" << is << " samples generated after " << duration/60 <<
783  " minutes running." << endl;
784  break;
785  }
786  } // end of MCMC chain
787 
788  if(adapt_mass_dense | adapt_mass){
789  ofstream metricsave("adapted_metric.txt", ios::trunc);
790  metricsave << metric;
791  }
792 
793  // Information about run
794  if(ndivergent>0)
795  cout << "Chain " << chain <<": There were " << ndivergent << " divergent transitions after warmup" << endl;
796  if(useDA)
797  cout << "Chain " << chain << ": Final step size=" << eps << "; after " << warmup << " warmup iterations"<< endl;
798  cout << "Chain " << chain << ": Final acceptance ratio=";
799  printf("%.2f", alphasum /(nsamples-warmup));
800  if(useDA) cout << ", and target=" << adapt_delta << endl; else cout << endl;
801  print_mcmc_timing(time_warmup, time_total, chain);
802  // Close .psv file connection
803  if (pofs_psave) {
804  delete pofs_psave;
805  pofs_psave=NULL;
806  }
807 } // end of NUTS function
808 
static void restore_all_values(const dvector &x, const int &ii)
Definition: model3.cpp:105
dvector rotate_pars(const dvector &m, const dvector &x)
static void set_active_random_effects(void)
Definition: model.cpp:267
void read_mle_hmc(int nvar, dvector &mle)
void nuts_mcmc_routine(int, int, double, int)
The function implements the MCMC algorithm NUTS, which is a self-tuning variant of Hamiltonian Monte ...
Definition: nuts.cpp:51
dvector diagonal(const dmatrix &matrix)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: dmat31.cpp:12
Vector of double precision numbers.
Definition: dvector.h:50
bool stop_criterion(int nvar, dvector &thetaminus, dvector &thetaplus, dvector &rminus, dvector &rplus)
static char ** argv
Definition: fvar.hpp:8866
double sum(const d3_array &darray)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: d3arr.cpp:21
int compute_next_window(int i, int warmup, int w1, int aws, int w3)
void print_mcmc_progress(int is, int nmcmc, int nwarmup, int chain, int refresh)
exitptr ad_exit
Definition: gradstrc.cpp:53
static dvariable reset(const dvar_vector &x)
Definition: model.cpp:345
ADMB variable vector.
Definition: fvar.hpp:2172
static void restore_start_phase(void)
Definition: model.cpp:275
static adstring adprogram_name
Definition: fvar.hpp:8860
dvector rotate_gradient(const dvector &x, const dmatrix &m)
void gradcalc(int nvar, const dvector &g)
Definition: sgradclc.cpp:77
int atoi(adstring &s)
Returns a integer converted from input s.
Definition: atoi.cpp:20
double find_reasonable_stepsize(int nvar, dvector y, dvector p, dmatrix &chd, bool verbose_adapt_mass, bool verbose_find_epsilon, int chain)
static int nvarcalc()
Definition: model.cpp:152
static int stddev_vscale(const dvar_vector &d, const dvar_vector &x)
Definition: model.cpp:191
bool calculate_chd_and_inverse(int nvar, const dmatrix &metric, dmatrix &chd, dmatrix &chdinv)
Calculate the Cholesky decomposition and its inverse given a mass matrix.
double randu(const random_number_generator &rng)
Uniform random number generator.
Definition: rngen.cpp:198
void fill_randn(long int &n)
Fill vector with random numbers.
Definition: ranfill.cpp:231
Description not yet available.
Definition: fvar.hpp:7951
prnstream & endl(prnstream &)
Description not yet available.
Definition: fvar.hpp:1937
double get_hybrid_monte_carlo_value(int nvar, const independent_variables &y, dvector &g)
Written by Dave, commented by Cole starting 8/31/2016 Description not yet available.
#define min(a, b)
Definition: cbivnorm.cpp:188
Description not yet available.
Definition: fvar.hpp:3398
static int argc
Definition: fvar.hpp:8863
static objective_function_value * pobjfun
Definition: admodel.h:2394
Description not yet available.
static void xinit(const dvector &x)
Definition: model.cpp:226
static void set_inactive_random_effects(void)
Definition: model.cpp:288
Description not yet available.
Definition: fvar.hpp:2819
void read_covariance_matrix(const dmatrix &S, int nvar, int &hbf, dvector &sscale)
Definition: xxmcmc.cpp:1320
void initialize(void)
Initialze all elements of dvector to zero.
Definition: dvect5.cpp:10
double norm2(const d3_array &a)
Return sum of squared elements in a.
Definition: d3arr2a.cpp:167
void read_empirical_covariance_matrix(int nvar, const dmatrix &S, const adstring &prog_name)
Reads the covariance matrix from a file, which is the program name prepended to the extension &#39;...
Definition: xxmcmc.cpp:961
int option_match(int argc, char *argv[], const char *string)
Checks if the program has been invoked with a particular command line argument (&quot;string&quot;).
Definition: optmatch.cpp:25
double adapt_eps(int ii, int iseps, double eps, double alpha, double &adapt_delta, double &mu, dvector &epsvec, dvector &epsbar, dvector &Hbar)
void print_mcmc_timing(double time_warmup, double time_total, int chain)
bool slow_phase(int is, int warmup, int w1, int w3)
double eps
Definition: ftweak.cpp:13
static void set_YES_DERIVATIVES(void)
Enable accumulation of derivative information.
Definition: gradstrc.cpp:650
int diagonal_metric_flag
Definition: admodel.h:1887
void read_hessian_matrix_and_scale1(int nvar, const dmatrix &_SS, double s, int mcmc2_flag)
Definition: xxmcmc.cpp:1453
dvector value(const df1_one_vector &v)
Definition: df11fun.cpp:69
virtual void userfunction(void)=0
std::string get_filename(const char *f)
static int stddev_scale(const dvector &d, const dvector &x)
Definition: model.cpp:202
#define max(a, b)
Definition: cbivnorm.cpp:189
static int mc_phase
Definition: admodel.h:846
void add_sample_dense(const int nvar, int &is2, dvector &m, dmatrix &m2, const independent_variables &q)
Calculate running covariance using Welford&#39;s &quot;online&quot; algorithm.
void initialize(void)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: dmat7.cpp:12
void build_tree(int nvar, dvector &gr, dmatrix &chd, double eps, dvector &p, dvector &y, dvector &gr2, double logu, int v, int j, double H0, dvector &_thetaprime, dvector &_thetaplus, dvector &_thetaminus, dvector &_rplus, dvector &_rminus, double &_alphaprime, int &_nalphaprime, bool &_sprime, int &_nprime, int &_nfevals, bool &_divergent, const random_number_generator &rng, dvector &gr2_end, dvector &_grprime, dvector &_gr2prime, double &_nllprime, double &_Hprime, independent_variables &_parsaveprime)
Fundamental data type for reverse mode automatic differentiation.
Definition: fvar.hpp:1518
d3_array log(const d3_array &arr3)
Author: David Fournier Copyright (c) 2008-2012 Regents of the University of California.
Definition: d3arr2a.cpp:13
void add_sample_diag(const int nvar, int &n, dvector &m, dvector &m2, const independent_variables &q)
Calculate running covariance using Welford&#39;s &quot;online&quot; algorithm.