1  using System;


2  using HeuristicLab.Common;


3 


4  namespace HeuristicLab.Algorithms.Bandits.Models {


5  // bayesian estimation of a Gaussian with


6  // 1) unknown mean and known variance


7  // 2) unknown mean and unknown variance


8  public class GaussianModel : IModel {


9  private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();


10 


11  // parameters of Gaussian prior for mean


12  private readonly double meanPriorMu;


13  private readonly double meanPriorVariance;


14 


15  private readonly bool knownVariance;


16  private readonly double rewardVariance = 0.1; // assumed know reward variance


17 


18  // parameters of Gamma prior for precision (= inverse variance)


19  private readonly int precisionPriorAlpha;


20  private readonly double precisionPriorBeta;


21 


22  // noninformative prior


23  private const double priorK = 1.0;


24 


25  // this constructor assumes the variance is known


26  public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {


27  this.meanPriorMu = meanPriorMu;


28  this.meanPriorVariance = meanPriorVariance;


29 


30  this.knownVariance = true;


31  this.rewardVariance = rewardVariance;


32  }


33 


34  // this constructor assumes the variance is also unknown


35  // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution equation 85  89


36  public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {


37  this.meanPriorMu = meanPriorMu;


38  this.meanPriorVariance = meanPriorVariance;


39 


40  this.knownVariance = false;


41  this.precisionPriorAlpha = precisionPriorAlpha;


42  this.precisionPriorBeta = precisionPriorBeta;


43  }


44 


45 


46  public double SampleExpectedReward(Random random) {


47  if (knownVariance) {


48  return SampleExpectedRewardKnownVariance(random);


49  } else {


50  return SampleExpectedRewardUnknownVariance(random);


51  }


52  }


53 


54  private double SampleExpectedRewardKnownVariance(Random random) {


55  // expected values for reward


56  // calculate posterior mean and variance (for mean reward)


57 


58  // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)


59  var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);


60  var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);


61 


62  // sample a mean from the posterior


63  var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;


64  // theta already represents the expected reward value => nothing else to do


65  return posteriorMeanSample;


66 


67  // return 0.99quantile value


68  //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;


69  }


70 


71  // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)


72  private double SampleExpectedRewardUnknownVariance(Random random) {


73 


74  var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);


75  var posteriorK = priorK + estimator.N;


76  var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;


77  double posteriorBeta;


78  if (estimator.N > 0) {


79  posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg  meanPriorMu, 2) / (2.0 * (priorK + estimator.N));


80  } else {


81  posteriorBeta = precisionPriorBeta;


82  }


83 


84  // sample from the posterior marginal for mu (expected value) equ. 91


85  // p(µD) = T2αn (µ µn, βn/(αnκn))


86 


87  // sample from Tk distribution : http://stats.stackexchange.com/a/70270


88  var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());


89 


90  var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;


91  return theta;


92 


93  //return alglib.invnormaldistribution(random.NextDouble()) * + theta;


94  //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;


95  }


96 


97 


98  public void Update(double reward) {


99  estimator.UpdateReward(reward);


100  }


101 


102  public void Reset() {


103  estimator.Reset();


104  }


105 


106  public void PrintStats() {


107  Console.Write("{0:F2} ", estimator.Avg);


108  }


109 


110  public object Clone() {


111  if (knownVariance)


112  return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);


113  else


114  return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);


115  }


116  }


117  }

