using System; using HeuristicLab.Common; namespace HeuristicLab.Algorithms.Bandits.Models { // bayesian estimation of a Gaussian with // 1) unknown mean and known variance // 2) unknown mean and unknown variance public class GaussianModel : IModel { private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator(); // parameters of Gaussian prior for mean private readonly double meanPriorMu; private readonly double meanPriorVariance; private readonly bool knownVariance; private readonly double rewardVariance = 0.1; // assumed know reward variance // parameters of Gamma prior for precision (= inverse variance) private readonly int precisionPriorAlpha; private readonly double precisionPriorBeta; // non-informative prior private const double priorK = 1.0; // this constructor assumes the variance is known public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) { this.meanPriorMu = meanPriorMu; this.meanPriorVariance = meanPriorVariance; this.knownVariance = true; this.rewardVariance = rewardVariance; } // this constructor assumes the variance is also unknown // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution equation 85 - 89 public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) { this.meanPriorMu = meanPriorMu; this.meanPriorVariance = meanPriorVariance; this.knownVariance = false; this.precisionPriorAlpha = precisionPriorAlpha; this.precisionPriorBeta = precisionPriorBeta; } public double SampleExpectedReward(Random random) { if (knownVariance) { return SampleExpectedRewardKnownVariance(random); } else { return SampleExpectedRewardUnknownVariance(random); } } private double SampleExpectedRewardKnownVariance(Random random) { // expected values for reward // calculate posterior mean and variance (for mean reward) // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf) var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance); var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance); // sample a mean from the posterior var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean; // theta already represents the expected reward value => nothing else to do return posteriorMeanSample; // return 0.99-quantile value //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean; } // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf) private double SampleExpectedRewardUnknownVariance(Random random) { var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N); var posteriorK = priorK + estimator.N; var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0; double posteriorBeta; if (estimator.N > 0) { posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N)); } else { posteriorBeta = precisionPriorBeta; } // sample from the posterior marginal for mu (expected value) equ. 91 // p(µ|D) = T2αn (µ| µn, βn/(αnκn)) // sample from Tk distribution : http://stats.stackexchange.com/a/70270 var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble()); var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean; return theta; //return alglib.invnormaldistribution(random.NextDouble()) * + theta; //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean; } public void Update(double reward) { estimator.UpdateReward(reward); } public void Reset() { estimator.Reset(); } public void PrintStats() { Console.Write("{0:F2} ", estimator.Avg); } public object Clone() { if (knownVariance) return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance); else return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta); } } }