using System;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.Models {
  // Bayesian estimation of a Gaussian with
  // 1) unknown mean and known variance
  // 2) unknown mean and unknown variance
  public class GaussianModel : IModel {
    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();

    // parameters of Gaussian prior for mean
    private readonly double meanPriorMu;
    private readonly double meanPriorVariance;

    private readonly bool knownVariance;
    private readonly double rewardVariance = 0.1; // assumed known reward variance

    // parameters of Gamma prior for precision (= inverse variance)
    private readonly int precisionPriorAlpha;
    private readonly double precisionPriorBeta;

    // non-informative prior
    private const double priorK = 1.0;

    // this constructor assumes the variance is known
    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;

      this.knownVariance = true;
      this.rewardVariance = rewardVariance;
    }

    // this constructor assumes the variance is also unknown
    // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85 - 89
    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;

      this.knownVariance = false;
      this.precisionPriorAlpha = precisionPriorAlpha;
      this.precisionPriorBeta = precisionPriorBeta;
    }

    public double Sample(Random random) {
      if (knownVariance) {
        return SampleExpectedRewardKnownVariance(random);
      } else {
        return SampleExpectedRewardUnknownVariance(random);
      }
    }

    private double SampleExpectedRewardKnownVariance(Random random) {
      // calculate the posterior mean and variance for the expected reward
      // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);

      // sample a mean from the posterior
      var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
      // the sampled mean already represents the expected reward value => nothing else to do
      return posteriorMeanSample;

      // alternatively: return the 0.99-quantile value
      //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
    }

    // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
    private double SampleExpectedRewardUnknownVariance(Random random) {
      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
      var posteriorK = priorK + estimator.N;
      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
      double posteriorBeta;
      if (estimator.N > 0) {
        posteriorBeta = precisionPriorBeta
          + 0.5 * estimator.N * estimator.Variance
          + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
      } else {
        posteriorBeta = precisionPriorBeta;
      }

      // sample from the posterior marginal for mu (expected value), Murphy 2007 equation 91:
      // p(µ|D) = T_2αn(µ | µn, βn / (αn κn))
      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), Math.Max(random.NextDouble(), 0.00001));

      // scale the standard t-variate by the standard deviation sqrt(βn / (αn κn)) and shift by the posterior mean
      var theta = t2alpha * Math.Sqrt(posteriorBeta / (posteriorAlpha * posteriorK)) + posteriorMean;
      return theta;

      /*
       * alternative value function: 0.99-quantile
      // sample posterior mean and posterior variance independently
      var sampledPrec = Rand.GammaRand(random, posteriorAlpha) * posteriorBeta;
      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
      var sampledMean = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;

      return alglib.invnormaldistribution(0.99) / Math.Sqrt(sampledPrec) + sampledMean;
       */
    }

    public void Update(double reward) {
      estimator.UpdateReward(reward);
    }

    public void Reset() {
      estimator.Reset();
    }

    public object Clone() {
      if (knownVariance)
        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
      else
        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
    }

    public override string ToString() {
      if (knownVariance) {
        var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
        var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
        return string.Format("Gaussian(mu, var={0:F3}), mu ~ Gaussian(mu'={1:F3}, var'={2:F3})",
          rewardVariance, posteriorMeanMean, posteriorMeanVariance);
      } else {
        var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
        var posteriorK = priorK + estimator.N;
        var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
        double posteriorBeta;
        if (estimator.N > 0) {
          posteriorBeta = precisionPriorBeta
            + 0.5 * estimator.N * estimator.Variance
            + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
        } else {
          posteriorBeta = precisionPriorBeta;
        }
        var nu = (int)(2 * posteriorAlpha);
        var meanVariance = posteriorBeta / (posteriorAlpha * posteriorK) * (nu / (double)(nu - 2));
        return string.Format("Gaussian(mu, var), mu ~ T{0}(mu'={1:F3}, var'={2:F3}), 1.0/var ~ Gamma(mean={3:F3}, var={4:F3})",
          nu, posteriorMean, meanVariance, posteriorAlpha / posteriorBeta, posteriorAlpha / (posteriorBeta * posteriorBeta));
      }
    }
  }
}
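
// Illustrative usage sketch (not part of the original class): Thompson sampling over a few arms,
// each tracked by its own GaussianModel with unknown mean and unknown variance. The arm means,
// priors, and the simulated reward draw below are made-up example values; only the Sample() and
// Update() members defined above (plus Rand.RandNormal, already used in this file) are assumed.
/*
var random = new Random(1234);
var trueMeans = new[] { 0.2, 0.5, 0.8 };              // hypothetical expected rewards per arm
var models = new GaussianModel[trueMeans.Length];
for (int a = 0; a < models.Length; a++) {
  // weakly informative priors: mean ~ N(0, 1), precision ~ Gamma(alpha=1, beta=1)
  models[a] = new GaussianModel(0.0, 1.0, 1, 1.0);
}

for (int iteration = 0; iteration < 1000; iteration++) {
  // Thompson sampling: draw one expected-reward sample from each posterior and play the best arm
  int bestArm = 0;
  double bestDraw = double.NegativeInfinity;
  for (int a = 0; a < models.Length; a++) {
    var draw = models[a].Sample(random);
    if (draw > bestDraw) { bestDraw = draw; bestArm = a; }
  }

  // simulated noisy reward for the chosen arm (variance 0.1, matching the class default)
  var reward = trueMeans[bestArm] + Rand.RandNormal(random) * Math.Sqrt(0.1);
  models[bestArm].Update(reward);
}
*/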