using System;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.Models {
  // Bayesian estimation of a Gaussian with
  // 1) unknown mean and known variance
  // 2) unknown mean and unknown variance
  public class GaussianModel : IModel {

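    // estimator is assumed to expose the running count (N), sum (Sum), mean (Avg), and
    // variance (Variance) of the observed rewards; only these members are used below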
    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();

    // parameters of Gaussian prior for mean
    private readonly double meanPriorMu;
    private readonly double meanPriorVariance;

    private readonly bool knownVariance;
    private readonly double rewardVariance = 0.1; // assumed known reward variance

    // parameters of Gamma prior for precision (= inverse variance)
    private readonly int precisionPriorAlpha;
    private readonly double precisionPriorBeta;

    // non-informative prior
    private const double priorK = 1.0;

    // this constructor assumes the variance is known
    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;

      this.knownVariance = true;
      this.rewardVariance = rewardVariance;
    }

    // this constructor assumes the variance is also unknown
    // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85-89
    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;

      this.knownVariance = false;
      this.precisionPriorAlpha = precisionPriorAlpha;
      this.precisionPriorBeta = precisionPriorBeta;
    }


    public double Sample(Random random) {
      if (knownVariance) {
        return SampleExpectedRewardKnownVariance(random);
      } else {
        return SampleExpectedRewardUnknownVariance(random);
      }
    }

    private double SampleExpectedRewardKnownVariance(Random random) {
      // expected values for reward
      // calculate posterior mean and variance (for mean reward)

      // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
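      // conjugate update for a Gaussian mean with known reward variance sigma^2 and prior N(mu0, var0):
      //   var_n = 1 / (n / sigma^2 + 1 / var0)
      //   mu_n  = var_n * (mu0 / var0 + sum(x_i) / sigma^2)
      // the two lines below compute exactly these quantities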
      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);

      // sample a mean from the posterior
      var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
      // theta already represents the expected reward value => nothing else to do
      return posteriorMeanSample;

      // return 0.99-quantile value
      //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
    }

    // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
    private double SampleExpectedRewardUnknownVariance(Random random) {

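      // Normal-Gamma posterior update (Murphy 2007, eqs. 85-89), with kappa0 = priorK:
      //   mu_n    = (kappa0 * mu0 + n * avg) / (kappa0 + n)
      //   kappa_n = kappa0 + n
      //   alpha_n = alpha0 + n / 2
      //   beta_n  = beta0 + 0.5 * sum((x_i - avg)^2) + kappa0 * n * (avg - mu0)^2 / (2 * (kappa0 + n))
      // (assuming estimator.Variance is the population variance, so N * Variance = sum((x_i - avg)^2))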
      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
      var posteriorK = priorK + estimator.N;
      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
      double posteriorBeta;
      if (estimator.N > 0) {
        posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
      } else {
        posteriorBeta = precisionPriorBeta;
      }

      // sample from the posterior marginal for mu (expected value), eq. 91:
      // p(µ|D) = t_{2αn}(µ | µn, βn/(αn κn))

      // inverse-CDF sample of a standard Student-t value with 2*alpha_n degrees of freedom;
      // the uniform value is clamped away from 0 to keep the quantile finite
      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), Math.Max(random.NextDouble(), 0.00001));

      // shift by the location mu_n and scale by the square root of the scale parameter beta_n / (alpha_n * kappa_n)
      var theta = t2alpha * Math.Sqrt(posteriorBeta / (posteriorAlpha * posteriorK)) + posteriorMean;
      return theta;

      /*
       * value function: 0.99-quantile
      // sample posterior mean and posterior variance independently
      var sampledPrec = Rand.GammaRand(random, posteriorAlpha) * posteriorBeta;
      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
      var sampledMean = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;

      return alglib.invnormaldistribution(0.99) / Math.Sqrt(sampledPrec) + sampledMean;
      */
    }


    public void Update(double reward) {
      estimator.UpdateReward(reward);
    }

    public void Reset() {
      estimator.Reset();
    }

    public object Clone() {
      if (knownVariance)
        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
      else
        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
    }

    public override string ToString() {
      if (knownVariance) {
        var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
        var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
        return string.Format("Gaussian(mu, var={0:F3}), mu ~ Gaussian(mu'={1:F3}, var'={2:F3})", rewardVariance, posteriorMeanMean, posteriorMeanVariance);
      } else {
        var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
        var posteriorK = priorK + estimator.N;
        var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
        double posteriorBeta;
        if (estimator.N > 0) {
          posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
        } else {
          posteriorBeta = precisionPriorBeta;
        }
        var nu = (int)(2 * posteriorAlpha);
        var meanVariance = posteriorBeta / (posteriorAlpha * posteriorK) * (nu / (double)(nu - 2));
        return string.Format("Gaussian(mu, var), mu ~ T{0}(mu'={1:F3}, var'={2:F3}), 1.0/var ~ Gamma(mu={3:F3}, var={4:F3})",
          nu, posteriorMean, meanVariance,
          posteriorAlpha / posteriorBeta, posteriorAlpha / (posteriorBeta * posteriorBeta));
      }
    }
  }
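
  // Illustrative sketch: Thompson sampling over several GaussianModel arms. The helper class and
  // method names are hypothetical; only Sample() and Update() of the model above are assumed.
  internal static class GaussianModelThompsonSamplingSketch {
    // draw one posterior sample of the expected reward per arm and play the arm with the largest draw;
    // after observing the reward for the chosen arm, call arms[chosenArm].Update(observedReward)
    public static int SelectArm(Random random, GaussianModel[] arms) {
      int bestArm = 0;
      double bestSample = double.NegativeInfinity;
      for (int i = 0; i < arms.Length; i++) {
        double s = arms[i].Sample(random);
        if (s > bestSample) {
          bestSample = s;
          bestArm = i;
        }
      }
      return bestArm;
    }
  }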
}