Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs @ 11732

Last change on this file since 11732 was 11732, checked in by gkronber, 9 years ago

#2283: refactoring and bug fixes

File size: 4.8 KB
Line 
1using System;
2using HeuristicLab.Common;
3
4namespace HeuristicLab.Algorithms.Bandits.Models {
5  // bayesian estimation of a Gaussian with
6  // 1) unknown mean and known variance
7  // 2) unknown mean and unknown variance
8  public class GaussianModel : IModel {
9    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
10
11    // parameters of Gaussian prior for mean
12    private readonly double meanPriorMu;
13    private readonly double meanPriorVariance;
14
15    private readonly bool knownVariance;
16    private readonly double rewardVariance = 0.1; // assumed know reward variance
17
18    // parameters of Gamma prior for precision (= inverse variance)
19    private readonly int precisionPriorAlpha;
20    private readonly double precisionPriorBeta;
21
22    // non-informative prior
23    private const double priorK = 1.0;
24
25    // this constructor assumes the variance is known
26    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
27      this.meanPriorMu = meanPriorMu;
28      this.meanPriorVariance = meanPriorVariance;
29
30      this.knownVariance = true;
31      this.rewardVariance = rewardVariance;
32    }
33
34    // this constructor assumes the variance is also unknown
35    // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution equation 85 - 89
36    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
37      this.meanPriorMu = meanPriorMu;
38      this.meanPriorVariance = meanPriorVariance;
39
40      this.knownVariance = false;
41      this.precisionPriorAlpha = precisionPriorAlpha;
42      this.precisionPriorBeta = precisionPriorBeta;
43    }
44
45
46    public double SampleExpectedReward(Random random) {
47      if (knownVariance) {
48        return SampleExpectedRewardKnownVariance(random);
49      } else {
50        return SampleExpectedRewardUnknownVariance(random);
51      }
52    }
53
54    private double SampleExpectedRewardKnownVariance(Random random) {
55      // expected values for reward
56      // calculate posterior mean and variance (for mean reward)
57
58      // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
59      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
60      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
61
62      // sample a mean from the posterior
63      var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
64      // theta already represents the expected reward value => nothing else to do
65      return posteriorMeanSample;
66
67      // return 0.99-quantile value
68      //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
69    }
70
71    // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
72    private double SampleExpectedRewardUnknownVariance(Random random) {
73
74      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
75      var posteriorK = priorK + estimator.N;
76      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
77      double posteriorBeta;
78      if (estimator.N > 0) {
79        posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
80      } else {
81        posteriorBeta = precisionPriorBeta;
82      }
83
84      // sample from the posterior marginal for mu (expected value) equ. 91
85      // p(µ|D) = T2αn (µ| µn, βn/(αnκn))
86
87      // sample from Tk distribution : http://stats.stackexchange.com/a/70270
88      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
89
90      var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
91      return theta;
92
93      //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
94      //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
95    }
96
97
98    public void Update(double reward) {
99      estimator.UpdateReward(reward);
100    }
101
102    public void Reset() {
103      estimator.Reset();
104    }
105
106    public void PrintStats() {
107      Console.Write("{0:F2} ", estimator.Avg);
108    }
109
110    public object Clone() {
111      if (knownVariance)
112        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
113      else
114        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
115    }
116  }
117}
Note: See TracBrowser for help on using the repository browser.