Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Distributions/GaussianModel.cs @ 13231

Last change on this file since 13231 was 11976, checked in by gkronber, 10 years ago

#2283 worked on seq search for ant

File size: 6.3 KB
RevLine 
[11730]1using System;
2using HeuristicLab.Common;
3
4namespace HeuristicLab.Algorithms.Bandits.Models {
[11732]5  // bayesian estimation of a Gaussian with
6  // 1) unknown mean and known variance
7  // 2) unknown mean and unknown variance
[11730]8  public class GaussianModel : IModel {
[11742]9
[11732]10    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
[11730]11
12    // parameters of Gaussian prior for mean
13    private readonly double meanPriorMu;
14    private readonly double meanPriorVariance;
15
[11732]16    private readonly bool knownVariance;
[11730]17    private readonly double rewardVariance = 0.1; // assumed know reward variance
18
[11732]19    // parameters of Gamma prior for precision (= inverse variance)
20    private readonly int precisionPriorAlpha;
21    private readonly double precisionPriorBeta;
22
23    // non-informative prior
24    private const double priorK = 1.0;
25
26    // this constructor assumes the variance is known
27    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
[11730]28      this.meanPriorMu = meanPriorMu;
29      this.meanPriorVariance = meanPriorVariance;
[11732]30
31      this.knownVariance = true;
32      this.rewardVariance = rewardVariance;
[11730]33    }
34
[11732]35    // this constructor assumes the variance is also unknown
36    // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution equation 85 - 89
37    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
38      this.meanPriorMu = meanPriorMu;
39      this.meanPriorVariance = meanPriorVariance;
[11976]40     
[11732]41      this.knownVariance = false;
42      this.precisionPriorAlpha = precisionPriorAlpha;
43      this.precisionPriorBeta = precisionPriorBeta;
44    }
45
46
[11851]47    public double Sample(Random random) {
[11732]48      if (knownVariance) {
49        return SampleExpectedRewardKnownVariance(random);
50      } else {
51        return SampleExpectedRewardUnknownVariance(random);
52      }
53    }
54
55    private double SampleExpectedRewardKnownVariance(Random random) {
[11730]56      // expected values for reward
[11732]57      // calculate posterior mean and variance (for mean reward)
[11730]58
[11732]59      // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
60      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
61      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
[11730]62
[11732]63      // sample a mean from the posterior
64      var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
65      // theta already represents the expected reward value => nothing else to do
66      return posteriorMeanSample;
[11730]67
[11732]68      // return 0.99-quantile value
69      //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
70    }
71
72    // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
73    private double SampleExpectedRewardUnknownVariance(Random random) {
74
75      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
76      var posteriorK = priorK + estimator.N;
77      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
78      double posteriorBeta;
79      if (estimator.N > 0) {
80        posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
81      } else {
82        posteriorBeta = precisionPriorBeta;
[11730]83      }
84
[11742]85
[11732]86      // sample from the posterior marginal for mu (expected value) equ. 91
87      // p(µ|D) = T2αn (µ| µn, βn/(αnκn))
88
89      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
90
91      var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
[11730]92      return theta;
93
[11742]94
95      /*
96       * value function : 0.99-quantile
97      // sample posterior mean and posterior variance independently
98      var sampledPrec = Rand.GammaRand(random, posteriorAlpha) * posteriorBeta;
99      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
100      var sampledMean = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
101
102      return alglib.invnormaldistribution(0.99) / Math.Sqrt(sampledPrec) + sampledMean;
103       */
[11730]104    }
105
[11732]106
107    public void Update(double reward) {
108      estimator.UpdateReward(reward);
[11730]109    }
110
111    public void Reset() {
[11732]112      estimator.Reset();
[11730]113    }
114
[11732]115    public object Clone() {
116      if (knownVariance)
117        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
118      else
119        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
120    }
[11742]121
122    public override string ToString() {
123      if (knownVariance) {
124        var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
125        var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
126        return string.Format("Gaussian(mu, var=0.1), mu ~ Gaussian(mu'={0:F3}, var'={1:F3})", posteriorMeanMean, posteriorMeanVariance);
127      } else {
128        var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
129        var posteriorK = priorK + estimator.N;
130        var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
131        double posteriorBeta;
132        if (estimator.N > 0) {
133          posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
134        } else {
135          posteriorBeta = precisionPriorBeta;
136        }
137        var nu = (int)(2 * posteriorAlpha);
138        var meanVariance = posteriorBeta / (posteriorAlpha * posteriorK) * (nu / (double)(nu - 2));
139        return string.Format("Gaussian(mu, var), mu ~ T{0}(mu'={1:F3}, var'={2:F3}), 1.0/var ~ Gamma(mu={3:F3}, var={4:F3})",
140          nu, posteriorMean, meanVariance,
141          posteriorAlpha / posteriorBeta, posteriorAlpha / (posteriorBeta * posteriorBeta));
142      }
143    }
[11730]144  }
145}
Note: See TracBrowser for help on using the repository browser.