1 | using System;
|
---|
2 | using HeuristicLab.Common;
|
---|
3 |
|
---|
namespace HeuristicLab.Algorithms.Bandits.Models {
  // Bayesian estimation of a Gaussian with
  // 1) unknown mean and known variance
  // 2) unknown mean and unknown variance
  public class GaussianModel : IModel {
    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();

    // parameters of Gaussian prior for mean
    private readonly double meanPriorMu;
    private readonly double meanPriorVariance;

    private readonly bool knownVariance;
    private readonly double rewardVariance = 0.1; // assumed known reward variance

    // parameters of Gamma prior for precision (= inverse variance)
    private readonly int precisionPriorAlpha;
    private readonly double precisionPriorBeta;

    // non-informative prior: pseudo-observation count kappa_0 of the mean prior
    // NOTE(review): in the unknown-variance path this fixed value is used instead of
    // meanPriorVariance, so the meanPriorVariance constructor argument has no effect
    // there — confirm this is intended.
    private const double priorK = 1.0;

    /// <summary>
    /// Creates a model that assumes the reward variance is known.
    /// </summary>
    /// <param name="meanPriorMu">Mean of the Gaussian prior for the expected reward.</param>
    /// <param name="meanPriorVariance">Variance of the Gaussian prior for the expected reward.</param>
    /// <param name="rewardVariance">The (known) variance of the reward distribution.</param>
    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;

      this.knownVariance = true;
      this.rewardVariance = rewardVariance;
    }

    /// <summary>
    /// Creates a model that treats the reward variance as unknown.
    /// Uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85 - 89.
    /// </summary>
    /// <param name="meanPriorMu">Mean of the Gaussian prior for the expected reward.</param>
    /// <param name="meanPriorVariance">Variance of the Gaussian prior for the expected reward
    /// (NOTE(review): currently not used by the unknown-variance sampling path, which uses priorK instead).</param>
    /// <param name="precisionPriorAlpha">Shape parameter of the Gamma prior for the reward precision.</param>
    /// <param name="precisionPriorBeta">Rate parameter of the Gamma prior for the reward precision.</param>
    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;

      this.knownVariance = false;
      this.precisionPriorAlpha = precisionPriorAlpha;
      this.precisionPriorBeta = precisionPriorBeta;
    }

    /// <summary>
    /// Draws one sample of the expected reward from the posterior over the mean
    /// (Thompson-sampling style), dispatching on whether the reward variance is known.
    /// </summary>
    public double SampleExpectedReward(Random random) {
      if (knownVariance) {
        return SampleExpectedRewardKnownVariance(random);
      } else {
        return SampleExpectedRewardUnknownVariance(random);
      }
    }

    // Samples the expected reward when the reward variance is known.
    // Posterior for the mean is Gaussian; see Murphy 2007: Conjugate Bayesian analysis
    // of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
    private double SampleExpectedRewardKnownVariance(Random random) {
      // posterior precision = data precision * N + prior precision (conjugate update);
      // with N == 0 this reduces to the prior.
      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);

      // sample a mean from the posterior; this already is the expected reward value
      return Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
    }

    // Samples the expected reward when the reward variance is unknown.
    // Normal-Gamma conjugate analysis; see Murphy 2007: Conjugate Bayesian analysis of the
    // Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
    private double SampleExpectedRewardUnknownVariance(Random random) {
      // posterior hyper-parameters, Murphy 2007 equ. 85 - 89
      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
      var posteriorK = priorK + estimator.N;
      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
      double posteriorBeta;
      if (estimator.N > 0) {
        // N * Variance = sum of squared deviations from the sample mean
        posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
      } else {
        posteriorBeta = precisionPriorBeta;
      }

      // sample from the posterior marginal for mu (expected value), equ. 91:
      //   p(mu|D) = T_{2*alpha_n}(mu | mu_n, beta_n / (alpha_n * kappa_n))
      // sample from a T_k distribution: http://stats.stackexchange.com/a/70270
      // NOTE(review): random.NextDouble() can return exactly 0.0, for which the inverse
      // CDF is -infinity — consider clamping the argument; behavior kept as-is here.
      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());

      // BUGFIX: beta_n/(alpha_n*kappa_n) is the SQUARED scale of the location-scale t
      // distribution, so the standard-t draw must be scaled by its square root
      // (consistent with the known-variance path, which uses Math.Sqrt(posteriorMeanVariance)).
      // Previously the squared scale was applied directly.
      var theta = t2alpha * Math.Sqrt(posteriorBeta / (posteriorAlpha * posteriorK)) + posteriorMean;
      return theta;
    }

    /// <summary>Incorporates one observed reward into the sufficient statistics.</summary>
    public void Update(double reward) {
      estimator.UpdateReward(reward);
    }

    /// <summary>Discards all observed rewards, reverting to the prior.</summary>
    public void Reset() {
      estimator.Reset();
    }

    /// <summary>Writes the current average reward to the console (diagnostics only).</summary>
    public void PrintStats() {
      Console.Write("{0:F2} ", estimator.Avg);
    }

    /// <summary>Creates a new model with the same prior parameters but fresh (empty) statistics.</summary>
    public object Clone() {
      if (knownVariance)
        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
      else
        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
    }
  }
}
|
---|