1 | using System;
|
---|
2 | using HeuristicLab.Common;
|
---|
3 |
|
---|
4 | namespace HeuristicLab.Algorithms.Bandits.Models {
|
---|
5 | // bayesian estimation of a Gaussian with
|
---|
6 | // 1) unknown mean and known variance
|
---|
7 | // 2) unknown mean and unknown variance
|
---|
8 | public class GaussianModel : IModel {
|
---|
9 |
|
---|
10 | private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
|
---|
11 |
|
---|
12 | // parameters of Gaussian prior for mean
|
---|
13 | private readonly double meanPriorMu;
|
---|
14 | private readonly double meanPriorVariance;
|
---|
15 |
|
---|
16 | private readonly bool knownVariance;
|
---|
17 | private readonly double rewardVariance = 0.1; // assumed know reward variance
|
---|
18 |
|
---|
19 | // parameters of Gamma prior for precision (= inverse variance)
|
---|
20 | private readonly int precisionPriorAlpha;
|
---|
21 | private readonly double precisionPriorBeta;
|
---|
22 |
|
---|
23 | // non-informative prior
|
---|
24 | private const double priorK = 1.0;
|
---|
25 |
|
---|
26 | // this constructor assumes the variance is known
|
---|
27 | public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
|
---|
28 | this.meanPriorMu = meanPriorMu;
|
---|
29 | this.meanPriorVariance = meanPriorVariance;
|
---|
30 |
|
---|
31 | this.knownVariance = true;
|
---|
32 | this.rewardVariance = rewardVariance;
|
---|
33 | }
|
---|
34 |
|
---|
35 | // this constructor assumes the variance is also unknown
|
---|
36 | // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution equation 85 - 89
|
---|
37 | public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
|
---|
38 | this.meanPriorMu = meanPriorMu;
|
---|
39 | this.meanPriorVariance = meanPriorVariance;
|
---|
40 |
|
---|
41 | this.knownVariance = false;
|
---|
42 | this.precisionPriorAlpha = precisionPriorAlpha;
|
---|
43 | this.precisionPriorBeta = precisionPriorBeta;
|
---|
44 | }
|
---|
45 |
|
---|
46 |
|
---|
47 | public double Sample(Random random) {
|
---|
48 | if (knownVariance) {
|
---|
49 | return SampleExpectedRewardKnownVariance(random);
|
---|
50 | } else {
|
---|
51 | return SampleExpectedRewardUnknownVariance(random);
|
---|
52 | }
|
---|
53 | }
|
---|
54 |
|
---|
55 | private double SampleExpectedRewardKnownVariance(Random random) {
|
---|
56 | // expected values for reward
|
---|
57 | // calculate posterior mean and variance (for mean reward)
|
---|
58 |
|
---|
59 | // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
|
---|
60 | var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
|
---|
61 | var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
|
---|
62 |
|
---|
63 | // sample a mean from the posterior
|
---|
64 | var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
|
---|
65 | // theta already represents the expected reward value => nothing else to do
|
---|
66 | return posteriorMeanSample;
|
---|
67 |
|
---|
68 | // return 0.99-quantile value
|
---|
69 | //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
|
---|
70 | }
|
---|
71 |
|
---|
72 | // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
|
---|
73 | private double SampleExpectedRewardUnknownVariance(Random random) {
|
---|
74 |
|
---|
75 | var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
|
---|
76 | var posteriorK = priorK + estimator.N;
|
---|
77 | var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
|
---|
78 | double posteriorBeta;
|
---|
79 | if (estimator.N > 0) {
|
---|
80 | posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
|
---|
81 | } else {
|
---|
82 | posteriorBeta = precisionPriorBeta;
|
---|
83 | }
|
---|
84 |
|
---|
85 |
|
---|
86 | // sample from the posterior marginal for mu (expected value) equ. 91
|
---|
87 | // p(µ|D) = T2αn (µ| µn, βn/(αnκn))
|
---|
88 |
|
---|
89 | var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), Math.Max(random.NextDouble(), 0.00001));
|
---|
90 |
|
---|
91 | var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
|
---|
92 | return theta;
|
---|
93 |
|
---|
94 |
|
---|
95 | /*
|
---|
96 | * value function : 0.99-quantile
|
---|
97 | // sample posterior mean and posterior variance independently
|
---|
98 | var sampledPrec = Rand.GammaRand(random, posteriorAlpha) * posteriorBeta;
|
---|
99 | var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
|
---|
100 | var sampledMean = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
|
---|
101 |
|
---|
102 | return alglib.invnormaldistribution(0.99) / Math.Sqrt(sampledPrec) + sampledMean;
|
---|
103 | */
|
---|
104 | }
|
---|
105 |
|
---|
106 |
|
---|
107 | public void Update(double reward) {
|
---|
108 | estimator.UpdateReward(reward);
|
---|
109 | }
|
---|
110 |
|
---|
111 | public void Reset() {
|
---|
112 | estimator.Reset();
|
---|
113 | }
|
---|
114 |
|
---|
115 | public object Clone() {
|
---|
116 | if (knownVariance)
|
---|
117 | return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
|
---|
118 | else
|
---|
119 | return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
|
---|
120 | }
|
---|
121 |
|
---|
122 | public override string ToString() {
|
---|
123 | if (knownVariance) {
|
---|
124 | var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
|
---|
125 | var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
|
---|
126 | return string.Format("Gaussian(mu, var=0.1), mu ~ Gaussian(mu'={0:F3}, var'={1:F3})", posteriorMeanMean, posteriorMeanVariance);
|
---|
127 | } else {
|
---|
128 | var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
|
---|
129 | var posteriorK = priorK + estimator.N;
|
---|
130 | var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
|
---|
131 | double posteriorBeta;
|
---|
132 | if (estimator.N > 0) {
|
---|
133 | posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
|
---|
134 | } else {
|
---|
135 | posteriorBeta = precisionPriorBeta;
|
---|
136 | }
|
---|
137 | var nu = (int)(2 * posteriorAlpha);
|
---|
138 | var meanVariance = posteriorBeta / (posteriorAlpha * posteriorK) * (nu / (double)(nu - 2));
|
---|
139 | return string.Format("Gaussian(mu, var), mu ~ T{0}(mu'={1:F3}, var'={2:F3}), 1.0/var ~ Gamma(mu={3:F3}, var={4:F3})",
|
---|
140 | nu, posteriorMean, meanVariance,
|
---|
141 | posteriorAlpha / posteriorBeta, posteriorAlpha / (posteriorBeta * posteriorBeta));
|
---|
142 | }
|
---|
143 | }
|
---|
144 | }
|
---|
145 | }
|
---|