Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs
- Timestamp:
- 01/07/15 09:21:46 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs
r11730 r11732 1 1 using System; 2 using System.Collections.Generic;3 using System.Diagnostics;4 using System.Linq;5 using System.Text;6 using System.Threading.Tasks;7 2 using HeuristicLab.Common; 8 3 9 4 namespace HeuristicLab.Algorithms.Bandits.Models { 10 // bayesian estimation of a Gaussian with unknown mean and known variance 5 // bayesian estimation of a Gaussian with 6 // 1) unknown mean and known variance 7 // 2) unknown mean and unknown variance 11 8 public class GaussianModel : IModel { 12 private readonly int numActions; 13 private readonly int[] tries; 14 private readonly double[] sumRewards; 15 9 private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator(); 16 10 17 11 // parameters of Gaussian prior for mean … … 19 13 private readonly double meanPriorVariance; 20 14 15 private readonly bool knownVariance; 21 16 private readonly double rewardVariance = 0.1; // assumed know reward variance 22 17 23 public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) { 24 this.numActions = numActions; 25 this.tries = new int[numActions]; 26 this.sumRewards = new double[numActions]; 18 // parameters of Gamma prior for precision (= inverse variance) 19 private readonly int precisionPriorAlpha; 20 private readonly double precisionPriorBeta; 21 22 // non-informative prior 23 private const double priorK = 1.0; 24 25 // this constructor assumes the variance is known 26 public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) { 27 27 this.meanPriorMu = meanPriorMu; 28 28 this.meanPriorVariance = meanPriorVariance; 29 30 this.knownVariance = true; 31 this.rewardVariance = rewardVariance; 32 } 33 34 // this constructor assumes the variance is also unknown 35 // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution equation 85 - 89 36 public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) { 37 this.meanPriorMu = meanPriorMu; 38 this.meanPriorVariance = meanPriorVariance; 39 40 this.knownVariance = false; 41 this.precisionPriorAlpha = precisionPriorAlpha; 42 this.precisionPriorBeta = precisionPriorBeta; 29 43 } 30 44 31 45 32 public double[] SampleExpectedRewards(Random random) { 46 public double SampleExpectedReward(Random random) { 47 if (knownVariance) { 48 return SampleExpectedRewardKnownVariance(random); 49 } else { 50 return SampleExpectedRewardUnknownVariance(random); 51 } 52 } 53 54 private double SampleExpectedRewardKnownVariance(Random random) { 33 55 // expected values for reward 34 var theta = new double[numActions];56 // calculate posterior mean and variance (for mean reward) 35 57 36 for (int a = 0; a < numActions; a++) { 37 if (tries[a] == -1) { 38 theta[a] = double.NegativeInfinity; // disabled action 39 } else { 40 // calculate posterior mean and variance (for mean reward) 58 // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf) 59 var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance); 60 var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance); 41 61 42 // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf) 43 var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance); 44 var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance); 62 // sample a mean from the posterior 63 var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean; 64 // theta already represents the expected reward value => nothing else to do 65 return posteriorMeanSample; 45 66 46 // sample a mean from the posterior 47 theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean; 48 // theta already represents the expected reward value => nothing else to do 49 } 67 // return 0.99-quantile value 68 //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean; 69 } 70 71 // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf) 72 private double SampleExpectedRewardUnknownVariance(Random random) { 73 74 var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N); 75 var posteriorK = priorK + estimator.N; 76 var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0; 77 double posteriorBeta; 78 if (estimator.N > 0) { 79 posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N)); 80 } else { 81 posteriorBeta = precisionPriorBeta; 50 82 } 51 83 84 // sample from the posterior marginal for mu (expected value) equ. 91 85 // p(µ|D) = T2αn (µ| µn, βn/(αnκn)) 86 87 // sample from Tk distribution : http://stats.stackexchange.com/a/70270 88 var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble()); 89 90 var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean; 52 91 return theta; 92 93 //return alglib.invnormaldistribution(random.NextDouble()) * + theta; 94 //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean; 53 95 } 54 96 55 public void Update(int action, double reward) {56 sumRewards[action] += reward;57 tries[action]++;58 }59 97 60 public void Disable(int action) { 61 tries[action] = -1; 62 sumRewards[action] = 0.0; 98 public void Update(double reward) { 99 estimator.UpdateReward(reward); 63 100 } 64 101 65 102 public void Reset() { 66 Array.Clear(tries, 0, numActions); 67 Array.Clear(sumRewards, 0, numActions); 103 estimator.Reset(); 68 104 } 69 105 70 106 public void PrintStats() { 71 for (int i = 0; i < numActions; i++) { 72 Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]); 73 } 107 Console.Write("{0:F2} ", estimator.Avg); 108 } 109 110 public object Clone() { 111 if (knownVariance) 112 return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance); 113 else 114 return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta); 74 115 } 75 116 }
Note: See TracChangeset
for help on using the changeset viewer.