Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models
- Timestamp: 01/07/15 09:21:46
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models
- Files: 3 edited
Legend:
- ` ` unmodified
- `+` added
- `-` removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs
```diff
--- BernoulliModel.cs (r11730)
+++ BernoulliModel.cs (r11732)
 namespace HeuristicLab.Algorithms.Bandits.Models {
   public class BernoulliModel : IModel {
-    private readonly int numActions;
-    private readonly int[] success;
-    private readonly int[] failure;
+    private int success;
+    private int failure;
 
     // parameters of beta prior distribution
     private readonly double alpha;
     private readonly double beta;
 
-    public BernoulliModel(int numActions, double alpha = 1.0, double beta = 1.0) {
-      this.numActions = numActions;
-      this.success = new int[numActions];
-      this.failure = new int[numActions];
+    public BernoulliModel(double alpha = 1.0, double beta = 1.0) {
       this.alpha = alpha;
       this.beta = beta;
     }
 
-    public double[] SampleExpectedRewards(Random random) {
+    public double SampleExpectedReward(Random random) {
       // sample bernoulli mean from beta prior
-      var theta = new double[numActions];
-      for (int a = 0; a < numActions; a++) {
-        if (success[a] == -1)
-          theta[a] = 0.0;
-        else {
-          theta[a] = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
-        }
-      }
-
-      // no need to sample, we know the exact expected value:
-      // the expected value of a bernoulli variable is just theta
-      return theta.Select(t => t).ToArray();
+      return Rand.BetaRand(random, success + alpha, failure + beta);
     }
 
-    public void Update(int action, double reward) {
-      const double EPSILON = 1E-6;
-      Debug.Assert(Math.Abs(reward - 0.0) < EPSILON || Math.Abs(reward - 1.0) < EPSILON);
-      if (Math.Abs(reward - 1.0) < EPSILON) {
-        success[action]++;
+    public void Update(double reward) {
+      Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
+      if (reward.IsAlmost(1.0)) {
+        success++;
       } else {
-        failure[action]++;
+        failure++;
       }
     }
 
-    public void Disable(int action) {
-      success[action] = -1;
-    }
-
     public void Reset() {
-      Array.Clear(success, 0, numActions);
-      Array.Clear(failure, 0, numActions);
+      success = 0;
+      failure = 0;
     }
 
     public void PrintStats() {
-      for (int i = 0; i < numActions; i++) {
-        Console.Write("{0:F2} ", success[i] / (double)failure[i]);
-      }
+      Console.Write("{0:F2} ", success / (double)failure);
+    }
+
+    public object Clone() {
+      return new BernoulliModel() { failure = this.failure, success = this.success };
     }
   }
 }
```
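With the per-action arrays removed, a model now tracks a single action and the caller owns the action indexing. Below is a minimal, hypothetical sketch of how a Thompson-sampling loop might drive the refactored class: `BernoulliModel` and its members come from the diff above, while the `ThompsonSamplingSketch` wrapper, the `GetBinaryReward` environment stub, and all constants are invented for illustration.

```csharp
using System;
using System.Linq;
using HeuristicLab.Algorithms.Bandits.Models;

public static class ThompsonSamplingSketch {
  // hypothetical environment: returns a 0/1 reward for the chosen arm
  static double GetBinaryReward(Random random, int arm) {
    return random.NextDouble() < (arm + 1) / 11.0 ? 1.0 : 0.0;
  }

  public static void Run() {
    var random = new Random(1234);
    // one independent Beta-Bernoulli model per arm; the action index lives outside the model now
    var models = Enumerable.Range(0, 10)
                           .Select(_ => new BernoulliModel(alpha: 1.0, beta: 1.0))
                           .ToArray();

    for (int step = 0; step < 1000; step++) {
      // Thompson sampling: draw one posterior sample per arm, play the argmax
      int bestArm = 0;
      double bestSample = double.NegativeInfinity;
      for (int a = 0; a < models.Length; a++) {
        double sample = models[a].SampleExpectedReward(random);
        if (sample > bestSample) { bestSample = sample; bestArm = a; }
      }
      models[bestArm].Update(GetBinaryReward(random, bestArm));
    }

    foreach (var m in models) m.PrintStats(); // prints success/failure ratios
  }
}
```

One apparent motivation for the change: the number of arms no longer needs to be known up front, since models can be created (or cloned) on demand, and the old `Disable` mechanism becomes unnecessary.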
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs
```diff
--- GaussianModel.cs (r11730)
+++ GaussianModel.cs (r11732)
 using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 using HeuristicLab.Common;
 
 namespace HeuristicLab.Algorithms.Bandits.Models {
-  // bayesian estimation of a Gaussian with unknown mean and known variance
+  // bayesian estimation of a Gaussian with
+  // 1) unknown mean and known variance
+  // 2) unknown mean and unknown variance
   public class GaussianModel : IModel {
-    private readonly int numActions;
-    private readonly int[] tries;
-    private readonly double[] sumRewards;
+    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
 
     // parameters of Gaussian prior for mean
     private readonly double meanPriorMu;
     private readonly double meanPriorVariance;
 
+    private readonly bool knownVariance;
     private readonly double rewardVariance = 0.1; // assumed known reward variance
 
-    public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
-      this.numActions = numActions;
-      this.tries = new int[numActions];
-      this.sumRewards = new double[numActions];
+    // parameters of Gamma prior for precision (= inverse variance)
+    private readonly int precisionPriorAlpha;
+    private readonly double precisionPriorBeta;
+
+    // non-informative prior
+    private const double priorK = 1.0;
+
+    // this constructor assumes the variance is known
+    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
       this.meanPriorMu = meanPriorMu;
       this.meanPriorVariance = meanPriorVariance;
+
+      this.knownVariance = true;
+      this.rewardVariance = rewardVariance;
+    }
+
+    // this constructor assumes the variance is also unknown
+    // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85-89
+    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
+      this.meanPriorMu = meanPriorMu;
+      this.meanPriorVariance = meanPriorVariance;
+
+      this.knownVariance = false;
+      this.precisionPriorAlpha = precisionPriorAlpha;
+      this.precisionPriorBeta = precisionPriorBeta;
     }
 
-    public double[] SampleExpectedRewards(Random random) {
-      // expected values for reward
-      var theta = new double[numActions];
-
-      for (int a = 0; a < numActions; a++) {
-        if (tries[a] == -1) {
-          theta[a] = double.NegativeInfinity; // disabled action
-        } else {
-          // calculate posterior mean and variance (for mean reward)
-          // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
-          var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
-          var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);
-
-          // sample a mean from the posterior
-          theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
-          // theta already represents the expected reward value => nothing else to do
-        }
-      }
-
-      return theta;
+    public double SampleExpectedReward(Random random) {
+      if (knownVariance) {
+        return SampleExpectedRewardKnownVariance(random);
+      } else {
+        return SampleExpectedRewardUnknownVariance(random);
+      }
+    }
+
+    private double SampleExpectedRewardKnownVariance(Random random) {
+      // calculate posterior mean and variance (for mean reward)
+      // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
+      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
+
+      // sample a mean from the posterior; it already represents the expected reward => nothing else to do
+      return Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
+
+      // alternative: return the 0.99-quantile value
+      //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
+    }
+
+    // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+    private double SampleExpectedRewardUnknownVariance(Random random) {
+      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
+      var posteriorK = priorK + estimator.N;
+      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
+      double posteriorBeta;
+      if (estimator.N > 0) {
+        posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
+      } else {
+        posteriorBeta = precisionPriorBeta;
+      }
+
+      // sample from the posterior marginal for mu (expected value), equation 91:
+      // p(mu|D) = t_{2*alpha_n}(mu | mu_n, beta_n / (alpha_n * kappa_n))
+      // sampling from a t distribution: http://stats.stackexchange.com/a/70270
+      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
+      return t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
     }
 
-    public void Update(int action, double reward) {
-      sumRewards[action] += reward;
-      tries[action]++;
-    }
-
-    public void Disable(int action) {
-      tries[action] = -1;
-      sumRewards[action] = 0.0;
+    public void Update(double reward) {
+      estimator.UpdateReward(reward);
     }
 
     public void Reset() {
-      Array.Clear(tries, 0, numActions);
-      Array.Clear(sumRewards, 0, numActions);
+      estimator.Reset();
     }
 
     public void PrintStats() {
-      for (int i = 0; i < numActions; i++) {
-        Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]);
-      }
+      Console.Write("{0:F2} ", estimator.Avg);
+    }
+
+    public object Clone() {
+      if (knownVariance)
+        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
+      else
+        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
     }
   }
 }
```
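For reference, the posterior updates implemented by `SampleExpectedRewardUnknownVariance` are those of the cited Murphy 2007 notes (equations 85-91): with prior parameters $\mu_0$, $\kappa_0$ (`priorK`), $\alpha_0$, $\beta_0$ and $n$ observed rewards with mean $\bar{x}$,

```latex
\begin{align*}
\mu_n &= \frac{\kappa_0\mu_0 + n\bar{x}}{\kappa_0 + n}, &
\kappa_n &= \kappa_0 + n, &
\alpha_n &= \alpha_0 + \tfrac{n}{2}, \\
\beta_n &= \beta_0 + \tfrac{1}{2}\sum_{i=1}^{n}(x_i-\bar{x})^2
          + \frac{\kappa_0\, n\, (\bar{x}-\mu_0)^2}{2(\kappa_0+n)}, &
p(\mu \mid D) &= t_{2\alpha_n}\!\Bigl(\mu \,\Big|\, \mu_n, \tfrac{\beta_n}{\alpha_n\kappa_n}\Bigr).
\end{align*}
```

The new code also relies on `OnlineMeanAndVarianceEstimator` from HeuristicLab.Common, whose implementation is not part of this changeset. The following sketch only pins down the interface the posterior code needs (`N`, `Sum`, `Avg`, `Variance`, `UpdateReward`, `Reset`); the Welford-style update is an assumption, chosen so that `N * Variance` equals the sum of squared deviations used in the beta update above.

```csharp
using System;

// Minimal sketch of the estimator interface GaussianModel relies on.
// The real OnlineMeanAndVarianceEstimator lives in HeuristicLab.Common;
// the internals below are an assumption, not the actual implementation.
public class OnlineMeanAndVarianceEstimatorSketch {
  public int N { get; private set; }
  public double Sum { get; private set; }
  public double Avg { get { return N > 0 ? Sum / N : 0.0; } }
  // population variance of the rewards seen so far (m2 / N)
  public double Variance { get { return N > 0 ? m2 / N : 0.0; } }

  private double m2; // running sum of squared deviations from the mean

  public void UpdateReward(double reward) {
    N++;
    Sum += reward;
    // Welford update: delta uses the previous mean, the second factor the new one
    double delta = reward - (N > 1 ? (Sum - reward) / (N - 1) : 0.0);
    m2 += delta * (reward - Avg);
  }

  public void Reset() { N = 0; Sum = 0.0; m2 = 0.0; }
}
```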
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/IModel.cs
```diff
--- IModel.cs (r11730)
+++ IModel.cs (r11732)
 namespace HeuristicLab.Algorithms.Bandits {
-  public interface IModel {
-    double[] SampleExpectedRewards(Random random);
-    void Update(int action, double reward);
-    void Disable(int action);
+  // represents a model for the reward distribution (of an action given a state)
+  public interface IModel : ICloneable {
+    double SampleExpectedReward(Random random);
+    void Update(double reward);
     void Reset();
     void PrintStats();
   }
 }
```
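Because the interface now describes a single action's reward distribution and extends `ICloneable`, a policy can stamp out one model per action from a prototype and stay agnostic about which reward model is in use. A hypothetical sketch (only `IModel` comes from the changeset; the policy class and its method names are invented):

```csharp
using System;
using System.Linq;
using HeuristicLab.Algorithms.Bandits;

// Hypothetical policy sketch against the revised single-action IModel:
// a prototype model (e.g. BernoulliModel or GaussianModel) is cloned once
// per action, so the same policy code runs with any reward model.
public class GenericThompsonPolicySketch {
  private readonly IModel[] models;

  public GenericThompsonPolicySketch(IModel prototype, int numActions) {
    models = Enumerable.Range(0, numActions)
                       .Select(_ => (IModel)prototype.Clone())
                       .ToArray();
  }

  public int SelectAction(Random random) {
    // draw one posterior sample per action and return the argmax
    var samples = models.Select(m => m.SampleExpectedReward(random)).ToArray();
    return Array.IndexOf(samples, samples.Max());
  }

  public void Observe(int action, double reward) {
    models[action].Update(reward);
  }
}
```

Swapping `BernoulliModel` for a `GaussianModel` then only changes the prototype passed to the constructor.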