Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models
- Timestamp: 01/07/15 09:21:46
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models
- Files: 3 edited
Legend:
- ` ` unmodified
- `+` added
- `-` removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs
```diff
--- BernoulliModel.cs (r11730)
+++ BernoulliModel.cs (r11732)
 namespace HeuristicLab.Algorithms.Bandits.Models {
   public class BernoulliModel : IModel {
-    private readonly int numActions;
-    private readonly int[] success;
-    private readonly int[] failure;
+    private int success;
+    private int failure;
 
     // parameters of beta prior distribution
     private readonly double alpha;
     private readonly double beta;
 
-    public BernoulliModel(int numActions, double alpha = 1.0, double beta = 1.0) {
-      this.numActions = numActions;
-      this.success = new int[numActions];
-      this.failure = new int[numActions];
+    public BernoulliModel(double alpha = 1.0, double beta = 1.0) {
       this.alpha = alpha;
       this.beta = beta;
     }
 
-    public double[] SampleExpectedRewards(Random random) {
+    public double SampleExpectedReward(Random random) {
       // sample bernoulli mean from beta prior
-      var theta = new double[numActions];
-      for (int a = 0; a < numActions; a++) {
-        if (success[a] == -1)
-          theta[a] = 0.0;
-        else {
-          theta[a] = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
-        }
-      }
-
-      // no need to sample, we know the exact expected value:
-      // the expected value of a bernoulli variable is just theta
-      return theta.Select(t => t).ToArray();
+      return Rand.BetaRand(random, success + alpha, failure + beta);
     }
 
-    public void Update(int action, double reward) {
-      const double EPSILON = 1E-6;
-      Debug.Assert(Math.Abs(reward - 0.0) < EPSILON || Math.Abs(reward - 1.0) < EPSILON);
-      if (Math.Abs(reward - 1.0) < EPSILON) {
-        success[action]++;
+    public void Update(double reward) {
+      Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
+      if (reward.IsAlmost(1.0)) {
+        success++;
       } else {
-        failure[action]++;
+        failure++;
       }
     }
 
-    public void Disable(int action) {
-      success[action] = -1;
-    }
-
     public void Reset() {
-      Array.Clear(success, 0, numActions);
-      Array.Clear(failure, 0, numActions);
+      success = 0;
+      failure = 0;
     }
 
     public void PrintStats() {
-      for (int i = 0; i < numActions; i++) {
-        Console.Write("{0:F2} ", success[i] / (double)failure[i]);
-      }
+      Console.Write("{0:F2} ", success / (double)failure);
+    }
+
+    public object Clone() {
+      return new BernoulliModel() { failure = this.failure, success = this.success };
     }
   }
 }
```
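With the per-action arrays removed, a model now tracks a single action and the caller owns the action indexing. Below is a minimal, hypothetical sketch of how a Thompson-sampling loop might drive the refactored class: `BernoulliModel` and its members come from the diff above, while the `ThompsonSamplingSketch` wrapper, the `GetBinaryReward` environment stub, and all constants are invented for illustration.

```csharp
using System;
using System.Linq;
using HeuristicLab.Algorithms.Bandits.Models;

public static class ThompsonSamplingSketch {
  // hypothetical environment: returns a 0/1 reward for the chosen arm
  static double GetBinaryReward(Random random, int arm) {
    return random.NextDouble() < (arm + 1) / 11.0 ? 1.0 : 0.0;
  }

  public static void Run() {
    var random = new Random(1234);
    // one independent Beta-Bernoulli model per arm; the action index lives outside the model now
    var models = Enumerable.Range(0, 10)
                           .Select(_ => new BernoulliModel(alpha: 1.0, beta: 1.0))
                           .ToArray();

    for (int step = 0; step < 1000; step++) {
      // Thompson sampling: draw one posterior sample per arm, play the argmax
      int bestArm = 0;
      double bestSample = double.NegativeInfinity;
      for (int a = 0; a < models.Length; a++) {
        double sample = models[a].SampleExpectedReward(random);
        if (sample > bestSample) { bestSample = sample; bestArm = a; }
      }
      models[bestArm].Update(GetBinaryReward(random, bestArm));
    }

    foreach (var m in models) m.PrintStats(); // prints success/failure ratios
  }
}
```

One apparent motivation for the change: the number of arms no longer needs to be known up front, since models can be created (or cloned) on demand, and the old `Disable` mechanism becomes unnecessary.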
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs
```diff
--- GaussianModel.cs (r11730)
+++ GaussianModel.cs (r11732)
 using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 using HeuristicLab.Common;
 
 namespace HeuristicLab.Algorithms.Bandits.Models {
-  // bayesian estimation of a Gaussian with unknown mean and known variance
+  // bayesian estimation of a Gaussian with
+  // 1) unknown mean and known variance
+  // 2) unknown mean and unknown variance
   public class GaussianModel : IModel {
-    private readonly int numActions;
-    private readonly int[] tries;
-    private readonly double[] sumRewards;
+    private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();
 
     // parameters of Gaussian prior for mean
     private readonly double meanPriorMu;
     private readonly double meanPriorVariance;
 
+    private readonly bool knownVariance;
     private readonly double rewardVariance = 0.1; // assumed known reward variance
 
-    public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
-      this.numActions = numActions;
-      this.tries = new int[numActions];
-      this.sumRewards = new double[numActions];
+    // parameters of Gamma prior for precision (= inverse variance)
+    private readonly int precisionPriorAlpha;
+    private readonly double precisionPriorBeta;
+
+    // non-informative prior
+    private const double priorK = 1.0;
+
+    // this constructor assumes the variance is known
+    public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
       this.meanPriorMu = meanPriorMu;
       this.meanPriorVariance = meanPriorVariance;
+
+      this.knownVariance = true;
+      this.rewardVariance = rewardVariance;
+    }
+
+    // this constructor assumes the variance is also unknown
+    // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85-89
+    public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
+      this.meanPriorMu = meanPriorMu;
+      this.meanPriorVariance = meanPriorVariance;
+
+      this.knownVariance = false;
+      this.precisionPriorAlpha = precisionPriorAlpha;
+      this.precisionPriorBeta = precisionPriorBeta;
     }
 
-    public double[] SampleExpectedRewards(Random random) {
-      // expected values for reward
-      var theta = new double[numActions];
-
-      for (int a = 0; a < numActions; a++) {
-        if (tries[a] == -1) {
-          theta[a] = double.NegativeInfinity; // disabled action
-        } else {
-          // calculate posterior mean and variance (for mean reward)
-          // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
-          var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
-          var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);
-
-          // sample a mean from the posterior
-          theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
-          // theta already represents the expected reward value => nothing else to do
-        }
-      }
-
-      return theta;
+    public double SampleExpectedReward(Random random) {
+      if (knownVariance) {
+        return SampleExpectedRewardKnownVariance(random);
+      } else {
+        return SampleExpectedRewardUnknownVariance(random);
+      }
+    }
+
+    private double SampleExpectedRewardKnownVariance(Random random) {
+      // calculate posterior mean and variance (for mean reward)
+      // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+      var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
+      var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
+
+      // sample a mean from the posterior; it already represents the expected reward => nothing else to do
+      return Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
+
+      // alternative: return the 0.99-quantile value
+      //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
+    }
+
+    // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+    private double SampleExpectedRewardUnknownVariance(Random random) {
+      var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
+      var posteriorK = priorK + estimator.N;
+      var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
+      double posteriorBeta;
+      if (estimator.N > 0) {
+        posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
+      } else {
+        posteriorBeta = precisionPriorBeta;
+      }
+
+      // sample from the posterior marginal for mu (expected value), equation 91:
+      // p(mu|D) = t_{2*alpha_n}(mu | mu_n, beta_n / (alpha_n * kappa_n))
+      // sampling from a t distribution: http://stats.stackexchange.com/a/70270
+      var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
+      return t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
     }
 
-    public void Update(int action, double reward) {
-      sumRewards[action] += reward;
-      tries[action]++;
-    }
-
-    public void Disable(int action) {
-      tries[action] = -1;
-      sumRewards[action] = 0.0;
+    public void Update(double reward) {
+      estimator.UpdateReward(reward);
     }
 
     public void Reset() {
-      Array.Clear(tries, 0, numActions);
-      Array.Clear(sumRewards, 0, numActions);
+      estimator.Reset();
     }
 
     public void PrintStats() {
-      for (int i = 0; i < numActions; i++) {
-        Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]);
-      }
+      Console.Write("{0:F2} ", estimator.Avg);
+    }
+
+    public object Clone() {
+      if (knownVariance)
+        return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
+      else
+        return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
     }
   }
 }
```
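For reference, the posterior updates implemented by `SampleExpectedRewardUnknownVariance` are those of the cited Murphy 2007 notes (equations 85-91): with prior parameters $\mu_0$, $\kappa_0$ (`priorK`), $\alpha_0$, $\beta_0$ and $n$ observed rewards with mean $\bar{x}$,

```latex
\begin{align*}
\mu_n &= \frac{\kappa_0\mu_0 + n\bar{x}}{\kappa_0 + n}, &
\kappa_n &= \kappa_0 + n, &
\alpha_n &= \alpha_0 + \tfrac{n}{2}, \\
\beta_n &= \beta_0 + \tfrac{1}{2}\sum_{i=1}^{n}(x_i-\bar{x})^2
          + \frac{\kappa_0\, n\, (\bar{x}-\mu_0)^2}{2(\kappa_0+n)}, &
p(\mu \mid D) &= t_{2\alpha_n}\!\Bigl(\mu \,\Big|\, \mu_n, \tfrac{\beta_n}{\alpha_n\kappa_n}\Bigr).
\end{align*}
```

The new code also relies on `OnlineMeanAndVarianceEstimator` from HeuristicLab.Common, whose implementation is not part of this changeset. The following sketch only pins down the interface the posterior code needs (`N`, `Sum`, `Avg`, `Variance`, `UpdateReward`, `Reset`); the Welford-style update is an assumption, chosen so that `N * Variance` equals the sum of squared deviations used in the beta update above.

```csharp
using System;

// Minimal sketch of the estimator interface GaussianModel relies on.
// The real OnlineMeanAndVarianceEstimator lives in HeuristicLab.Common;
// the internals below are an assumption, not the actual implementation.
public class OnlineMeanAndVarianceEstimatorSketch {
  public int N { get; private set; }
  public double Sum { get; private set; }
  public double Avg { get { return N > 0 ? Sum / N : 0.0; } }
  // population variance of the rewards seen so far (m2 / N)
  public double Variance { get { return N > 0 ? m2 / N : 0.0; } }

  private double m2; // running sum of squared deviations from the mean

  public void UpdateReward(double reward) {
    N++;
    Sum += reward;
    // Welford update: delta uses the previous mean, the second factor the new one
    double delta = reward - (N > 1 ? (Sum - reward) / (N - 1) : 0.0);
    m2 += delta * (reward - Avg);
  }

  public void Reset() { N = 0; Sum = 0.0; m2 = 0.0; }
}
```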
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/IModel.cs
```diff
--- IModel.cs (r11730)
+++ IModel.cs (r11732)
 namespace HeuristicLab.Algorithms.Bandits {
-  public interface IModel {
-    double[] SampleExpectedRewards(Random random);
-    void Update(int action, double reward);
-    void Disable(int action);
+  // represents a model for the reward distribution (of an action given a state)
+  public interface IModel : ICloneable {
+    double SampleExpectedReward(Random random);
+    void Update(double reward);
     void Reset();
     void PrintStats();
   }
 }
```
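Because the interface now describes a single action's reward distribution and extends `ICloneable`, a policy can stamp out one model per action from a prototype and stay agnostic about which reward model is in use. A hypothetical sketch (only `IModel` comes from the changeset; the policy class and its method names are invented):

```csharp
using System;
using System.Linq;
using HeuristicLab.Algorithms.Bandits;

// Hypothetical policy sketch against the revised single-action IModel:
// a prototype model (e.g. BernoulliModel or GaussianModel) is cloned once
// per action, so the same policy code runs with any reward model.
public class GenericThompsonPolicySketch {
  private readonly IModel[] models;

  public GenericThompsonPolicySketch(IModel prototype, int numActions) {
    models = Enumerable.Range(0, numActions)
                       .Select(_ => (IModel)prototype.Clone())
                       .ToArray();
  }

  public int SelectAction(Random random) {
    // draw one posterior sample per action and return the argmax
    var samples = models.Select(m => m.SampleExpectedReward(random)).ToArray();
    return Array.IndexOf(samples, samples.Max());
  }

  public void Observe(int action, double reward) {
    models[action].Update(reward);
  }
}
```

Swapping `BernoulliModel` for a `GaussianModel` then only changes the prototype passed to the constructor.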