
Timestamp: 01/07/15 09:21:46
Author: gkronber
Message: #2283: refactoring and bug fixes
Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models
Files: 3 edited

Legend: unchanged lines are shown without a prefix; removed lines are prefixed with "-", added lines with "+".
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

r11730 → r11732

  namespace HeuristicLab.Algorithms.Bandits.Models {
    public class BernoulliModel : IModel {
-     private readonly int numActions;
-     private readonly int[] success;
-     private readonly int[] failure;
+     private int success;
+     private int failure;

      // parameters of beta prior distribution
      private readonly double alpha;
      private readonly double beta;

-     public BernoulliModel(int numActions, double alpha = 1.0, double beta = 1.0) {
-       this.numActions = numActions;
-       this.success = new int[numActions];
-       this.failure = new int[numActions];
+     public BernoulliModel(double alpha = 1.0, double beta = 1.0) {
        this.alpha = alpha;
        this.beta = beta;
      }

-     public double[] SampleExpectedRewards(Random random) {
+     public double SampleExpectedReward(Random random) {
        // sample bernoulli mean from beta prior
-       var theta = new double[numActions];
-       for (int a = 0; a < numActions; a++) {
-         if (success[a] == -1)
-           theta[a] = 0.0;
-         else {
-           theta[a] = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
-         }
-       }
-
-       // no need to sample, we know the exact expected value:
-       // the expected value of a bernoulli variable is just theta
-       return theta.Select(t => t).ToArray();
+       return Rand.BetaRand(random, success + alpha, failure + beta);
      }

-     public void Update(int action, double reward) {
-       const double EPSILON = 1E-6;
-       Debug.Assert(Math.Abs(reward - 0.0) < EPSILON || Math.Abs(reward - 1.0) < EPSILON);
-       if (Math.Abs(reward - 1.0) < EPSILON) {
-         success[action]++;
+     public void Update(double reward) {
+       Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
+       if (reward.IsAlmost(1.0)) {
+         success++;
        } else {
-         failure[action]++;
+         failure++;
        }
      }

-     public void Disable(int action) {
-       success[action] = -1;
-     }
-
      public void Reset() {
-       Array.Clear(success, 0, numActions);
-       Array.Clear(failure, 0, numActions);
+       success = 0;
+       failure = 0;
      }

      public void PrintStats() {
-       for (int i = 0; i < numActions; i++) {
-         Console.Write("{0:F2} ", success[i] / (double)failure[i]);
-       }
+       Console.Write("{0:F2} ", success / (double)failure);
+     }
+
+     public object Clone() {
+       return new BernoulliModel() { failure = this.failure, success = this.success };
      }
    }
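For reference, not part of the changeset: the single Rand.BetaRand call is the standard Beta-Bernoulli conjugate update. With a \(\operatorname{Beta}(\alpha, \beta)\) prior on the Bernoulli mean \(\theta\) and observed counts of \(s\) successes and \(f\) failures, the posterior stays Beta, and since \(\mathbb{E}[r \mid \theta] = \theta\), a Thompson-sampling draw of the expected reward is simply a draw of \(\theta\):

\[
p(\theta \mid s, f) = \operatorname{Beta}(\theta \mid \alpha + s,\ \beta + f), \qquad
\tilde{\theta} \sim \operatorname{Beta}(\alpha + s,\ \beta + f).
\]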
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs

r11730 → r11732

  using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
  using HeuristicLab.Common;

  namespace HeuristicLab.Algorithms.Bandits.Models {
-   // bayesian estimation of a Gaussian with unknown mean and known variance
+   // bayesian estimation of a Gaussian with
+   // 1) unknown mean and known variance
+   // 2) unknown mean and unknown variance
    public class GaussianModel : IModel {
-     private readonly int numActions;
-     private readonly int[] tries;
-     private readonly double[] sumRewards;
-
+     private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();

      // parameters of Gaussian prior for mean
      private readonly double meanPriorMu;
      private readonly double meanPriorVariance;

+     private readonly bool knownVariance;
      private readonly double rewardVariance = 0.1; // assumed known reward variance

-     public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
-       this.numActions = numActions;
-       this.tries = new int[numActions];
-       this.sumRewards = new double[numActions];
+     // parameters of Gamma prior for precision (= inverse variance)
+     private readonly int precisionPriorAlpha;
+     private readonly double precisionPriorBeta;
+
+     // non-informative prior
+     private const double priorK = 1.0;
+
+     // this constructor assumes the variance is known
+     public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
        this.meanPriorMu = meanPriorMu;
        this.meanPriorVariance = meanPriorVariance;
+
+       this.knownVariance = true;
+       this.rewardVariance = rewardVariance;
+     }
+
+     // this constructor assumes the variance is also unknown
+     // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85-89
+     public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
+       this.meanPriorMu = meanPriorMu;
+       this.meanPriorVariance = meanPriorVariance;
+
+       this.knownVariance = false;
+       this.precisionPriorAlpha = precisionPriorAlpha;
+       this.precisionPriorBeta = precisionPriorBeta;
      }


-     public double[] SampleExpectedRewards(Random random) {
+     public double SampleExpectedReward(Random random) {
+       if (knownVariance) {
+         return SampleExpectedRewardKnownVariance(random);
+       } else {
+         return SampleExpectedRewardUnknownVariance(random);
+       }
+     }
+
+     private double SampleExpectedRewardKnownVariance(Random random) {
        // expected values for reward
-       var theta = new double[numActions];
-
-       for (int a = 0; a < numActions; a++) {
-         if (tries[a] == -1) {
-           theta[a] = double.NegativeInfinity; // disabled action
-         } else {
-           // calculate posterior mean and variance (for mean reward)
-
-           // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
-           var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
-           var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);
-
-           // sample a mean from the posterior
-           theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
-           // theta already represents the expected reward value => nothing else to do
-         }
-       }
+       // calculate posterior mean and variance (for mean reward)
+       // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+       var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
+       var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);
+
+       // sample a mean from the posterior
+       var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
+       // the sampled mean already is the expected reward value => nothing else to do
+       return posteriorMeanSample;
+
+       // return 0.99-quantile value
+       //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
+     }
+
+     // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+     private double SampleExpectedRewardUnknownVariance(Random random) {
+       var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
+       var posteriorK = priorK + estimator.N;
+       var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
+       double posteriorBeta;
+       if (estimator.N > 0) {
+         posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
+       } else {
+         posteriorBeta = precisionPriorBeta;
+       }
+
+       // sample from the posterior marginal for µ (the expected value), eq. 91:
+       // p(µ|D) = T_{2αn}(µ | µn, βn/(αn·κn))
+       // sample from a t distribution: http://stats.stackexchange.com/a/70270
+       var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
+
+       var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
        return theta;
+
+       //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
+       //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
      }

-     public void Update(int action, double reward) {
-       sumRewards[action] += reward;
-       tries[action]++;
-     }
-
-     public void Disable(int action) {
-       tries[action] = -1;
-       sumRewards[action] = 0.0;
+     public void Update(double reward) {
+       estimator.UpdateReward(reward);
      }

      public void Reset() {
-       Array.Clear(tries, 0, numActions);
-       Array.Clear(sumRewards, 0, numActions);
+       estimator.Reset();
      }

      public void PrintStats() {
-       for (int i = 0; i < numActions; i++) {
-         Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]);
-       }
+       Console.Write("{0:F2} ", estimator.Avg);
+     }
+
+     public object Clone() {
+       if (knownVariance)
+         return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
+       else
+         return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
      }
    }
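For reference, not part of the changeset: both sampling paths follow the conjugate updates in the cited Murphy 2007 notes. With known reward variance \(\sigma^2\) (rewardVariance) and a \(\mathcal{N}(\mu_0, \sigma_0^2)\) prior on the mean (meanPriorMu, meanPriorVariance), the posterior over the mean after \(n\) rewards \(r_1, \ldots, r_n\) is

\[
\sigma_n^2 = \left(\frac{n}{\sigma^2} + \frac{1}{\sigma_0^2}\right)^{-1}, \qquad
\mu_n = \sigma_n^2 \left(\frac{\mu_0}{\sigma_0^2} + \frac{\sum_{i=1}^{n} r_i}{\sigma^2}\right), \qquad
\tilde{\mu} \sim \mathcal{N}(\mu_n, \sigma_n^2),
\]

which is exactly what SampleExpectedRewardKnownVariance computes. With unknown variance, the Normal-Gamma prior \((\mu_0, \kappa_0, \alpha_0, \beta_0)\) (the code fixes \(\kappa_0\) = priorK = 1) yields the posterior parameters of equations 85-89 and the marginal over the mean of equation 91, which SampleExpectedRewardUnknownVariance draws from via the inverse Student-t CDF:

\[
\mu_n = \frac{\kappa_0 \mu_0 + n \bar{r}}{\kappa_0 + n}, \quad
\kappa_n = \kappa_0 + n, \quad
\alpha_n = \alpha_0 + \frac{n}{2}, \quad
\beta_n = \beta_0 + \frac{1}{2}\sum_{i=1}^{n} (r_i - \bar{r})^2 + \frac{\kappa_0 n (\bar{r} - \mu_0)^2}{2(\kappa_0 + n)},
\]
\[
p(\mu \mid D) = t_{2\alpha_n}\!\left(\mu \;\middle|\; \mu_n,\ \frac{\beta_n}{\alpha_n \kappa_n}\right).
\]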
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/IModel.cs

r11730 → r11732

  namespace HeuristicLab.Algorithms.Bandits {
-   public interface IModel {
-     double[] SampleExpectedRewards(Random random);
-     void Update(int action, double reward);
-     void Disable(int action);
+   // represents a model for the reward distribution (of an action given a state)
+   public interface IModel : ICloneable {
+     double SampleExpectedReward(Random random);
+     void Update(double reward);
      void Reset();
      void PrintStats();
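The refactoring moves per-action bookkeeping (arrays indexed by action, Disable) out of the models: each model now tracks a single action's reward distribution, and ICloneable lets a policy stamp out one model per action from a configured prototype. A minimal sketch of that calling pattern (the ThompsonSamplingSketch class and its toy environment are hypothetical, not part of the changeset; only the members shown in the diffs above are used):

using System;
using System.Linq;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.Models;

public static class ThompsonSamplingSketch {
  // pick the action whose posterior draw of the expected reward is largest
  public static int SelectAction(Random random, IModel[] actionModels) {
    int best = 0;
    double bestSample = double.NegativeInfinity;
    for (int a = 0; a < actionModels.Length; a++) {
      double sample = actionModels[a].SampleExpectedReward(random);
      if (sample > bestSample) { bestSample = sample; best = a; }
    }
    return best;
  }

  public static void Main() {
    var random = new Random(1234);
    var prototype = new BernoulliModel(alpha: 1.0, beta: 1.0);
    // one independent model per action, cloned from the configured prototype
    var models = Enumerable.Range(0, 3).Select(_ => (IModel)prototype.Clone()).ToArray();

    for (int step = 0; step < 1000; step++) {
      int a = SelectAction(random, models);
      // toy environment: action a succeeds with probability 0.25 * (a + 1)
      double reward = random.NextDouble() < 0.25 * (a + 1) ? 1.0 : 0.0;
      models[a].Update(reward);
    }
    foreach (var m in models) m.PrintStats(); // prints each action's success/failure ratio
  }
}

Note that BernoulliModel.Clone() as committed re-creates the model with default priors (alpha and beta are readonly and not copied), so the prototype here also uses the defaults.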