
Timestamp: 01/07/15 09:21:46 (9 years ago)
Author: gkronber
Message: #2283: refactoring and bug fixes

File: 1 edited

Legend:
  (no prefix)  unmodified
  +            added
  -            removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs

r11730 → r11732

  using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
  using HeuristicLab.Common;

  namespace HeuristicLab.Algorithms.Bandits.Models {
-   // bayesian estimation of a Gaussian with unknown mean and known variance
+   // bayesian estimation of a Gaussian with
+   // 1) unknown mean and known variance
+   // 2) unknown mean and unknown variance
    public class GaussianModel : IModel {
-     private readonly int numActions;
-     private readonly int[] tries;
-     private readonly double[] sumRewards;
-
+     private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();

      // parameters of Gaussian prior for mean
      private readonly double meanPriorMu;
      private readonly double meanPriorVariance;

+     private readonly bool knownVariance;
      private readonly double rewardVariance = 0.1; // assumed known reward variance

-     public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
-       this.numActions = numActions;
-       this.tries = new int[numActions];
-       this.sumRewards = new double[numActions];
+     // parameters of Gamma prior for precision (= inverse variance)
+     private readonly int precisionPriorAlpha;
+     private readonly double precisionPriorBeta;
+
+     // non-informative prior
+     private const double priorK = 1.0;
+
+     // this constructor assumes the variance is known
+     public GaussianModel(double meanPriorMu, double meanPriorVariance, double rewardVariance = 0.1) {
        this.meanPriorMu = meanPriorMu;
        this.meanPriorVariance = meanPriorVariance;
+
+       this.knownVariance = true;
+       this.rewardVariance = rewardVariance;
+     }
+
+     // this constructor assumes the variance is also unknown
+     // uses Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, equations 85 - 89
+     public GaussianModel(double meanPriorMu, double meanPriorVariance, int precisionPriorAlpha, double precisionPriorBeta) {
+       this.meanPriorMu = meanPriorMu;
+       this.meanPriorVariance = meanPriorVariance;
+
+       this.knownVariance = false;
+       this.precisionPriorAlpha = precisionPriorAlpha;
+       this.precisionPriorBeta = precisionPriorBeta;
      }


-     public double[] SampleExpectedRewards(Random random) {
+     public double SampleExpectedReward(Random random) {
+       if (knownVariance) {
+         return SampleExpectedRewardKnownVariance(random);
+       } else {
+         return SampleExpectedRewardUnknownVariance(random);
+       }
+     }
+
+     private double SampleExpectedRewardKnownVariance(Random random) {
        // expected values for reward
-       var theta = new double[numActions];
+       // calculate posterior mean and variance (for mean reward)

-       for (int a = 0; a < numActions; a++) {
-         if (tries[a] == -1) {
-           theta[a] = double.NegativeInfinity; // disabled action
-         } else {
-           // calculate posterior mean and variance (for mean reward)
+       // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+       var posteriorMeanVariance = 1.0 / (estimator.N / rewardVariance + 1.0 / meanPriorVariance);
+       var posteriorMeanMean = posteriorMeanVariance * (meanPriorMu / meanPriorVariance + estimator.Sum / rewardVariance);

-           // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
-           var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
-           var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);
+       // sample a mean from the posterior
+       var posteriorMeanSample = Rand.RandNormal(random) * Math.Sqrt(posteriorMeanVariance) + posteriorMeanMean;
+       // theta already represents the expected reward value => nothing else to do
+       return posteriorMeanSample;

-           // sample a mean from the posterior
-           theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
-           // theta already represents the expected reward value => nothing else to do
-         }
+       // return 0.99-quantile value
+       //return alglib.invnormaldistribution(0.99) * Math.Sqrt(rewardVariance + posteriorMeanVariance) + posteriorMeanMean;
+     }
+
+     // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution, page 6 onwards (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
+     private double SampleExpectedRewardUnknownVariance(Random random) {
+
+       var posteriorMean = (priorK * meanPriorMu + estimator.Sum) / (priorK + estimator.N);
+       var posteriorK = priorK + estimator.N;
+       var posteriorAlpha = precisionPriorAlpha + estimator.N / 2.0;
+       double posteriorBeta;
+       if (estimator.N > 0) {
+         posteriorBeta = precisionPriorBeta + 0.5 * estimator.N * estimator.Variance + priorK * estimator.N * Math.Pow(estimator.Avg - meanPriorMu, 2) / (2.0 * (priorK + estimator.N));
+       } else {
+         posteriorBeta = precisionPriorBeta;
        }

+       // sample from the posterior marginal for mu (expected value), equation 91:
+       // p(µ|D) = t_{2αn}(µ | µn, βn/(αn κn))
+
+       // sample from a t distribution: http://stats.stackexchange.com/a/70270
+       var t2alpha = alglib.invstudenttdistribution((int)(2 * posteriorAlpha), random.NextDouble());
+
+       var theta = t2alpha * posteriorBeta / (posteriorAlpha * posteriorK) + posteriorMean;
        return theta;
+
+       //return alglib.invnormaldistribution(random.NextDouble()) * + theta;
+       //return alglib.invstudenttdistribution((int)(2 * posteriorAlpha), 0.99) * (posteriorBeta*posteriorK + posteriorBeta) / (posteriorAlpha*posteriorK) + posteriorMean;
      }

-     public void Update(int action, double reward) {
-       sumRewards[action] += reward;
-       tries[action]++;
-     }

-     public void Disable(int action) {
-       tries[action] = -1;
-       sumRewards[action] = 0.0;
+     public void Update(double reward) {
+       estimator.UpdateReward(reward);
      }

      public void Reset() {
-       Array.Clear(tries, 0, numActions);
-       Array.Clear(sumRewards, 0, numActions);
+       estimator.Reset();
      }

      public void PrintStats() {
-       for (int i = 0; i < numActions; i++) {
-         Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]);
-       }
+       Console.Write("{0:F2} ", estimator.Avg);
+     }
+
+     public object Clone() {
+       if (knownVariance)
+         return new GaussianModel(meanPriorMu, meanPriorVariance, rewardVariance);
+       else
+         return new GaussianModel(meanPriorMu, meanPriorVariance, precisionPriorAlpha, precisionPriorBeta);
      }
    }
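
For reference, the posterior updates that the two samplers above implement, following Murphy 2007 (equations 85 - 89 and 91 for the unknown-variance case). The symbol mapping is µ0 = meanPriorMu, σ0² = meanPriorVariance, σ² = rewardVariance, κ0 = priorK, α0 = precisionPriorAlpha, β0 = precisionPriorBeta, n = estimator.N, Σ xᵢ = estimator.Sum, x̄ = estimator.Avg; the correspondence assumes estimator.Variance is the biased sample variance Σ(xᵢ - x̄)²/n, so that 0.5 * estimator.N * estimator.Variance matches the ½ Σ(xᵢ - x̄)² term.

  % known reward variance sigma^2, Gaussian prior mu ~ N(mu_0, sigma_0^2)
  \sigma_n^2 = \left( \frac{1}{\sigma_0^2} + \frac{n}{\sigma^2} \right)^{-1}, \qquad
  \mu_n = \sigma_n^2 \left( \frac{\mu_0}{\sigma_0^2} + \frac{\sum_i x_i}{\sigma^2} \right), \qquad
  \mu \mid D \sim \mathcal{N}(\mu_n, \sigma_n^2)

  % unknown variance, Normal-Gamma prior with parameters (mu_0, kappa_0, alpha_0, beta_0)
  \mu_n = \frac{\kappa_0 \mu_0 + n \bar{x}}{\kappa_0 + n}, \qquad
  \kappa_n = \kappa_0 + n, \qquad
  \alpha_n = \alpha_0 + \frac{n}{2}, \qquad
  \beta_n = \beta_0 + \frac{1}{2} \sum_i (x_i - \bar{x})^2
            + \frac{\kappa_0 \, n \, (\bar{x} - \mu_0)^2}{2 (\kappa_0 + n)}

  % posterior marginal of the mean (equation 91), a Student-t with 2*alpha_n degrees of freedom
  p(\mu \mid D) = t_{2 \alpha_n}\!\left( \mu \,\middle|\, \mu_n, \frac{\beta_n}{\alpha_n \kappa_n} \right)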
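
The refactoring replaces the per-action arrays (numActions, tries, sumRewards) with a single-arm model backed by an OnlineMeanAndVarianceEstimator, so a bandit policy now keeps one GaussianModel per action. A minimal sketch of how such a policy loop might drive the new interface, Thompson-sampling style; the prior values, the seed, the simulated rewards and the surrounding loop are illustrative assumptions, and only the GaussianModel members visible in the diff are used.

  using System;
  using System.Linq;
  using HeuristicLab.Algorithms.Bandits.Models;

  public static class ThompsonSamplingSketch {
    public static void Main() {
      var random = new Random(31415);
      int numActions = 3;

      // one independent reward model per action; the N(0.5, 1.0) mean prior and the
      // Gamma(1, 0.1) precision prior are made-up illustration values
      var models = Enumerable.Range(0, numActions)
        .Select(_ => new GaussianModel(0.5, 1.0, 1, 0.1))
        .ToArray();

      // hypothetical true mean rewards, only used to simulate feedback here
      var trueMeans = new[] { 0.2, 0.5, 0.8 };

      for (int step = 0; step < 1000; step++) {
        // Thompson sampling: draw one expected-reward sample per action and play the argmax
        int bestAction = 0;
        double bestSample = double.NegativeInfinity;
        for (int a = 0; a < numActions; a++) {
          var s = models[a].SampleExpectedReward(random);
          if (s > bestSample) { bestSample = s; bestAction = a; }
        }

        // simulated noisy reward for the chosen action
        double reward = trueMeans[bestAction] + 0.1 * (random.NextDouble() - 0.5);
        models[bestAction].Update(reward);
      }

      // prints the empirical mean reward of each action's model
      foreach (var m in models) m.PrintStats();
      Console.WriteLine();
    }
  }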