Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianModel.cs @ 11730

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago

#2283: several major extensions for grammatical optimization

File size: 2.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.Models {
10  // Bayesian estimation of the mean of a Gaussian with unknown mean and known variance
11  public class GaussianModel : IModel {
12    private readonly int numActions;
13    private readonly int[] tries;
14    private readonly double[] sumRewards;
15
16
17    // parameters of Gaussian prior for mean
18    private readonly double meanPriorMu;
19    private readonly double meanPriorVariance;
20
21    private readonly double rewardVariance = 0.1; // assumed know reward variance
22
23    public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
24      this.numActions = numActions;
25      this.tries = new int[numActions];
26      this.sumRewards = new double[numActions];
27      this.meanPriorMu = meanPriorMu;
28      this.meanPriorVariance = meanPriorVariance;
29    }
30
31
32    public double[] SampleExpectedRewards(Random random) {
33      // expected values for reward
34      var theta = new double[numActions];
35
36      for (int a = 0; a < numActions; a++) {
37        if (tries[a] == -1) {
38          theta[a] = double.NegativeInfinity; // disabled action
39        } else {
40          // calculate posterior mean and variance (for mean reward)
41
42          // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
43          var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
44          var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);
45
46          // sample a mean from the posterior
47          theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
48          // theta already represents the expected reward value => nothing else to do
49        }
50      }
51
52      return theta;
53    }
54
55    public void Update(int action, double reward) {
56      sumRewards[action] += reward;
57      tries[action]++;
58    }
59
60    public void Disable(int action) {
61      tries[action] = -1;
62      sumRewards[action] = 0.0;
63    }
64
65    public void Reset() {
66      Array.Clear(tries, 0, numActions);
67      Array.Clear(sumRewards, 0, numActions);
68    }
69
70    public void PrintStats() {
71      for (int i = 0; i < numActions; i++) {
72        Console.Write("{0:F2} ", sumRewards[i] / (double)tries[i]);
73      }
74    }
75  }
76}
Note: See TracBrowser for help on using the repository browser.