using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.Models {
  // Bayesian estimation of a Gaussian with unknown mean and known variance
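  //
  // Usage sketch (a Thompson sampling loop; `Pull` is a hypothetical reward
  // function supplied by the caller, not part of this class):
  //
  //   var model = new GaussianModel(numActions: 10, meanPriorMu: 0.5, meanPriorVariance: 1.0);
  //   for (int step = 0; step < horizon; step++) {
  //     var theta = model.SampleExpectedRewards(random);
  //     int arm = Array.IndexOf(theta, theta.Max()); // play the arm with the highest sampled mean
  //     model.Update(arm, Pull(arm));
  //   }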
  public class GaussianModel : IModel {
    private readonly int numActions;
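    // per-action pull counts; -1 marks a disabled action (see Disable)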
    private readonly int[] tries;
    private readonly double[] sumRewards;

    // parameters of Gaussian prior for mean
    private readonly double meanPriorMu;
    private readonly double meanPriorVariance;

    private readonly double rewardVariance = 0.1; // assumed known reward variance

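    // numActions: number of arms; meanPriorMu / meanPriorVariance: Gaussian prior over each arm's mean reward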
    public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
      this.numActions = numActions;
      this.tries = new int[numActions];
      this.sumRewards = new double[numActions];
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;
    }

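    // draws one sample of the expected reward per action from the current posterior
    // (Thompson sampling: the caller typically plays the argmax of the returned vector)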
    public double[] SampleExpectedRewards(Random random) {
      // expected values for reward
      var theta = new double[numActions];

      for (int a = 0; a < numActions; a++) {
        if (tries[a] == -1) {
          theta[a] = double.NegativeInfinity; // disabled action
        } else {
          // calculate posterior mean and variance (for mean reward)
          // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution
          // (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
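          // with n = tries[a] observations, known reward variance s2 = rewardVariance,
          // and prior N(mu0 = meanPriorMu, v0 = meanPriorVariance):
          //   posteriorVariance = 1 / (n / s2 + 1 / v0)
          //   posteriorMean     = posteriorVariance * (mu0 / v0 + sumRewards[a] / s2)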
          var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
          var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);

          // sample a mean from the posterior
          theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
          // theta already represents the expected reward value => nothing else to do
        }
      }

      return theta;
    }

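    // records one observed reward for the given action (updates the sufficient statistics: pull count and reward sum)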
    public void Update(int action, double reward) {
      sumRewards[action] += reward;
      tries[action]++;
    }

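    // excludes the action from future sampling; SampleExpectedRewards returns -infinity for disabled actions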
    public void Disable(int action) {
      tries[action] = -1;
      sumRewards[action] = 0.0;
    }

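    // resets all statistics to the prior (this also re-enables disabled actions)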
    public void Reset() {
      Array.Clear(tries, 0, numActions);
      Array.Clear(sumRewards, 0, numActions);
    }

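    // prints the empirical mean reward per action ("-" for untried or disabled actions)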
    public void PrintStats() {
      for (int i = 0; i < numActions; i++) {
        if (tries[i] > 0) Console.Write("{0:F2} ", sumRewards[i] / tries[i]);
        else Console.Write("- "); // avoid printing NaN for untried arms or a bogus mean for disabled arms
      }
    }
  }
}