using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.Models {
  // Bayesian estimation of a Gaussian with unknown mean and known variance
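  //
  // Usage sketch (a Thompson sampling loop; `Pull` is a hypothetical reward
  // function supplied by the caller, not part of this class):
  //
  //   var model = new GaussianModel(numActions: 10, meanPriorMu: 0.5, meanPriorVariance: 1.0);
  //   for (int step = 0; step < horizon; step++) {
  //     var theta = model.SampleExpectedRewards(random);
  //     int arm = Array.IndexOf(theta, theta.Max()); // play the arm with the highest sampled mean
  //     model.Update(arm, Pull(arm));
  //   }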
  public class GaussianModel : IModel {
    private readonly int numActions;
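    // per-action pull counts; -1 marks a disabled action (see Disable)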
    private readonly int[] tries;
    private readonly double[] sumRewards;

    // parameters of Gaussian prior for mean
    private readonly double meanPriorMu;
    private readonly double meanPriorVariance;

    private readonly double rewardVariance = 0.1; // assumed known reward variance

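    // numActions: number of arms; meanPriorMu / meanPriorVariance: Gaussian prior over each arm's mean reward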
    public GaussianModel(int numActions, double meanPriorMu, double meanPriorVariance) {
      this.numActions = numActions;
      this.tries = new int[numActions];
      this.sumRewards = new double[numActions];
      this.meanPriorMu = meanPriorMu;
      this.meanPriorVariance = meanPriorVariance;
    }

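    // draws one sample of the expected reward per action from the current posterior
    // (Thompson sampling: the caller typically plays the argmax of the returned vector)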
    public double[] SampleExpectedRewards(Random random) {
      // expected values for reward
      var theta = new double[numActions];

      for (int a = 0; a < numActions; a++) {
        if (tries[a] == -1) {
          theta[a] = double.NegativeInfinity; // disabled action
        } else {
          // calculate posterior mean and variance (for mean reward)
          // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution
          // (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
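          // with n = tries[a] observations, known reward variance s2 = rewardVariance,
          // and prior N(mu0 = meanPriorMu, v0 = meanPriorVariance):
          //   posteriorVariance = 1 / (n / s2 + 1 / v0)
          //   posteriorMean     = posteriorVariance * (mu0 / v0 + sumRewards[a] / s2)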
          var posteriorVariance = 1.0 / (tries[a] / rewardVariance + 1.0 / meanPriorVariance);
          var posteriorMean = posteriorVariance * (meanPriorMu / meanPriorVariance + sumRewards[a] / rewardVariance);

          // sample a mean from the posterior
          theta[a] = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
          // theta already represents the expected reward value => nothing else to do
        }
      }

      return theta;
    }

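    // records one observed reward for the given action (updates the sufficient statistics: pull count and reward sum)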
    public void Update(int action, double reward) {
      sumRewards[action] += reward;
      tries[action]++;
    }

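    // excludes the action from future sampling; SampleExpectedRewards returns -infinity for disabled actions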
    public void Disable(int action) {
      tries[action] = -1;
      sumRewards[action] = 0.0;
    }

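    // resets all statistics to the prior (this also re-enables disabled actions)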
    public void Reset() {
      Array.Clear(tries, 0, numActions);
      Array.Clear(sumRewards, 0, numActions);
    }

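    // prints the empirical mean reward per action ("-" for untried or disabled actions)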
    public void PrintStats() {
      for (int i = 0; i < numActions; i++) {
        if (tries[i] > 0) Console.Write("{0:F2} ", sumRewards[i] / tries[i]);
        else Console.Write("- "); // avoid printing NaN for untried arms or a bogus mean for disabled arms
      }
    }
  }
}