Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GaussianThompsonSamplingPolicy.cs @ 11742

Visit:

Last change on this file since 11742 was 11742, checked in by gkronber, 10 years ago
#2283 refactoring
File size: 3.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using HeuristicLab.Common;
6
7	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
8
/// <summary>
/// Thompson sampling bandit policy that models the reward of every action as Gaussian
/// with an unknown mean and a known, shared variance. The prior over each mean is
/// Gaussian as well (see the constants below).
/// </summary>
[Obsolete("Replaced by GenericThompsonSamplingPolicy(GaussianModel(0.5, 1.0, 0.1))")]
public class GaussianThompsonSamplingPolicy : IBanditPolicy {
  // when true, SelectAction uses the legacy sampling scheme that was used for old experiments
  private readonly bool compatibility;

  // assumes a Gaussian reward distribution with different means but the same variance for each action
  // the prior for the mean is also Gaussian with the following parameters
  private readonly double rewardVariance = 0.1; // we assume a known variance

  private readonly double priorMean = 0.5;
  private readonly double priorVariance = 1;

  /// <summary>
  /// Creates the policy.
  /// </summary>
  /// <param name="compatibility">
  /// If true, the legacy selection rule is used: an enabled action with fewer than two tries is
  /// returned immediately, otherwise a value is sampled around the sample mean using the sample
  /// variance. If false (default), a mean is sampled from the conjugate Gaussian posterior.
  /// </param>
  public GaussianThompsonSamplingPolicy(bool compatibility = false) {
    this.compatibility = compatibility;
  }

  /// <summary>
  /// Samples one value per enabled action and returns the index of the action with the largest sample.
  /// </summary>
  /// <param name="random">Random number generator passed on to Rand.RandNormal.</param>
  /// <param name="actionInfos">
  /// Statistics of all actions. Entries that are not <c>MeanAndVariancePolicyActionInfo</c> instances
  /// are skipped, but still occupy their position, so the returned value is always an index into
  /// this sequence.
  /// </param>
  /// <returns>
  /// Zero-based index (into <paramref name="actionInfos"/>) of the selected action, or -1 if no
  /// enabled action of the supported type exists (this case trips a Debug.Assert in debug builds).
  /// </returns>
  public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    int bestAction = -1;
    double bestQ = double.NegativeInfinity;

    // The index is counted over the ORIGINAL sequence. Counting over a type-filtered sequence
    // would shift the indices of all following actions as soon as one entry is filtered out.
    int aIdx = -1;
    foreach (var info in actionInfos) {
      aIdx++;
      if (!(info is MeanAndVariancePolicyActionInfo)) continue;
      var aInfo = (MeanAndVariancePolicyActionInfo)info;
      if (aInfo.Disabled) continue;

      var tries = aInfo.Tries;
      var sampleMean = aInfo.AvgReward;
      var sampleVariance = aInfo.RewardVariance;

      double theta;
      if (compatibility) {
        // old code used for old experiments (preserved because it performed very well)
        // actions with fewer than two tries are selected right away
        if (tries < 2) return aIdx;
        var stdDev = Math.Sqrt(sampleVariance);
        theta = Rand.RandNormal(random) * stdDev + sampleMean;
      } else {
        // calculate posterior mean and variance (for mean reward)
        // see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
        var posteriorVariance = 1.0 / (tries / rewardVariance + 1.0 / priorVariance);
        var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries * sampleMean / rewardVariance);

        // sample a mean from the posterior
        // theta already represents the expected reward value => nothing else to do
        theta = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
      }

      // strict comparison: on equal samples the action that was seen first is kept
      if (theta > bestQ) {
        bestQ = theta;
        bestAction = aIdx;
      }
    }
    Debug.Assert(bestAction > -1, "SelectAction found no enabled action to choose from");
    return bestAction;
  }

  /// <summary>
  /// Creates the per-action statistics object that SelectAction expects.
  /// </summary>
  /// <returns>A new <c>MeanAndVariancePolicyActionInfo</c>.</returns>
  public IBanditPolicyActionInfo CreateActionInfo() {
    return new MeanAndVariancePolicyActionInfo();
  }

  /// <summary>
  /// Returns the fixed display name of this policy.
  /// </summary>
  public override string ToString() {
    return "GaussianThompsonSamplingPolicy";
  }
}
86	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences