Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs @ 13231

Visit:

Last change on this file since 13231 was 11974, checked in by gkronber, 10 years ago
#2283: eurocast experiments
File size: 3.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using HeuristicLab.Common;
6
7	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
8
9	[Obsolete("Replaced by GenericThompsonSamplingPolicy(GaussianModel(0.5, 1.0, 0.1))")]
10	public class GaussianThompsonSamplingPolicy : IBanditPolicy {
11	private bool compatibility;
12
13	// assumes a Gaussian reward distribution with different means but the same variances for each action
14	// the prior for the mean is also Gaussian with the following parameters
15	private readonly double rewardVariance = 0.1; // we assume a known variance
16
17	private readonly double priorMean = 0.5;
18	private readonly double priorVariance = 1;
19
20
21	public GaussianThompsonSamplingPolicy(bool compatibility = false) {
22	this.compatibility = compatibility;
23	}
24
25	public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
26	var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
27	int bestAction = -1;
28	double bestQ = double.NegativeInfinity;
29
30	int aIdx = -1;
31	foreach (var aInfo in myActionInfos) {
32	aIdx++;
33
34	var tries = aInfo.Tries;
35	var sampleMean = aInfo.AvgReward;
36	var sampleVariance = aInfo.RewardVariance;
37
38	double theta;
39	if (compatibility) {
40	// old code used for old experiments (preserved because it performed very well)
41	if (tries < 2) return aIdx;
42	var mu = sampleMean;
43	var variance = sampleVariance;
44	var stdDev = Math.Sqrt(variance);
45	theta = Rand.RandNormal(random) * stdDev + mu;
46	} else {
47	// calculate posterior mean and variance (for mean reward)
48
49	// see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
50	var posteriorVariance = 1.0 / (tries / rewardVariance + 1.0 / priorVariance);
51	var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries * sampleMean / rewardVariance);
52
53	// sample a mean from the posterior
54	theta = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
55
56	// theta already represents the expected reward value => nothing else to do
57	}
58
59	// very unlikely to be the same (and we don't care)
60	if (theta > bestQ) {
61	bestQ = theta;
62	bestAction = aIdx;
63	}
64	}
65	Debug.Assert(bestAction > -1);
66	return bestAction;
67	}
68
69	public IBanditPolicyActionInfo CreateActionInfo() {
70	return new MeanAndVariancePolicyActionInfo();
71	}
72
73
74	//public override void UpdateReward(int action, double reward) {
75	// Debug.Assert(Actions.Contains(action));
76	// tries[action]++;
77	// var delta = reward - sampleMean[action];
78	// sampleMean[action] += delta / tries[action];
79	// sampleM2[action] += sampleM2[action] + delta * (reward - sampleMean[action]);
80	//}
81
82	public override string ToString() {
83	return "GaussianThompsonSamplingPolicy";
84	}
85	}
86	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences