Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GaussianThompsonSamplingPolicy.cs @ 11832

Visit:

Last change on this file since 11832 was 11832, checked in by gkronber, 9 years ago
linear value function approximation and good results for poly-10 benchmark
File size: 3.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using HeuristicLab.Common;
6
7	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
8
9	[Obsolete("Replaced by GenericThompsonSamplingPolicy(GaussianModel(0.5, 1.0, 0.1))")]
10	public class GaussianThompsonSamplingPolicy : IBanditPolicy {
11	private bool compatibility;
12
13	// assumes a Gaussian reward distribution with different means but the same variances for each action
14	// the prior for the mean is also Gaussian with the following parameters
15	private readonly double rewardVariance = 0.1; // we assume a known variance
16
17	private readonly double priorMean = 0.5;
18	private readonly double priorVariance = 1;
19
20
21	public GaussianThompsonSamplingPolicy(bool compatibility = false) {
22	this.compatibility = compatibility;
23	}
24
25	public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
26	var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
27	int bestAction = -1;
28	double bestQ = double.NegativeInfinity;
29
30	int aIdx = -1;
31	foreach (var aInfo in myActionInfos) {
32	aIdx++;
33
34	var tries = aInfo.Tries;
35	var sampleMean = aInfo.AvgReward;
36	var sampleVariance = aInfo.RewardVariance;
37
38	double theta;
39	if (compatibility) {
40	// old code used for old experiments (preserved because it performed very well)
41	if (tries < 2) return aIdx;
42	var mu = sampleMean;
43	var variance = sampleVariance;
44	var stdDev = Math.Sqrt(variance);
45	theta = Rand.RandNormal(random) * stdDev + mu;
46	} else {
47	// calculate posterior mean and variance (for mean reward)
48
49	// see Murphy 2007: Conjugate Bayesian analysis of the Gaussian distribution (http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf)
50	var posteriorVariance = 1.0 / (tries / rewardVariance + 1.0 / priorVariance);
51	var posteriorMean = posteriorVariance * (priorMean / priorVariance + tries * sampleMean / rewardVariance);
52
53	// sample a mean from the posterior
54	theta = Rand.RandNormal(random) * Math.Sqrt(posteriorVariance) + posteriorMean;
55
56	// theta already represents the expected reward value => nothing else to do
57	}
58
59	if (theta > bestQ) {
60	bestQ = theta;
61	bestAction = aIdx;
62	}
63	}
64	Debug.Assert(bestAction > -1);
65	return bestAction;
66	}
67
68	public IBanditPolicyActionInfo CreateActionInfo() {
69	return new MeanAndVariancePolicyActionInfo();
70	}
71
72
73	//public override void UpdateReward(int action, double reward) {
74	// Debug.Assert(Actions.Contains(action));
75	// tries[action]++;
76	// var delta = reward - sampleMean[action];
77	// sampleMean[action] += delta / tries[action];
78	// sampleM2[action] += sampleM2[action] + delta * (reward - sampleMean[action]);
79	//}
80
81	public override string ToString() {
82	return "GaussianThompsonSamplingPolicy";
83	}
84	}
85	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences