Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs @ 11832

Visit:

Last change on this file since 11832 was 11832, checked in by gkronber, 9 years ago
linear value function approximation and good results for poly-10 benchmark
File size: 1.9 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
9	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10	// policy for k-armed bandit (see Auer et al. 2002)
11	public class UCB1TunedPolicy : IBanditPolicy {
12
13	public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
14	var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
15
16	int totalTries = myActionInfos.Sum(a => a.Tries);
17
18	int aIdx = -1;
19	double bestQ = double.NegativeInfinity;
20	var bestActions = new List<int>();
21	foreach (var aInfo in myActionInfos) {
22	aIdx++;
23	double q;
24	if (aInfo.Tries == 0) {
25	q = double.PositiveInfinity;
26	} else {
27	var sumReward = aInfo.SumReward;
28	var tries = aInfo.Tries;
29
30	var avgReward = sumReward / tries;
31	q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries)));
32	// 1/4 is upper bound of bernoulli distributed variable
33	}
34	if (q > bestQ) {
35	bestQ = q;
36	bestActions.Clear();
37	bestActions.Add(aIdx);
38	} else if (q.IsAlmost(bestQ)) {
39	bestActions.Add(aIdx);
40	}
41	}
42	Debug.Assert(bestActions.Any());
43
44	return bestActions.SelectRandom(random);
45	}
46
47	public IBanditPolicyActionInfo CreateActionInfo() {
48	return new MeanAndVariancePolicyActionInfo();
49	}
50
51	private double V(MeanAndVariancePolicyActionInfo actionInfo, int totalTries) {
52	var s = actionInfo.Tries;
53	return actionInfo.RewardVariance + Math.Sqrt(2 * Math.Log(totalTries) / s);
54	}
55
56	public override string ToString() {
57	return "UCB1TunedPolicy";
58	}
59	}
60	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences