Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs @ 11742

Visit:

Last change on this file since 11742 was 11742, checked in by gkronber, 9 years ago
#2283 refactoring
File size: 2.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
9	/* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
10	International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */
11
12	public class ChernoffIntervalEstimationPolicy : IBanditPolicy {
13	private readonly double delta;
14
15	public ChernoffIntervalEstimationPolicy(double delta = 0.01) {
16	this.delta = delta;
17	}
18	public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
19	Debug.Assert(actionInfos.Any());
20	// select best
21	var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
22	int k = myActionInfos.Count(a => !a.Disabled);
23	int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
24	int bestAction = -1;
25	double bestQ = double.NegativeInfinity;
26	var aIdx = -1;
27	foreach (var aInfo in myActionInfos) {
28	aIdx++;
29	if (aInfo.Disabled) continue;
30	if (aInfo.Tries == 0) return aIdx;
31
32	var avgReward = aInfo.SumReward / aInfo.Tries;
33
34	// page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
35	// var alpha = Math.Log(2 * totalTries * k / delta);
36	double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
37	var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
38	if (q > bestQ) {
39	bestQ = q;
40	bestAction = aIdx;
41	}
42	}
43	Debug.Assert(bestAction >= 0);
44	return bestAction;
45	}
46
47	public IBanditPolicyActionInfo CreateActionInfo() {
48	return new DefaultPolicyActionInfo();
49	}
50
51	public override string ToString() {
52	return string.Format("ChernoffIntervalEstimationPolicy({0:F2})", delta);
53	}
54	}
55	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences