Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs @ 11732

Visit:

Last change on this file since 11732 was 11732, checked in by gkronber, 10 years ago
#2283: refactoring and bug fixes
File size: 2.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits {
9	/* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
10	International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */
11
12	public class ChernoffIntervalEstimationPolicy : IPolicy {
13	private readonly double delta;
14
15	public ChernoffIntervalEstimationPolicy(double delta = 0.01) {
16	this.delta = delta;
17	}
18	public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
19	Debug.Assert(actionInfos.Any());
20	// select best
21	var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
22	int k = myActionInfos.Length;
23	int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
24	int bestAction = -1;
25	double bestQ = double.NegativeInfinity;
26	for (int a = 0; a < myActionInfos.Length; a++) {
27	if (myActionInfos[a].Disabled) continue;
28	if (myActionInfos[a].Tries == 0) return a;
29
30	var sumReward = myActionInfos[a].SumReward;
31	var tries = myActionInfos[a].Tries;
32
33	var avgReward = sumReward / tries;
34
35	// page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
36	// var alpha = Math.Log(2 * totalTries * k / delta);
37	double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
38	var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
39	if (q > bestQ) {
40	bestQ = q;
41	bestAction = a;
42	}
43	}
44	Debug.Assert(bestAction >= 0);
45	return bestAction;
46	}
47
48	public IPolicyActionInfo CreateActionInfo() {
49	return new DefaultPolicyActionInfo();
50	}
51
52	public override string ToString() {
53	return string.Format("ChernoffIntervalEstimationPolicy({0:F2})", delta);
54	}
55	}
56	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences