using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits {
  /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
     International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */

12 | public class ThresholdAscentPolicy : BanditPolicy {
|
---|
13 | const int numBins = 101;
|
---|
14 | const double binSize = 1.0 / (numBins - 1);
|
---|
15 |
|
---|
16 | // for each arm store the number of observed rewards for each bin of size delta
|
---|
17 | // for delta = 0.01 we have 101 bins
|
---|
18 | // the first bin is freq of rewards >= 0 // all
|
---|
19 | // the second bin is freq of rewards > 0
|
---|
20 | // the third bin is freq of rewards > 0.01
|
---|
21 | // the last bin is for rewards > 0.99
|
---|
22 | //
|
---|
23 | // (also see RewardBin function)
|
---|
24 | private readonly int[,] armRewardHistogram; // for performance reasons we store cumulative counts (freq of rewards > lower threshold)
|
---|
25 |
|
---|
26 |
|
---|
27 | private readonly int[] tries;
|
---|
28 | private readonly int s;
|
---|
29 | private readonly double delta;
|
---|
30 |
|
---|
31 | private int totalTries = 0;
|
---|
32 | private int thresholdBin; // bin index of current threshold
|
---|
33 | private const double maxTries = 1E6;
|
---|
34 |
|
---|
35 | public ThresholdAscentPolicy(int numActions, int s = 100, double delta = 0.05)
|
---|
36 | : base(numActions) {
|
---|
37 | this.thresholdBin = 1; // first bin to check is bin idx 1 == freq of rewards > 0
|
---|
38 | this.s = s;
|
---|
39 | this.delta = delta;
|
---|
40 | this.armRewardHistogram = new int[numActions, numBins];
|
---|
41 | this.tries = new int[numActions];
|
---|
42 | }
|
---|
43 |
|
---|
44 | // maps a reward value to it's bin
|
---|
45 | private static int RewardBin(double reward) {
|
---|
46 | Debug.Assert(reward >= 0 && reward <= 1.0);
|
---|
47 | // reward = 0 => 0
|
---|
48 | // ]0.00 .. 0.01] => 1
|
---|
49 | // ]0.01 .. 0.02] => 2
|
---|
50 | // ...
|
---|
51 | // ]0.99 .. 1.00] => 100
|
---|
52 | if (reward <= 0) return 0;
|
---|
53 | return (int)Math.Ceiling((reward / binSize));
|
---|
54 | }
|
---|
55 |
|
---|
56 |
|
---|
57 | private double U(double mu, int n, int k) {
|
---|
58 | //var alpha = Math.Log(2.0 * totalTries * k / delta);
|
---|
59 | double alpha = Math.Log(2) + Math.Log(maxTries) + Math.Log(k) - Math.Log(delta); // totalTries is max iterations in original paper
|
---|
60 | return mu + (alpha + Math.Sqrt(2 * n * mu * alpha + alpha * alpha)) / n;
|
---|
61 | }
|
---|
62 |
|
---|
63 |
|
---|
64 | public override int SelectAction() {
|
---|
65 | Debug.Assert(Actions.Any());
|
---|
66 | UpdateThreshold();
|
---|
67 | int bestAction = -1;
|
---|
68 | double bestQ = double.NegativeInfinity;
|
---|
69 | int k = Actions.Count();
|
---|
70 | foreach (var a in Actions) {
|
---|
71 | if (tries[a] == 0) return a;
|
---|
72 | double mu = armRewardHistogram[a, thresholdBin] / (double)tries[a]; // probability of rewards > T
|
---|
73 | double q = U(mu, tries[a], k);
|
---|
74 | if (q > bestQ) {
|
---|
75 | bestQ = q;
|
---|
76 | bestAction = a;
|
---|
77 | }
|
---|
78 | }
|
---|
79 | Debug.Assert(Actions.Contains(bestAction));
|
---|
80 | return bestAction;
|
---|
81 | }
|
---|
82 |
|
---|
83 | private void UpdateThreshold() {
|
---|
84 | while (thresholdBin < (numBins - 1) && Actions.Sum(a => armRewardHistogram[a, thresholdBin]) >= s) {
|
---|
85 | thresholdBin++;
|
---|
86 | // Console.WriteLine("New threshold {0:F2}", T);
|
---|
87 | }
|
---|
88 | }
|
---|
89 |
|
---|
90 | public override void UpdateReward(int action, double reward) {
|
---|
91 | Debug.Assert(Actions.Contains(action));
|
---|
92 | totalTries++;
|
---|
93 | tries[action]++;
|
---|
94 | // efficiency: we can start at the current threshold bin because all bins below that are not accessed in select-action
|
---|
95 | for (var idx = thresholdBin; idx <= RewardBin(reward); idx++)
|
---|
96 | armRewardHistogram[action, idx]++;
|
---|
97 | }
|
---|
98 |
|
---|
99 | public override void DisableAction(int action) {
|
---|
100 | base.DisableAction(action);
|
---|
101 | totalTries -= tries[action];
|
---|
102 | tries[action] = -1;
|
---|
103 | }
|
---|
104 |
|
---|
105 | public override void Reset() {
|
---|
106 | base.Reset();
|
---|
107 | totalTries = 0;
|
---|
108 | thresholdBin = 1;
|
---|
109 | Array.Clear(tries, 0, tries.Length);
|
---|
110 | Array.Clear(armRewardHistogram, 0, armRewardHistogram.Length);
|
---|
111 | }
|
---|
112 |
|
---|
113 | public override void PrintStats() {
|
---|
114 | for (int i = 0; i < tries.Length; i++) {
|
---|
115 | if (tries[i] >= 0) {
|
---|
116 | Console.Write("{0,6}", tries[i]);
|
---|
117 | } else {
|
---|
118 | Console.Write("{0,6}", "");
|
---|
119 | }
|
---|
120 | }
|
---|
121 | Console.WriteLine();
|
---|
122 | }
|
---|
123 | public override string ToString() {
|
---|
124 | return string.Format("ThresholdAscentPolicy({0},{1:F2})", s, delta);
|
---|
125 | }
|
---|
126 |
|
---|
127 | }
}