Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs @ 11730

Visit:

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago
#2283: several major extensions for grammatical optimization
File size: 2.4 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// Bandit policy that selects arms with the UCB1-Tuned index: the empirical mean reward of an arm
/// plus an exploration bonus scaled by a capped upper estimate of the arm's reward variance.
/// </summary>
/// <remarks>
/// NOTE(review): the variance cap of 1/4 presumes rewards lie in [0, 1] -- confirm against callers.
/// </remarks>
public class UCB1TunedPolicy : BanditPolicy {
  // 1/4 is the upper bound on the variance of a Bernoulli-distributed variable (attained at p = 0.5).
  private const double MaxVariance = 1.0 / 4;

  // Value stored in tries[] for an arm that has been disabled.
  private const int DisabledMarker = -1;

  private readonly int[] tries;            // number of rewards recorded per arm (DisabledMarker if disabled)
  private readonly double[] sumReward;     // sum of rewards per arm
  private readonly double[] sumSqrReward;  // sum of squared rewards per arm
  private int totalTries = 0;              // number of rewards recorded over all arms that are still enabled

  /// <summary>
  /// Creates a policy with per-arm statistics for <paramref name="numActions"/> arms, all zeroed.
  /// </summary>
  /// <param name="numActions">Number of arms; also forwarded to the base class constructor.</param>
  public UCB1TunedPolicy(int numActions)
    : base(numActions) {
    this.tries = new int[numActions];
    this.sumReward = new double[numActions];
    this.sumSqrReward = new double[numActions];
  }

  /// <summary>
  /// Variance term of UCB1-Tuned: empirical variance of the arm's rewards plus sqrt(2 ln(t) / s),
  /// where s is the arm's try count and t the total try count.
  /// </summary>
  /// <param name="arm">Index of the arm; must have at least one recorded reward.</param>
  /// <param name="logTotalTries">Natural logarithm of <c>totalTries</c>, precomputed by the caller.</param>
  private double V(int arm, double logTotalTries) {
    var s = tries[arm];
    return sumSqrReward[arm] / s - Math.Pow(sumReward[arm] / s, 2) + Math.Sqrt(2 * logTotalTries / s);
  }

  /// <summary>
  /// Returns the first enabled arm that has no recorded reward yet; otherwise the enabled arm with
  /// the largest UCB1-Tuned index (ties go to the arm enumerated first).
  /// </summary>
  /// <returns>The selected arm index, or -1 if <c>Actions</c> is empty.</returns>
  public override int SelectAction() {
    Debug.Assert(Actions.Any());
    int bestAction = -1;
    double bestQ = double.NegativeInfinity;
    // ln(totalTries) is the same for every arm, so compute it once per selection.
    // It is only consumed for arms with tries > 0, in which case totalTries >= 1.
    double logTotalTries = Math.Log(totalTries);
    foreach (var a in Actions) {
      if (tries[a] == 0) return a;
      var q = sumReward[a] / tries[a] + Math.Sqrt((logTotalTries / tries[a]) * Math.Min(MaxVariance, V(a, logTotalTries)));
      if (q > bestQ) {
        bestQ = q;
        bestAction = a;
      }
    }
    return bestAction;
  }

  /// <summary>
  /// Records one observed reward for <paramref name="action"/>.
  /// </summary>
  /// <param name="action">Arm that produced the reward; asserted (debug builds) to be in <c>Actions</c>.</param>
  /// <param name="reward">Observed reward value.</param>
  public override void UpdateReward(int action, double reward) {
    Debug.Assert(Actions.Contains(action));
    totalTries++;
    tries[action]++;
    sumReward[action] += reward;
    sumSqrReward[action] += reward * reward;
  }

  /// <summary>
  /// Disables <paramref name="action"/> via the base class, removes its tries from the total and
  /// clears its statistics.
  /// </summary>
  /// <param name="action">Arm to disable.</param>
  public override void DisableAction(int action) {
    base.DisableAction(action);
    // Only subtract a real try count: an already disabled arm holds DisabledMarker (-1), and
    // subtracting that would wrongly increase totalTries on a repeated call.
    if (tries[action] > 0) totalTries -= tries[action];
    tries[action] = DisabledMarker;
    sumReward[action] = 0;
    sumSqrReward[action] = 0;
  }

  /// <summary>
  /// Resets the base class state and zeroes all counters and reward sums, which also clears the
  /// disabled marker of every arm.
  /// </summary>
  public override void Reset() {
    base.Reset();
    totalTries = 0;
    Array.Clear(tries, 0, tries.Length);
    Array.Clear(sumReward, 0, sumReward.Length);
    Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
  }

  /// <summary>
  /// Writes one line to the console with the mean reward of every arm in a 5-character column
  /// (two decimals); disabled arms are printed as blanks.
  /// </summary>
  public override void PrintStats() {
    for (int i = 0; i < sumReward.Length; i++) {
      if (tries[i] >= 0) {
        // For an enabled arm without any tries this is 0.0 / 0 and the result is printed as is.
        Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
      } else {
        Console.Write("{0,5}", "");
      }
    }
    Console.WriteLine();
  }

  /// <summary>Returns the fixed display name of this policy.</summary>
  public override string ToString() {
    return "UCB1TunedPolicy";
  }
}
78	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences