Context Navigation

UCB1Policy.cs @ 11727

Visit:

Last change on this file since 11727 was 11727, checked in by gkronber, 9 years ago
#2283: worked on grammatical optimization problem solvers (simple MCTS done)
File size: 1.5 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// Bandit policy implementing the UCB1 selection rule: every action is scored by its
/// empirical mean reward plus the exploration bonus
/// sqrt(2 * ln(totalTries) / tries[action]), and the highest-scoring action is chosen.
/// </summary>
public class UCB1Policy : BanditPolicy {
  // Sentinel written to tries[] for an action that has been disabled.
  private const int DisabledMarker = -1;

  // Number of rewards recorded per action (DisabledMarker once the action is disabled).
  private readonly int[] tries;
  // Sum of the rewards recorded per action.
  private readonly double[] sumReward;
  // Number of rewards recorded overall, minus the counts of actions disabled since.
  private int totalTries = 0;

  /// <summary>
  /// Creates a policy with zeroed statistics for <paramref name="numActions"/> actions.
  /// </summary>
  /// <param name="numActions">Number of actions; also the length of the per-action arrays.</param>
  public UCB1Policy(int numActions)
    : base(numActions) {
    this.tries = new int[numActions];
    this.sumReward = new double[numActions];
  }

  /// <summary>
  /// Selects the next action to try.
  /// </summary>
  /// <returns>
  /// The first action in <c>Actions</c> that has no recorded reward yet; otherwise the
  /// action with the largest UCB1 score (the earliest one wins ties).
  /// Returns -1 when <c>Actions</c> yields no elements.
  /// </returns>
  public override int SelectAction() {
    int bestAction = -1;
    double bestQ = double.NegativeInfinity;
    // Loop-invariant numerator of the exploration term. It is only used for actions with
    // tries > 0; an untried action causes an early return before it is read.
    double explorationNumerator = 2 * Math.Log(totalTries);
    foreach (var a in Actions) {
      // Untried actions are selected immediately so that each one is sampled at least once.
      if (tries[a] == 0) return a;
      var q = sumReward[a] / tries[a] + Math.Sqrt(explorationNumerator / tries[a]);
      if (q > bestQ) {
        bestQ = q;
        bestAction = a;
      }
    }
    return bestAction;
  }

  /// <summary>
  /// Records a reward observed for <paramref name="action"/>.
  /// </summary>
  /// <param name="action">Index of the action that was taken; asserted (debug builds only) to be in <c>Actions</c>.</param>
  /// <param name="reward">Reward obtained for the action.</param>
  public override void UpdateReward(int action, double reward) {
    Debug.Assert(Actions.Contains(action));
    totalTries++;
    tries[action]++;
    sumReward[action] += reward;
  }

  /// <summary>
  /// Disables <paramref name="action"/> and removes its statistics from the totals.
  /// </summary>
  /// <param name="action">Index of the action to disable.</param>
  public override void DisableAction(int action) {
    // NOTE(review): presumably the base class removes the action from Actions - confirm.
    base.DisableAction(action);
    // Subtract only genuine counts. If the action was disabled before, tries[action]
    // holds DisabledMarker (-1) and subtracting it would wrongly increase totalTries.
    if (tries[action] > 0) totalTries -= tries[action];
    tries[action] = DisabledMarker;
    sumReward[action] = 0;
  }

  /// <summary>
  /// Resets the base policy and clears all counts and reward sums to zero
  /// (which also clears any disabled markers in the tries array).
  /// </summary>
  public override void Reset() {
    base.Reset();
    totalTries = 0;
    Array.Clear(tries, 0, tries.Length);
    Array.Clear(sumReward, 0, sumReward.Length);
  }
}
53	}

Note: See TracBrowser for help on using the repository browser.