Context Navigation

UCB1Policy.cs @ 11710

Visit:

Last change on this file since 11710 was 11710, checked in by gkronber, 9 years ago
#2283: more bandit policies and tests
File size: 1.2 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Linq;
4	using System.Text;
5	using System.Threading.Tasks;
6
7	namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// Bandit policy that scores every action by its average observed reward plus
/// the exploration bonus sqrt(2 * ln(totalTries) / tries[action]) and selects
/// the action with the highest score (the UCB1 rule).
/// </summary>
public class UCB1Policy : BanditPolicy {
  // Number of rewards recorded for each action.
  private readonly int[] tries;
  // Sum of the rewards recorded for each action.
  private readonly double[] sumReward;
  // Number of rewards recorded over all actions since construction or the last Reset().
  private int totalTries;

  /// <summary>
  /// Creates a policy whose per-action statistics all start at zero.
  /// </summary>
  /// <param name="numActions">Forwarded to the base constructor; the statistics arrays are sized by NumActions.</param>
  public UCB1Policy(int numActions)
    : base(numActions) {
    this.tries = new int[NumActions];
    this.sumReward = new double[NumActions];
  }

  /// <summary>
  /// Chooses the next action to try.
  /// </summary>
  /// <returns>
  /// The lowest-indexed action without any recorded reward if one is reached first;
  /// otherwise the action with the largest score (ties go to the lowest index).
  /// Returns -1 when the loop over NumActions does not execute at all.
  /// </returns>
  public override int SelectAction() {
    // The numerator of the exploration bonus is identical for all actions, so it is
    // computed once. When totalTries is 0 the value is never used, because the first
    // untried action is returned before any score is evaluated.
    double explorationNumerator = 2 * Math.Log(totalTries);

    int bestAction = -1;
    double bestQ = double.NegativeInfinity;
    for (int i = 0; i < NumActions; i++) {
      // An action without any observation has no defined average; try it right away.
      if (tries[i] == 0) {
        return i;
      }

      double meanReward = sumReward[i] / tries[i];
      double q = meanReward + Math.Sqrt(explorationNumerator / tries[i]);
      // Strict comparison keeps the earliest action when scores are equal.
      if (q > bestQ) {
        bestQ = q;
        bestAction = i;
      }
    }
    return bestAction;
  }

  /// <summary>
  /// Records a reward observed for the given action.
  /// </summary>
  /// <param name="action">Index of the action that produced the reward.</param>
  /// <param name="reward">Observed reward; added to the action's reward sum.</param>
  /// <exception cref="IndexOutOfRangeException">
  /// <paramref name="action"/> is not a valid index into the statistics arrays.
  /// No statistics are modified in that case.
  /// </exception>
  public override void UpdateReward(int action, double reward) {
    // Index the per-action counter first: an invalid action throws here, before
    // totalTries is touched, so the global and per-action counts cannot diverge.
    tries[action]++;
    sumReward[action] += reward;
    totalTries++;
  }

  /// <summary>
  /// Clears all recorded statistics so that every action counts as untried again.
  /// </summary>
  public override void Reset() {
    totalTries = 0;
    Array.Clear(tries, 0, tries.Length);
    Array.Clear(sumReward, 0, sumReward.Length);
  }
}
42	}

Note: See TracBrowser for help on using the repository browser.