Line | |
---|
1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using System.Threading.Tasks;
|
---|
6 |
|
---|
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Epsilon-greedy policy for the multi-armed bandit problem: with probability
  /// (1 - eps) it exploits the action with the highest observed average reward,
  /// and with probability eps it explores a uniformly random action.
  /// Statistics are accumulated via <see cref="UpdateReward"/> and can be
  /// discarded with <see cref="Reset"/>.
  /// </summary>
  public class EpsGreedyPolicy : BanditPolicy {
    private readonly Random random;
    private readonly double eps;
    // Number of times each action has been rewarded (pull count per arm).
    private readonly int[] tries;
    // Cumulative reward received per action; avg = sumReward[i] / tries[i].
    private readonly double[] sumReward;

    /// <summary>
    /// Creates a new epsilon-greedy policy.
    /// </summary>
    /// <param name="random">Source of randomness for exploration decisions. Must not be null.</param>
    /// <param name="numActions">Number of available actions (forwarded to the base policy).</param>
    /// <param name="eps">Exploration probability; must lie in [0, 1].</param>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="random"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="eps"/> is outside [0, 1].</exception>
    public EpsGreedyPolicy(Random random, int numActions, double eps)
      : base(numActions) {
      // Validate eagerly: a null random would otherwise only fail later inside
      // SelectAction, and an out-of-range eps silently degenerates the policy.
      if (random == null) throw new ArgumentNullException("random");
      if (eps < 0.0 || eps > 1.0 || double.IsNaN(eps))
        throw new ArgumentOutOfRangeException("eps", "eps must be in the interval [0, 1].");
      this.random = random;
      this.eps = eps;
      this.tries = new int[NumActions];
      this.sumReward = new double[NumActions];
    }

    /// <summary>
    /// Selects the next action: greedy (best average reward so far) with
    /// probability 1 - eps, uniformly random otherwise. Untried actions are
    /// always preferred in the greedy branch so every arm gets sampled at least once.
    /// </summary>
    /// <returns>The zero-based index of the selected action.</returns>
    public override int SelectAction() {
      if (random.NextDouble() > eps) {
        // Exploit: pick the action with the highest empirical mean reward.
        var maxReward = double.NegativeInfinity;
        int bestAction = -1;
        for (int i = 0; i < NumActions; i++) {
          // An action that has never been tried is selected immediately,
          // which guarantees initial coverage of all arms.
          if (tries[i] == 0) return i;
          var avgReward = sumReward[i] / tries[i];
          if (maxReward < avgReward) {
            maxReward = avgReward;
            bestAction = i;
          }
        }
        // NumActions >= 1 implies bestAction was set (or we returned early).
        return bestAction;
      } else {
        // Explore: uniformly random action.
        return random.Next(NumActions);
      }
    }

    /// <summary>
    /// Records the reward observed after playing <paramref name="action"/>.
    /// </summary>
    /// <param name="action">Zero-based action index; must be in [0, NumActions).</param>
    /// <param name="reward">Observed reward for that action.</param>
    public override void UpdateReward(int action, double reward) {
      tries[action]++;
      sumReward[action] += reward;
    }

    /// <summary>
    /// Clears all accumulated statistics, returning the policy to its initial state.
    /// </summary>
    public override void Reset() {
      Array.Clear(tries, 0, tries.Length);
      Array.Clear(sumReward, 0, sumReward.Length);
    }
  }
}
|
---|
Note: See TracBrowser for help on using the repository browser.