Context Navigation

EpsGreedyPolicy.cs @ 11732

Visit:

Last change on this file since 11732 was 11732, checked in by gkronber, 9 years ago
#2283: refactoring and bug fixes
File size: 1.6 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits {
/// <summary>
/// Epsilon-greedy action selection. For each call a uniform number in [0, 1) is drawn:
/// if it is below <c>eps</c> the choice is delegated to a <see cref="RandomPolicy"/>,
/// otherwise the enabled action with the largest <c>MaxReward</c> is chosen.
/// </summary>
public class EpsGreedyPolicy : IPolicy {
  private readonly double eps;
  private readonly RandomPolicy randomPolicy;

  /// <summary>Creates a new epsilon-greedy policy.</summary>
  /// <param name="eps">
  /// Threshold compared against <c>Random.NextDouble()</c>; draws below this value
  /// use the random policy. The value is stored as given (no range check).
  /// </param>
  public EpsGreedyPolicy(double eps) {
    this.eps = eps;
    this.randomPolicy = new RandomPolicy();
  }

  /// <summary>Selects the index of the next action to try.</summary>
  /// <param name="random">Random number source used for the explore/exploit decision
  /// and passed on to the random policy.</param>
  /// <param name="actionInfos">Statistics of the available actions; must not be empty
  /// (checked with a debug assertion only).</param>
  /// <returns>
  /// In the greedy branch: the index of the first enabled action that has never been
  /// tried, otherwise the index of the enabled action with the largest <c>MaxReward</c>
  /// (the earliest one wins ties). Indices count positions within the elements of
  /// <paramref name="actionInfos"/> that are <see cref="DefaultPolicyActionInfo"/> instances.
  /// If no enabled action exists the result is -1 (a debug assertion fires in debug builds).
  /// In the random branch: whatever the random policy returns.
  /// </returns>
  public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    Debug.Assert(actionInfos.Any());

    // NextDouble() lies in [0, 1). Using '>=' makes the exploration probability
    // P(draw < eps), so eps == 0 is purely greedy even when the draw is exactly 0.0,
    // while eps == 1 still always explores.
    if (random.NextDouble() >= eps) {
      // select best
      // NOTE(review): OfType silently drops elements of other types, so aIdx is a position
      // in the filtered sequence; this matches positions in actionInfos only when every
      // element is a DefaultPolicyActionInfo - confirm against callers.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
      int bestAction = -1;
      double bestQ = double.NegativeInfinity;
      int aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;

        // an enabled action without any tries is always taken first
        if (aInfo.Tries == 0) return aIdx;

        // actions are ranked by the best reward observed so far (not by the average reward)
        var q = aInfo.MaxReward;
        if (q > bestQ) {
          bestQ = q;
          bestAction = aIdx;
        }
      }
      Debug.Assert(bestAction >= 0);
      return bestAction;
    } else {
      // select random
      return randomPolicy.SelectAction(random, actionInfos);
    }
  }

  /// <summary>Creates the per-action statistics object used by this policy.</summary>
  /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
  public IPolicyActionInfo CreateActionInfo() {
    return new DefaultPolicyActionInfo();
  }

  /// <summary>Returns the policy name together with <c>eps</c> formatted with two decimals.</summary>
  public override string ToString() {
    return string.Format("EpsGreedyPolicy({0:F2})", eps);
  }
}
57	}

Note: See TracBrowser for help on using the repository browser.