source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs @ 11742

Last change on this file since 11742 was 11742, checked in by gkronber, 6 years ago

#2283 refactoring

File size: 2.0 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Epsilon-greedy bandit policy. On each call a random number is drawn; if it is
  /// greater than <c>eps</c> the policy picks (one of) the enabled action(s) with the
  /// highest value according to the configured value function, otherwise the choice
  /// is delegated to a <see cref="RandomPolicy"/>.
  /// </summary>
  public class EpsGreedyPolicy : IBanditPolicy {
    // threshold compared against random.NextDouble(); larger values mean more random selections
    private readonly double eps;
    // policy used for the non-greedy (exploration) branch
    private readonly RandomPolicy randomPolicy;
    // maps an action's statistics to the value that the greedy branch maximizes
    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
    // free-text description, only used by ToString()
    private readonly string desc;

    /// <summary>
    /// Creates a policy that uses <c>DefaultPolicyActionInfo.AverageReward</c> as value
    /// function and an empty description.
    /// </summary>
    /// <param name="eps">Threshold for the random draw; see <see cref="SelectAction"/>.</param>
    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }

    /// <summary>
    /// Creates a policy with a custom value function.
    /// </summary>
    /// <param name="eps">Threshold for the random draw; see <see cref="SelectAction"/>.</param>
    /// <param name="valueFunction">Function that assigns each action the value maximized by the greedy branch.</param>
    /// <param name="desc">Description that is appended to the output of <see cref="ToString"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="valueFunction"/> is null.</exception>
    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
      // fail here instead of with a NullReferenceException on the first greedy selection
      if (valueFunction == null) throw new ArgumentNullException("valueFunction");

      this.eps = eps;
      this.randomPolicy = new RandomPolicy();
      this.valueFunction = valueFunction;
      this.desc = desc;
    }

    /// <summary>
    /// Selects an action from <paramref name="actionInfos"/>.
    /// </summary>
    /// <param name="random">Random number generator used for the eps draw and for tie breaking.</param>
    /// <param name="actionInfos">Statistics of the available actions; must not be empty.</param>
    /// <returns>
    /// In the greedy branch: the zero-based position within <paramref name="actionInfos"/> of an
    /// enabled action with the best value (ties are broken randomly). Otherwise: the result of
    /// <c>RandomPolicy.SelectAction</c>.
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      Debug.Assert(actionInfos.Any());
      if (random.NextDouble() > eps) {
        // select best
        var bestActions = new List<int>();
        double bestQ = double.NegativeInfinity;

        // aIdx counts positions in the ORIGINAL sequence. Entries that are not
        // DefaultPolicyActionInfo are skipped without being renumbered, so the
        // returned index always refers to the caller's actionInfos.
        int aIdx = -1;
        foreach (var info in actionInfos) {
          aIdx++;
          if (!(info is DefaultPolicyActionInfo)) continue;

          var aInfo = (DefaultPolicyActionInfo)info;
          if (aInfo.Disabled) continue;

          var q = valueFunction(aInfo);

          if (q > bestQ) {
            // strictly better: forget all previous candidates
            bestActions.Clear();
            bestActions.Add(aIdx);
            bestQ = q;
          } else if (q.IsAlmost(bestQ)) {
            // tie with the current best: keep as additional candidate
            bestActions.Add(aIdx);
          }
        }
        Debug.Assert(bestActions.Any());
        return bestActions.SelectRandom(random);
      } else {
        // select random
        return randomPolicy.SelectAction(random, actionInfos);
      }
    }

    /// <summary>
    /// Creates the per-action statistics object used by this policy.
    /// </summary>
    /// <returns>A new <c>DefaultPolicyActionInfo</c>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the policy name together with eps (two decimal places) and the description.
    /// </summary>
    public override string ToString() {
      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.