Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs @ 11806

Last change on this file since 11806 was 11806, checked in by gkronber, 9 years ago

#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies

File size: 2.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Epsilon-greedy bandit policy: a uniform draw from the supplied random number generator
  /// decides per call whether the action is chosen by the internal <see cref="RandomPolicy"/>
  /// (draw below eps) or greedily by the configured value function (draw at or above eps).
  /// </summary>
  public class EpsGreedyPolicy : IBanditPolicy {
    // threshold compared against random.NextDouble(); 0 => always greedy, 1 => always random
    private readonly double eps;
    // policy that the exploration branch delegates to
    private readonly RandomPolicy randomPolicy;
    // maps an action info to the score that the greedy branch maximizes
    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
    // free-text description that is only used by ToString()
    private readonly string desc;


    /// <summary>
    /// Creates a policy that scores actions with <c>DefaultPolicyActionInfo.AverageReward</c>
    /// and has an empty description.
    /// </summary>
    /// <param name="eps">Threshold for the per-call uniform draw (see <see cref="SelectAction"/>).</param>
    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }

    /// <summary>
    /// Creates a policy with a custom value function and description.
    /// </summary>
    /// <param name="eps">Threshold for the per-call uniform draw (see <see cref="SelectAction"/>).</param>
    /// <param name="valueFunction">Function producing the score of an action; the greedy branch picks the maximum.</param>
    /// <param name="desc">Description that is embedded in the result of <see cref="ToString"/>.</param>
    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
      this.eps = eps;
      this.randomPolicy = new RandomPolicy();
      this.valueFunction = valueFunction;
      this.desc = desc;
    }

    /// <summary>
    /// Selects the index of the next action.
    /// </summary>
    /// <param name="random">Random number generator used for the explore/exploit draw and for tie breaking.</param>
    /// <param name="actionInfos">Statistics of the available actions; must not be empty.</param>
    /// <returns>
    /// A position within <paramref name="actionInfos"/>. In the greedy branch this is one of the
    /// positions whose score is maximal (ties are collected with <c>IsAlmost</c> and resolved via
    /// <c>SelectRandom</c>); otherwise it is whatever the random policy returns.
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      Debug.Assert(actionInfos.Any());
      if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration
        // select best
        var bestActions = new List<int>();
        double bestQ = double.NegativeInfinity;

        // The index must count every element of actionInfos (not only the compatible ones),
        // otherwise the returned index would be shifted as soon as one incompatible entry is skipped
        // and would no longer agree with the indices produced by the exploration branch.
        int aIdx = -1;
        foreach (var info in actionInfos) {
          aIdx++;

          var aInfo = info as DefaultPolicyActionInfo;
          if (aInfo == null) {
            // entries of a different type cannot be scored by the value function
            continue;
          }

          var q = valueFunction(aInfo);

          if (q > bestQ) {
            // strictly better: forget all previous candidates
            bestActions.Clear();
            bestActions.Add(aIdx);
            bestQ = q;
          } else if (q.IsAlmost(bestQ)) {
            // tie with the current best: keep as an additional candidate
            bestActions.Add(aIdx);
          }
        }
        Debug.Assert(bestActions.Any());
        return bestActions.SelectRandom(random);
      } else {
        // select random
        return randomPolicy.SelectAction(random, actionInfos);
      }
    }

    /// <summary>
    /// Creates the action statistics object this policy works with.
    /// </summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }


    /// <summary>
    /// Returns the policy name together with eps (two decimals) and the description.
    /// </summary>
    public override string ToString() {
      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.