Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs @ 12290

Last change on this file since 12290 was 12290, checked in by gkronber, 10 years ago

#2283 created a new branch to separate development from aballeit

File size: 1.9 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Epsilon-greedy bandit policy: each call to <see cref="SelectAction"/> draws one
  /// uniform number; if it is below <c>eps</c> the choice is delegated to a
  /// <see cref="RandomPolicy"/> (exploration), otherwise one of the actions with the
  /// highest <c>Value</c> is returned (exploitation).
  /// </summary>
  public class EpsGreedyPolicy : IBanditPolicy {
    // Exploration threshold compared against random.NextDouble(); it is not range-checked here.
    private readonly double eps;
    // Policy used for the exploration branch.
    private readonly RandomPolicy randomPolicy;
    // Free-form label that is only used by ToString().
    private readonly string desc;

    /// <summary>
    /// Creates a policy with the given exploration threshold and an empty description.
    /// </summary>
    /// <param name="eps">Exploration threshold; 0 means pure exploitation, 1 means pure exploration.</param>
    public EpsGreedyPolicy(double eps) : this(eps, string.Empty) { }

    /// <summary>
    /// Creates a policy with the given exploration threshold and description.
    /// </summary>
    /// <param name="eps">Exploration threshold; 0 means pure exploitation, 1 means pure exploration.</param>
    /// <param name="desc">Label that is appended to the textual representation of the policy.</param>
    public EpsGreedyPolicy(double eps, string desc) {
      this.eps = eps;
      this.randomPolicy = new RandomPolicy();
      this.desc = desc;
    }

    /// <summary>
    /// Selects the index of the action to play next.
    /// </summary>
    /// <param name="random">Random number generator used for the explore/exploit decision and for tie breaking.</param>
    /// <param name="actionInfos">Per-action statistics; must not be empty (checked by a debug assertion only).</param>
    /// <returns>
    /// The zero-based position of the selected action within <paramref name="actionInfos"/>.
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      Debug.Assert(actionInfos.Any());
      if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration
        // select best
        var bestActions = new List<int>();
        double bestQ = double.NegativeInfinity;

        // The index must count every element of actionInfos (not only the ones of the
        // expected type), otherwise the returned index would no longer refer to a
        // position in the sequence the caller passed in.
        int aIdx = -1;
        foreach (var info in actionInfos) {
          aIdx++;

          var aInfo = info as DefaultPolicyActionInfo;
          if (aInfo == null) {
            // entries of a different type are not considered but still occupy an index
            continue;
          }

          var q = aInfo.Value;

          if (q > bestQ) {
            // strictly better value: discard all previously collected candidates
            bestActions.Clear();
            bestActions.Add(aIdx);
            bestQ = q;
          } else if (q.IsAlmost(bestQ)) {
            // tie with the current best (IsAlmost is presumably a tolerance-based comparison)
            bestActions.Add(aIdx);
          }
        }
        Debug.Assert(bestActions.Any());
        // NOTE(review): SelectRandom presumably picks one candidate uniformly at random - confirm.
        return bestActions.SelectRandom(random);
      } else {
        // select random
        return randomPolicy.SelectAction(random, actionInfos);
      }
    }

    /// <summary>
    /// Creates the action-info object this policy works with.
    /// </summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns <c>EpsGreedyPolicy(eps,desc)</c> with <c>eps</c> formatted to two decimal places
    /// (formatting uses the current culture).
    /// </summary>
    public override string ToString() {
      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.