Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs @ 13042

Last change on this file since 13042 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 2.0 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Epsilon-greedy bandit policy. With probability <c>eps</c> the choice is delegated to a
  /// <see cref="RandomPolicy"/> (exploration); otherwise the action with the largest
  /// <c>MaxReward</c> is chosen (exploitation).
  /// </summary>
  public class EpsGreedyPolicy : IBanditPolicy {
    // probability of taking the exploration branch in SelectAction
    private readonly double eps;
    // policy used for the exploration branch
    private readonly RandomPolicy randomPolicy;
    // free-text description, only used by ToString()
    private readonly string desc;

    /// <summary>
    /// Creates a policy with the given exploration probability and an empty description.
    /// </summary>
    /// <param name="eps">Exploration probability; 0 is pure exploitation, 1 is pure exploration.</param>
    public EpsGreedyPolicy(double eps) : this(eps, string.Empty) { }

    /// <summary>
    /// Creates a policy with the given exploration probability and description.
    /// </summary>
    /// <param name="eps">Exploration probability; 0 is pure exploitation, 1 is pure exploration.</param>
    /// <param name="desc">Description that is included in the output of <see cref="ToString"/>.</param>
    public EpsGreedyPolicy(double eps, string desc) {
      this.eps = eps;
      this.randomPolicy = new RandomPolicy();
      this.desc = desc;
    }

    /// <summary>
    /// Selects the next action.
    /// </summary>
    /// <param name="random">Random number generator used for the explore/exploit decision
    /// and passed on to the random policy.</param>
    /// <param name="actionInfos">Action statistics; must not be empty. Elements that are not
    /// <see cref="DefaultPolicyActionInfo"/> are ignored by the greedy branch.</param>
    /// <returns>
    /// In the greedy branch: the position (within <paramref name="actionInfos"/>) of the first
    /// action that reached the largest <c>MaxReward</c>. In the exploration branch: whatever
    /// the random policy returns for <paramref name="actionInfos"/>.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The greedy branch found no candidate (e.g. no element is a <see cref="DefaultPolicyActionInfo"/>).
    /// </exception>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      Debug.Assert(actionInfos.Any());

      if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration
        // select best
        var bestActions = new List<int>();
        double bestQ = double.NegativeInfinity;

        // aIdx counts positions in the unfiltered sequence so that the returned index refers to
        // the same sequence that is handed to the random policy in the exploration branch
        int aIdx = -1;
        foreach (var actionInfo in actionInfos) {
          aIdx++;

          var aInfo = actionInfo as DefaultPolicyActionInfo;
          if (aInfo == null) continue; // foreign or null entries cannot be ranked

          var q = aInfo.MaxReward;

          if (q > bestQ) {
            bestActions.Clear();
            bestActions.Add(aIdx);
            bestQ = q;
          } else if (q.IsAlmost(bestQ)) {
            bestActions.Add(aIdx);
          }
        }
        Debug.Assert(bestActions.Any());
        // ties are broken deterministically in favor of the first candidate (no random tie-breaking)
        return bestActions.First();
      } else {
        // select random
        return randomPolicy.SelectAction(random, actionInfos);
      }
    }

    /// <summary>
    /// Creates the per-action statistics object used by this policy.
    /// </summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the policy name together with eps (two decimals) and the description.
    /// </summary>
    public override string ToString() {
      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.