Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs @ 13728

Last change on this file since 13728 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 2.0 KB
RevLine 
[11708]1using System;
2using System.Collections.Generic;
[11727]3using System.Diagnostics;
[11708]4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
[11742]7using HeuristicLab.Common;
[11708]8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Epsilon-greedy bandit policy. On each selection a uniform random number is drawn;
  /// if it is greater than or equal to <c>eps</c> the action with the highest
  /// <c>MaxReward</c> is chosen, otherwise the choice is delegated to a <see cref="RandomPolicy"/>.
  /// </summary>
  public class EpsGreedyPolicy : IBanditPolicy {
    // Threshold compared against random.NextDouble(); 0 means always greedy, 1 means always random.
    private readonly double eps;
    // Policy used for the non-greedy branch.
    private readonly RandomPolicy randomPolicy;
    // Free-form label that is only used by ToString().
    private readonly string desc;

    /// <summary>
    /// Creates a policy with the given <paramref name="eps"/> and an empty description.
    /// </summary>
    /// <param name="eps">Threshold for choosing the random branch (see <see cref="SelectAction"/>).</param>
    public EpsGreedyPolicy(double eps) : this(eps, string.Empty) { }

    /// <summary>
    /// Creates a policy with the given <paramref name="eps"/> and description.
    /// </summary>
    /// <param name="eps">Threshold for choosing the random branch (see <see cref="SelectAction"/>).</param>
    /// <param name="desc">Label that is included in the output of <see cref="ToString"/>.</param>
    public EpsGreedyPolicy(double eps, string desc) {
      this.eps = eps;
      this.randomPolicy = new RandomPolicy();
      this.desc = desc;
    }

    /// <summary>
    /// Selects the index of the next action.
    /// </summary>
    /// <param name="random">Random number generator used for the greedy/random decision
    /// and passed on to the random policy.</param>
    /// <param name="actionInfos">Statistics of the available actions; asserted to be non-empty
    /// in debug builds. The sequence is enumerated more than once.</param>
    /// <returns>
    /// The zero-based position within <paramref name="actionInfos"/> of the selected action
    /// for the greedy branch, or whatever <see cref="RandomPolicy"/> returns for the random branch.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown by the greedy branch when no entry qualifies as best action (e.g. no entry is a
    /// <see cref="DefaultPolicyActionInfo"/>).
    /// </exception>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      Debug.Assert(actionInfos.Any());

      if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration
        // select best
        var bestActions = new List<int>();
        double bestQ = double.NegativeInfinity;

        // The index is counted over the unfiltered sequence so that the returned value refers to
        // the same positions as the sequence handed to the random policy below. Entries that are
        // not DefaultPolicyActionInfo are skipped but still occupy an index.
        int aIdx = -1;
        foreach (var info in actionInfos) {
          aIdx++;

          var aInfo = info as DefaultPolicyActionInfo;
          if (aInfo == null) continue;

          // greedy criterion is the maximum reward observed for the action
          var q = aInfo.MaxReward;

          if (q > bestQ) {
            bestActions.Clear();
            bestActions.Add(aIdx);
            bestQ = q;
          } else if (q.IsAlmost(bestQ)) {
            bestActions.Add(aIdx);
          }
        }
        Debug.Assert(bestActions.Any());
        // Ties are resolved deterministically by taking the first of the best actions
        // (random tie-breaking is intentionally not used here).
        return bestActions.First();
      } else {
        // select random
        return randomPolicy.SelectAction(random, actionInfos);
      }
    }

    /// <summary>
    /// Creates the per-action statistics object used by this policy.
    /// </summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns a textual representation containing eps (two decimals) and the description.
    /// </summary>
    public override string ToString() {
      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.