Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs @ 11742

Last change on this file since 11742 was 11742, checked in by gkronber, 9 years ago

#2283 refactoring

File size: 1.7 KB
Rev Line
[11730]1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
[11732]7using HeuristicLab.Common;
[11730]8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Boltzmann exploration policy (also called softmax policy).
  /// Every enabled action gets the weight exp(beta * value(action)); disabled
  /// actions get weight zero. The action to play is then drawn via
  /// <c>SampleProportional</c> using these weights.
  /// </summary>
  public class BoltzmannExplorationPolicy : IBanditPolicy {
    // Scaling factor applied to the action value inside the exponential.
    private readonly double beta;
    // Maps the statistics of an action to the scalar value that is exponentiated.
    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;

    /// <summary>
    /// Creates a policy that uses <c>DefaultPolicyActionInfo.AverageReward</c> as value function.
    /// </summary>
    /// <param name="eps">
    /// Used as beta (forwarded unchanged to the two-argument constructor).
    /// The parameter name is kept for source compatibility with existing callers.
    /// </param>
    public BoltzmannExplorationPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward) { }

    /// <summary>
    /// Creates a policy with an explicit value function.
    /// </summary>
    /// <param name="beta">Non-negative scaling factor for the action values.</param>
    /// <param name="valueFunction">Function that yields the value of an action from its statistics.</param>
    /// <exception cref="ArgumentException">beta is negative or NaN.</exception>
    /// <exception cref="ArgumentNullException">valueFunction is null.</exception>
    public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
      if (double.IsNaN(beta) || beta < 0) throw new ArgumentException("beta must be a non-negative number.", "beta");
      if (valueFunction == null) throw new ArgumentNullException("valueFunction");
      this.beta = beta;
      this.valueFunction = valueFunction;
    }

    /// <summary>
    /// Samples an action with weight exp(beta * value) per enabled action.
    /// </summary>
    /// <param name="random">Random number generator handed to <c>SampleProportional</c>.</param>
    /// <param name="actionInfos">Statistics of all actions; must contain at least one enabled action.</param>
    /// <returns>The position of the sampled action within <paramref name="actionInfos"/>.</returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // Materialize once (the input may be a lazy sequence) and remember the position of
      // each entry in the ORIGINAL sequence, so that the returned index stays valid for the
      // caller even if entries of a foreign type are skipped.
      var candidates = actionInfos
        .Select((info, idx) => new { aInfo = info as DefaultPolicyActionInfo, idx })
        .Where(p => p.aInfo != null)
        .ToList();
      Debug.Assert(candidates.Any(p => !p.aInfo.Disabled));

      // Evaluate the value function exactly once per enabled action
      // (it is never called for disabled actions).
      var exponents = new double[candidates.Count];
      var maxExponent = double.NegativeInfinity;
      for (int i = 0; i < candidates.Count; i++) {
        if (candidates[i].aInfo.Disabled) continue;
        exponents[i] = beta * valueFunction(candidates[i].aInfo);
        if (exponents[i] > maxExponent) maxExponent = exponents[i];
      }

      // Subtracting the largest exponent multiplies every weight by the same constant and
      // therefore leaves the sampling distribution unchanged, but it prevents Math.Exp from
      // overflowing to +Infinity for large beta * value. If no finite maximum exists the
      // unshifted formula is used.
      var shift = double.IsInfinity(maxExponent) ? 0.0 : maxExponent;
      IEnumerable<double> w = candidates
        .Select((p, i) => p.aInfo.Disabled ? 0.0 : Math.Exp(exponents[i] - shift))
        .ToArray();

      // SampleProportional is defined outside this file; presumably it yields elements with
      // probability proportional to their weight - verify against its implementation.
      var selectedAction = candidates
        .SampleProportional(random, w)
        .Select(p => p.idx)
        .First();
      Debug.Assert(selectedAction >= 0);
      return selectedAction;
    }

    /// <summary>
    /// Creates the per-action statistics object used by this policy.
    /// </summary>
    /// <returns>A new <c>DefaultPolicyActionInfo</c>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the policy name together with beta formatted with two decimals.
    /// </summary>
    public override string ToString() {
      return string.Format("BoltzmannExplorationPolicy({0:F2})", beta);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.