Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs @ 11732

Last change on this file since 11732 was 11732, checked in by gkronber, 9 years ago

#2283: refactoring and bug fixes

File size: 1.7 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits {
10  // also called softmax policy
11  public class BoltzmannExplorationPolicy : IPolicy {
12    private readonly double beta;
13
14    public BoltzmannExplorationPolicy(double beta) {
15      if (beta < 0) throw new ArgumentException();
16      this.beta = beta;
17    }
18    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
19      Debug.Assert(actionInfos.Any());
20
21      // select best
22      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
23      Debug.Assert(myActionInfos.Any(a => !a.Disabled));
24      double[] w = new double[myActionInfos.Length];
25
26      for (int a = 0; a < myActionInfos.Length; a++) {
27        if (myActionInfos[a].Disabled) {
28          w[a] = 0; continue;
29        }
30        if (myActionInfos[a].Tries == 0) return a;
31        var sumReward = myActionInfos[a].SumReward;
32        var tries = myActionInfos[a].Tries;
33        var avgReward = sumReward / tries;
34        w[a] = Math.Exp(beta * avgReward);
35      }
36
37
38      var bestAction = Enumerable.Range(0, w.Length).SampleProportional(random, w).First();
39      Debug.Assert(bestAction >= 0);
40      Debug.Assert(bestAction < w.Length);
41      Debug.Assert(!myActionInfos[bestAction].Disabled);
42      return bestAction;
43    }
44
45    public IPolicyActionInfo CreateActionInfo() {
46      return new DefaultPolicyActionInfo();
47    }
48
49    public override string ToString() {
50      return string.Format("BoltzmannExplorationPolicy({0:F2})", beta);
51    }
52  }
53}
Note: See TracBrowser for help on using the repository browser.