Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs @ 12290

Last change on this file since 12290 was 12290, checked in by gkronber, 10 years ago

#2283 created a new branch to separate development from aballeit

File size: 1.7 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10  // also called softmax policy
11  public class BoltzmannExplorationPolicy : IBanditPolicy {
12    private readonly double beta;
13
14    public BoltzmannExplorationPolicy(double beta)  {
15      if (beta < 0) throw new ArgumentException();
16      this.beta = beta;
17    }
18    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
19      Debug.Assert(actionInfos.Any());
20
21      // select best
22      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
23
24      // try any of the untries actions randomly
25      // for RoyalSequence it is much better to select the actions in the order of occurrence (all terminal alternatives first)
26      //if (myActionInfos.Any(aInfo => !aInfo.Disabled && aInfo.Tries == 0)) {
27      //  return myActionInfos
28      //  .Select((aInfo, idx) => new { aInfo, idx })
29      //  .Where(p => !p.aInfo.Disabled)
30      //  .Where(p => p.aInfo.Tries == 0)
31      //  .SelectRandom(random).idx;
32      //}
33
34      var w = from aInfo in myActionInfos
35              select Math.Exp(beta * aInfo.Value);
36
37      var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
38      Debug.Assert(bestAction >= 0);
39      return bestAction;
40    }
41
42    public IBanditPolicyActionInfo CreateActionInfo() {
43      return new DefaultPolicyActionInfo();
44    }
45
46    public override string ToString() {
47      return string.Format("BoltzmannExplorationPolicy({0:F2})", beta);
48    }
49  }
50}
Note: See TracBrowser for help on using the repository browser.