Changeset 12290 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs
- Timestamp: 04/07/15 14:31:06 (10 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr
- Files: 1 edited, 1 copied
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs
r11806  r12290
  11      11      public class BoltzmannExplorationPolicy : IBanditPolicy {
  12      12        private readonly double beta;
  13            -   private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
  14      13
  15            -   public BoltzmannExplorationPolicy(double beta) : this(beta, DefaultPolicyActionInfo.AverageReward) { }
  16            -
  17            -   public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
          14    +   public BoltzmannExplorationPolicy(double beta) {
  18      15          if (beta < 0) throw new ArgumentException();
  19      16          this.beta = beta;
  20            -     this.valueFunction = valueFunction;
  21      17        }
  22      18        public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
  …       …
  37      33
  38      34          var w = from aInfo in myActionInfos
  39            -             select Math.Exp(beta * valueFunction(aInfo));
          35    +             select Math.Exp(beta * aInfo.Value);
  40      36
  41      37          var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
Note: See TracChangeset for help on using the changeset viewer.