Changeset 11747 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs
- Timestamp:
- 01/12/15 21:23:01 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs
r11742  r11747
    13      13      private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
    14      14
    15          -   public BoltzmannExplorationPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward) { }
            15  +   public BoltzmannExplorationPolicy(double beta) : this(beta, DefaultPolicyActionInfo.AverageReward) { }
    16      16
    17      17      public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
     …       …
    25      25      // select best
    26      26      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
            27  +   Debug.Assert(myActionInfos.Any(a => !a.Disabled));
    27      28      // try any of the untries actions randomly
            29  +   // for RoyalSequence it is much better to select the actions in the order of occurrence (all terminal alternatives first)
            30  +   //if (myActionInfos.Any(aInfo => !aInfo.Disabled && aInfo.Tries == 0)) {
            31  +   //  return myActionInfos
            32  +   //    .Select((aInfo, idx) => new { aInfo, idx })
            33  +   //    .Where(p => !p.aInfo.Disabled)
            34  +   //    .Where(p => p.aInfo.Tries == 0)
            35  +   //    .SelectRandom(random).idx;
            36  +   //}
    28      37
    29      38      var w = from aInfo in myActionInfos
Note: See TracChangeset
for help on using the changeset viewer.