Changeset 12876 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs
- Timestamp: 08/17/15 19:13:19 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs
r11806 r12876
 10  10     // policy for k-armed bandit (see Auer et al. 2002)
 11  11     public class UCB1Policy : IBanditPolicy {
     12  +    public double MaxReward { get; private set; }
     13  +    public UCB1Policy(double maxReward = 1.0) {
     14  +      this.MaxReward = maxReward;
     15  +    }
 12  16       public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
 13  17         var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
 …   …
 24  28           } else {
 25  29
 26      -          q = aInfo.SumReward / aInfo.Tries + 0.5* Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
     30  +          q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
 27  31           }
 28  32           if (q > bestQ) {
Note: See TracChangeset for help on using the changeset viewer.