Changeset 12876 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs
- Timestamp: 08/17/15 19:13:19 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs
Columns: line number in r11806 (old), line number in r12876 (new); "+" marks an added line, "-" a removed line, no marker an unmodified line.

r11806  r12876
     9       9     namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    10      10       public class ActiveLearningPolicy : IBanditPolicy {
            11  +      public double MaxReward { get; private set; }
            12  +      public ActiveLearningPolicy(double maxReward = 1.0) {
            13  +        this.MaxReward = maxReward;
            14  +      }
    11      15         public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    12      16           var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
     …       …
    29      33               q = aInfo.SumReward / aInfo.Tries;
    30      34               var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
    31          -            u = q + 0.5* b;
    32          -            l = q - 0.5* b;
            35  +            u = q + MaxReward * b;
            36  +            l = q - MaxReward * b;
    33      37             }
    34      38             bestActions.Add(aIdx);
Note: See TracChangeset for help on using the changeset viewer.