Changeset 12876 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies
- Timestamp:
- 08/17/15 19:13:19 (9 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies
- Files:
-
- 3 added
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ActiveLearningPolicy.cs
r11806 r12876
 9  9    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10 10    public class ActiveLearningPolicy : IBanditPolicy {
   11  + public double MaxReward { get; private set; }
   12  + public ActiveLearningPolicy(double maxReward = 1.0) {
   13  + this.MaxReward = maxReward;
   14  + }
11 15    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
12 16    var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
… …
29 33    q = aInfo.SumReward / aInfo.Tries;
30 34    var b = Math.Sqrt(Math.Log(2.0 * k * totalTries / delta) / (2.0 * aInfo.Tries));
31     - u = q + 0.5* b;
32     - l = q - 0.5* b;
   35  + u = q + MaxReward * b;
   36  + l = q - MaxReward * b;
33 37    }
34 38    bestActions.Add(aIdx);
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/ChernoffIntervalEstimationPolicy.cs
r11806 r12876
35 35    var avgReward = aInfo.SumReward / aInfo.Tries;
36 36
37     - // page 5 of "A simple distribution-free appr aoch to the max k-armed bandit problem"
   37  + // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
38 38    // var alpha = Math.Log(2 * totalTries * k / delta);
39 39    double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs
r11806 r12876
10 10    // policy for k-armed bandit (see Auer et al. 2002)
11 11    public class UCB1Policy : IBanditPolicy {
   12  + public double MaxReward { get; private set; }
   13  + public UCB1Policy(double maxReward = 1.0) {
   14  + this.MaxReward = maxReward;
   15  + }
12 16    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
13 17    var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
… …
24 28    } else {
25 29
26     - q = aInfo.SumReward / aInfo.Tries + 0.5* Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
   30  + q = aInfo.SumReward / aInfo.Tries + MaxReward * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
27 31    }
28 32    if (q > bestQ) {
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs
r11832 r12876
 9  9    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10 10    // policy for k-armed bandit (see Auer et al. 2002)
   11  + // specific to Bernoulli distributed rewards
11 12    public class UCB1TunedPolicy : IBanditPolicy {
12 13
Note: See TracChangeset
for help on using the changeset viewer.