Changeset 12290 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits
- Timestamp: 04/07/15 14:31:06
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr
- Files: 4 edited, 1 copied
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs
```diff
--- ActionInfos/DefaultPolicyActionInfo.cs (r11849)
+++ ActionInfos/DefaultPolicyActionInfo.cs (r12290)
@@ -12,7 +12,8 @@
     public int Tries { get; private set; }
     public double MaxReward { get; private set; }
+    private double avgValue = 0.0;
     public double Value {
       get {
-        return Tries > 0 ? SumReward / Tries : 0.0;
+        return Tries > 0 ? avgValue : double.PositiveInfinity;
       }
     }
@@ -25,3 +26,7 @@
       SumReward += reward;
       MaxReward = Math.Max(MaxReward, reward);
+      var delta = reward - avgValue;
+      //var alpha = 0.01;
+      var alpha = Math.Max(1.0/Tries, 0.01);
+      avgValue = avgValue + alpha * delta;
     }
@@ -31,3 +36,4 @@
       Tries = 0;
       MaxReward = 0.0;
+      avgValue = 0.0;
     }
@@ -36,13 +42,4 @@
       return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
     }
-
-    public static Func<DefaultPolicyActionInfo, double> AverageReward {
-      get {
-        return (aInfo) =>
-          aInfo.Tries == 0 ?
-            double.PositiveInfinity :
-            aInfo.SumReward / (double)aInfo.Tries;
-      }
-    }
   }
 }
```
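This change replaces the plain sample mean previously returned by `Value` with a recency-weighted average: each observed reward moves the estimate by a step size `alpha = Math.Max(1.0/Tries, 0.01)`, and actions that have never been tried now report `double.PositiveInfinity` so that value-based policies pick them first. The following is a minimal standalone sketch of that update rule, not the HeuristicLab class itself; the class and method names are made up for illustration:

```csharp
using System;

// Illustrative sketch of the recency-weighted average used in the new
// DefaultPolicyActionInfo.Value (hypothetical demo class, not part of HeuristicLab).
class RecencyWeightedAverageDemo {
  static int tries = 0;
  static double avgValue = 0.0;

  static void UpdateReward(double reward) {
    tries++;
    // Step size starts like a sample mean (1/n) but is floored at 0.01, so recent
    // rewards keep a constant weight and the estimate can track changing rewards.
    var alpha = Math.Max(1.0 / tries, 0.01);
    avgValue += alpha * (reward - avgValue);
  }

  static void Main() {
    // 200 low rewards followed by 200 high rewards: a plain sample mean would end at 0.5,
    // while the recency-weighted estimate moves close to 1.0 (roughly 0.87 here).
    for (int i = 0; i < 200; i++) UpdateReward(0.0);
    for (int i = 0; i < 200; i++) UpdateReward(1.0);
    Console.WriteLine("recency-weighted estimate: {0:F3}", avgValue);
  }
}
```

Flooring the step size at 0.01 keeps the estimate responsive when the reward distribution drifts over time, at the cost of never fully converging on a stationary problem.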
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/MeanAndVariancePolicyActionInfo.cs
```diff
--- ActionInfos/MeanAndVariancePolicyActionInfo.cs (r11849)
+++ ActionInfos/MeanAndVariancePolicyActionInfo.cs (r12290)
@@ -26,4 +26,8 @@
       estimator.Reset();
     }
+
+    public override string ToString() {
+      return string.Format("{0:N3} {1,3}", AvgReward, Tries);
+    }
   }
 }
```
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/BoltzmannExplorationPolicy.cs
```diff
--- Policies/BoltzmannExplorationPolicy.cs (r11806)
+++ Policies/BoltzmannExplorationPolicy.cs (r12290)
@@ -11,12 +11,8 @@
   public class BoltzmannExplorationPolicy : IBanditPolicy {
     private readonly double beta;
-    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
 
-    public BoltzmannExplorationPolicy(double beta) : this(beta, DefaultPolicyActionInfo.AverageReward) { }
-
-    public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
+    public BoltzmannExplorationPolicy(double beta) {
       if (beta < 0) throw new ArgumentException();
       this.beta = beta;
-      this.valueFunction = valueFunction;
     }
     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
@@ -37,5 +33,5 @@
 
       var w = from aInfo in myActionInfos
-              select Math.Exp(beta * valueFunction(aInfo));
+              select Math.Exp(beta * aInfo.Value);
 
       var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
```
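With the pluggable value function removed, the policy now weights each action by `Math.Exp(beta * aInfo.Value)` and samples an action proportionally to these weights. Below is a self-contained sketch of that selection step; the `SampleProportional` helper is a local stand-in for illustration, not the HeuristicLab extension method:

```csharp
using System;
using System.Linq;

// Hypothetical demo of Boltzmann (softmax) action selection over per-action value estimates.
class BoltzmannSelectionDemo {
  // Sample an index with probability proportional to its weight (local helper).
  static int SampleProportional(Random random, double[] weights) {
    var r = random.NextDouble() * weights.Sum();
    for (int i = 0; i < weights.Length; i++) {
      r -= weights[i];
      if (r <= 0) return i;
    }
    return weights.Length - 1;
  }

  static void Main() {
    var random = new Random(1234);
    var values = new[] { 0.2, 0.5, 0.8 };  // stand-ins for aInfo.Value of three arms
    var beta = 10.0;                       // higher beta -> greedier selection
    var w = values.Select(v => Math.Exp(beta * v)).ToArray();

    // Count how often each arm is chosen; the arm with the highest value dominates.
    var counts = new int[values.Length];
    for (int i = 0; i < 10000; i++) counts[SampleProportional(random, w)]++;
    Console.WriteLine(string.Join(" ", counts));
  }
}
```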
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs
```diff
--- Policies/EpsGreedyPolicy.cs (r11806)
+++ Policies/EpsGreedyPolicy.cs (r12290)
@@ -11,14 +11,12 @@
     private readonly double eps;
     private readonly RandomPolicy randomPolicy;
-    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
     private readonly string desc;
 
 
-    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
+    public EpsGreedyPolicy(double eps) : this(eps, string.Empty) { }
 
-    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
+    public EpsGreedyPolicy(double eps, string desc) {
       this.eps = eps;
       this.randomPolicy = new RandomPolicy();
-      this.valueFunction = valueFunction;
       this.desc = desc;
     }
@@ -36,5 +34,5 @@
       aIdx++;
 
-      var q = valueFunction(aInfo);
+      var q = aInfo.Value;
 
       if (q > bestQ) {
```
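Here as well `aInfo.Value` replaces the injected value function in the greedy step. Because untried actions now report `double.PositiveInfinity` (see the `DefaultPolicyActionInfo` change above), the greedy step prefers them over any already-tried action. A simplified, hypothetical sketch of that selection logic, not the actual `EpsGreedyPolicy` class:

```csharp
using System;

// Hypothetical demo of the eps-greedy selection step over per-action value estimates.
class EpsGreedyDemo {
  static int SelectAction(Random random, double eps, double[] values) {
    if (random.NextDouble() < eps)
      return random.Next(values.Length);      // explore: pick a uniformly random arm
    // exploit: pick the arm with the highest value estimate;
    // an untried arm reporting PositiveInfinity always wins this comparison
    var bestQ = double.NegativeInfinity;
    var bestIdx = 0;
    for (int i = 0; i < values.Length; i++) {
      if (values[i] > bestQ) { bestQ = values[i]; bestIdx = i; }
    }
    return bestIdx;
  }

  static void Main() {
    var random = new Random(1234);
    var values = new[] { 0.4, double.PositiveInfinity, 0.7 }; // arm 1 has not been tried yet
    Console.WriteLine(SelectAction(random, 0.0, values));      // prints 1
  }
}
```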