Changeset 11742 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs
- Timestamp:
- 01/09/15 14:57:28 (9 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs
r11732 r11742
    5    5    using System.Text;
    6    6    using System.Threading.Tasks;
         7  + using HeuristicLab.Common;
    7    8
    8       - namespace HeuristicLab.Algorithms.Bandits {
    9       -   public class EpsGreedyPolicy : IPolicy {
         9  + namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
        10  +   public class EpsGreedyPolicy : IBanditPolicy {
   10   11        private readonly double eps;
   11   12        private readonly RandomPolicy randomPolicy;
        13  +     private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
        14  +     private readonly string desc;
   12   15
   13       -     public EpsGreedyPolicy(double eps) {
        16  +
        17  +     public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
        18  +
        19  +     public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
   14   20          this.eps = eps;
   15   21          this.randomPolicy = new RandomPolicy();
        22  +       this.valueFunction = valueFunction;
        23  +       this.desc = desc;
   16   24        }
   17       -     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
        25  +
        26  +     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
   18   27          Debug.Assert(actionInfos.Any());
   19   28          if (random.NextDouble() > eps) {
   20   29            // select best
   21   30            var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
   22       -         int bestAction = -1;
        31  +         var bestActions = new List<int>();
   23   32            double bestQ = double.NegativeInfinity;
        33  +
   24   34            int aIdx = -1;
   25   35            foreach (var aInfo in myActionInfos) {
   26       -
   27   36              aIdx++;
   28   37              if (aInfo.Disabled) continue;
   29       -           if (aInfo.Tries == 0) return aIdx;
   30   38
        39  +           var q = valueFunction(aInfo);
   31   40
   32       -           var avgReward = aInfo.SumReward / aInfo.Tries;
   33       -           //var q = avgReward;
   34       -           var q = aInfo.MaxReward;
   35   41              if (q > bestQ) {
        42  +             bestActions.Clear();
        43  +             bestActions.Add(aIdx);
   36   44                bestQ = q;
   37       -             bestAction = aIdx;
        45  +           } else if (q.IsAlmost(bestQ)) {
        46  +             bestActions.Add(aIdx);
   38   47              }
   39   48            }
   40       -         Debug.Assert(bestAction >= 0);
   41       -         return bestAction;
        49  +         Debug.Assert(bestActions.Any());
        50  +         return bestActions.SelectRandom(random);
   42   51          } else {
   43   52            // select random
    …    …
   46   55        }
   47   56
   48       -     public IPolicyActionInfo CreateActionInfo() {
        57  +     public IBanditPolicyActionInfo CreateActionInfo() {
   49   58          return new DefaultPolicyActionInfo();
   50   59        }
    …    …
   52   61
   53   62        public override string ToString() {
   54       -       return string.Format("EpsGreedyPolicy({0:F2})", eps);
        63  +       return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
   55   64        }
   56   65      }
Note: See TracChangeset for help on using the changeset viewer.