Context Navigation

← Previous Change
Next Change →

EpsGreedyPolicy.cs

Timestamp:

01/09/15 14:57:28 (9 years ago)

Author:

gkronber

Message:

#2283 refactoring

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies

Files:

1 edited
1 moved

. (moved) (moved from branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies)
EpsGreedyPolicy.cs (modified) (3 diffs)

Legend:

(space): Unmodified
+: Added
-: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

-                      r11732
+                      r11742
 using System.Text;
 using System.Threading.Tasks;
+using HeuristicLab.Common;
 namespace HeuristicLab.Algorithms.Bandits {
   public class EpsGreedyPolicy : IPolicy {
+namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
+  public class EpsGreedyPolicy : IBanditPolicy {
     private readonly double eps;
     private readonly RandomPolicy randomPolicy;
+    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
+    private readonly string desc;
+    public EpsGreedyPolicy(double eps) {
+    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
+    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
       this.eps = eps;
       this.randomPolicy = new RandomPolicy();
+      this.valueFunction = valueFunction;
+      this.desc = desc;
+    }
+    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
       Debug.Assert(actionInfos.Any());
       if (random.NextDouble() > eps) {
         // select best
         var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
         int bestAction = -1;
+        var bestActions = new List<int>();
         double bestQ = double.NegativeInfinity;
         int aIdx = -1;
         foreach (var aInfo in myActionInfos) {
           aIdx++;
           if (aInfo.Disabled) continue;
-          if (aInfo.Tries == 0) return aIdx;
+          var q = valueFunction(aInfo);
-          var avgReward = aInfo.SumReward / aInfo.Tries;
-          //var q = avgReward;
-          var q = aInfo.MaxReward;
           if (q > bestQ) {
+            bestActions.Clear();
+            bestActions.Add(aIdx);
             bestQ = q;
+            bestAction = aIdx;
+          } else if (q.IsAlmost(bestQ)) {
+            bestActions.Add(aIdx);
+          }
+        }
         Debug.Assert(bestAction >= 0);
         return bestAction;
+        Debug.Assert(bestActions.Any());
+        return bestActions.SelectRandom(random);
       } else {
         // select random
 …
+    }
     public IPolicyActionInfo CreateActionInfo() {
+    public IBanditPolicyActionInfo CreateActionInfo() {
       return new DefaultPolicyActionInfo();
+    }
 …
     public override string ToString() {
       return string.Format("EpsGreedyPolicy({0:F2})", eps);
+      return string.Format("EpsGreedyPolicy({0:F2},{1})", eps, desc);
+    }
+  }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11742 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs

Download in other formats: