Context Navigation

HeuristicLab.Algorithms.Bandits

Timestamp:

04/07/15 14:31:06 (10 years ago)

Author:

gkronber

Message:

#2283 created a new branch to separate development from aballeit

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization-gkr

Files:

-                      r11849
+                      r12290
     public int Tries { get; private set; }
     public double MaxReward { get; private set; }
+    private double avgValue = 0.0;
     public double Value {
       get {
-        return Tries > 0 ? SumReward / Tries : 0.0;
+        return Tries > 0 ? avgValue : double.PositiveInfinity;
+      }
+    }
 …
       SumReward += reward;
       MaxReward = Math.Max(MaxReward, reward);
+      var delta = reward - avgValue;
+      //var alpha = 0.01;
+      var alpha = Math.Max(1.0/Tries, 0.01);
+      avgValue = avgValue + alpha * delta;
+    }
 …
       Tries = 0;
       MaxReward = 0.0;
+      avgValue = 0.0;
+    }
 …
       return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
+    }
-    public static Func<DefaultPolicyActionInfo, double> AverageReward {
-      get {
-        return (aInfo) =>
-          aInfo.Tries == 0 ?
-          double.PositiveInfinity :
-          aInfo.SumReward / (double)aInfo.Tries;
+      }
+    }
+  }
+}

r11849	r12290
26	26	estimator.Reset();
27	27	}
	28
	29	public override string ToString() {
	30	return string.Format("{0:N3} {1,3}", AvgReward, Tries);
	31	}
28	32	}
29	33	}

-                      r11806
+                      r12290
   public class BoltzmannExplorationPolicy : IBanditPolicy {
     private readonly double beta;
-    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
-    public BoltzmannExplorationPolicy(double beta) : this(beta, DefaultPolicyActionInfo.AverageReward) { }
-    public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
+    public BoltzmannExplorationPolicy(double beta)  {
       if (beta < 0) throw new ArgumentException();
       this.beta = beta;
-      this.valueFunction = valueFunction;
+    }
     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
 …
       var w = from aInfo in myActionInfos
-              select Math.Exp(beta * valueFunction(aInfo));
+              select Math.Exp(beta * aInfo.Value);
       var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);

-                      r11806
+                      r12290
     private readonly double eps;
     private readonly RandomPolicy randomPolicy;
-    private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
     private readonly string desc;
-    public EpsGreedyPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward, string.Empty) { }
+    public EpsGreedyPolicy(double eps) : this(eps, string.Empty) { }
-    public EpsGreedyPolicy(double eps, Func<DefaultPolicyActionInfo, double> valueFunction, string desc) {
+    public EpsGreedyPolicy(double eps, string desc) {
       this.eps = eps;
       this.randomPolicy = new RandomPolicy();
-      this.valueFunction = valueFunction;
       this.desc = desc;
+    }
 …
           aIdx++;
-          var q = valueFunction(aInfo);
+          var q = aInfo.Value;
           if (q > bestQ) {

Note: See TracChangeset for help on using the changeset viewer.