Changeset 11742 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs
- Timestamp: 01/09/15 14:57:28
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies
- Files: 1 edited, 1 moved
Legend:
- Unmodified: no prefix
- Added: prefixed with +
- Removed: prefixed with -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs
--- ChernoffIntervalEstimationPolicy.cs (r11732)
+++ ChernoffIntervalEstimationPolicy.cs (r11742)

 using System.Threading.Tasks;

-namespace HeuristicLab.Algorithms.Bandits {
+namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
   /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
      International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */

-  public class ChernoffIntervalEstimationPolicy : IPolicy {
+  public class ChernoffIntervalEstimationPolicy : IBanditPolicy {
     private readonly double delta;
…
       this.delta = delta;
     }
-    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
+    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
       Debug.Assert(actionInfos.Any());
       // select best
-      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
-      int k = myActionInfos.Length;
+      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
+      int k = myActionInfos.Count(a => !a.Disabled);
       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
       int bestAction = -1;
       double bestQ = double.NegativeInfinity;
-      for (int a = 0; a < myActionInfos.Length; a++) {
-        if (myActionInfos[a].Disabled) continue;
-        if (myActionInfos[a].Tries == 0) return a;
+      var aIdx = -1;
+      foreach (var aInfo in myActionInfos) {
+        aIdx++;
+        if (aInfo.Disabled) continue;
+        if (aInfo.Tries == 0) return aIdx;

-        var sumReward = myActionInfos[a].SumReward;
-        var tries = myActionInfos[a].Tries;
-
-        var avgReward = sumReward / tries;
+        var avgReward = aInfo.SumReward / aInfo.Tries;

         // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
         // var alpha = Math.Log(2 * totalTries * k / delta);
-        double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
-        var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
+        double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
+        var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
         if (q > bestQ) {
           bestQ = q;
-          bestAction = a;
+          bestAction = aIdx;
         }
       }
…
     }

-    public IPolicyActionInfo CreateActionInfo() {
+    public IBanditPolicyActionInfo CreateActionInfo() {
       return new DefaultPolicyActionInfo();
     }
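The quantity q computed in the loop is the distribution-free upper confidence bound from page 5 of Streeter and Smith (2006). Writing \bar{x} for an arm's mean reward, n for its tries, k for the number of enabled arms, m for the total tries over all arms, and \delta for the confidence parameter, the policy selects the arm maximizing

    \alpha = \ln(2 m k / \delta), \qquad q = \bar{x} + \frac{\alpha + \sqrt{2 n \bar{x} \alpha + \alpha^2}}{n}

with untried arms selected immediately; per the in-code comment, the paper uses the maximum number of tries where this code uses the total. The following standalone sketch reproduces only the arithmetic of SelectAction; the class and method names are hypothetical and not part of the changeset:

using System;

// Hypothetical demo, not part of the HeuristicLab branch: it mirrors the
// index arithmetic of ChernoffIntervalEstimationPolicy.SelectAction (r11742).
class ChernoffIndexDemo {
  // alpha = ln(2 * totalTries * k / delta), computed in log space as in the diff
  static double Index(double sumReward, int tries, int totalTries, int k, double delta) {
    double avgReward = sumReward / tries;
    double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
    return avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
  }

  static void Main() {
    // two arms with the same mean reward (0.5) but different sample sizes;
    // the less-explored arm receives the larger exploration bonus
    Console.WriteLine(Index(sumReward: 40.0, tries: 80, totalTries: 100, k: 2, delta: 0.05)); // ~0.97
    Console.WriteLine(Index(sumReward: 10.0, tries: 20, totalTries: 100, k: 2, delta: 0.05)); // ~1.76
  }
}

Besides the rename from IPolicy to IBanditPolicy, the revision drops the ToArray() materialization (and its TODO) in favor of streaming enumeration, and counts k over enabled arms only, matching the Disabled filter already applied to totalTries.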