Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/09/15 14:57:28 (9 years ago)
Author:
gkronber
Message:

#2283 refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies
Files:
1 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs

    r11732 r11742  
    66using System.Threading.Tasks;
    77
    8 namespace HeuristicLab.Algorithms.Bandits {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    99  /* see: Streeter and Smith: A simple distribution-free approach to the max k-armed bandit problem, Proceedings of the 12th
    1010International Conference, CP 2006, Nantes, France, September 25-29, 2006. pp 560-574 */
    1111
    12   public class ChernoffIntervalEstimationPolicy : IPolicy {
     12  public class ChernoffIntervalEstimationPolicy : IBanditPolicy {
    1313    private readonly double delta;
    1414
     
    1616      this.delta = delta;
    1717    }
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
     18    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1919      Debug.Assert(actionInfos.Any());
    2020      // select best
    21       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
    22       int k = myActionInfos.Length;
     21      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
     22      int k = myActionInfos.Count(a => !a.Disabled);
    2323      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    2424      int bestAction = -1;
    2525      double bestQ = double.NegativeInfinity;
    26       for (int a = 0; a < myActionInfos.Length; a++) {
    27         if (myActionInfos[a].Disabled) continue;
    28         if (myActionInfos[a].Tries == 0) return a;
     26      var aIdx = -1;
     27      foreach (var aInfo in myActionInfos) {
     28        aIdx++;
     29        if (aInfo.Disabled) continue;
     30        if (aInfo.Tries == 0) return aIdx;
    2931
    30         var sumReward = myActionInfos[a].SumReward;
    31         var tries = myActionInfos[a].Tries;
    32 
    33         var avgReward = sumReward / tries;
     32        var avgReward = aInfo.SumReward / aInfo.Tries;
    3433
    3534        // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
    3635        // var alpha = Math.Log(2 * totalTries * k / delta);
    37         double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
    38         var q = avgReward + (alpha + Math.Sqrt(2 * tries * avgReward * alpha + alpha * alpha)) / tries;
     36        double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
     37        var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
    3938        if (q > bestQ) {
    4039          bestQ = q;
    41           bestAction = a;
     40          bestAction = aIdx;
    4241        }
    4342      }
     
    4645    }
    4746
    48     public IPolicyActionInfo CreateActionInfo() {
     47    public IBanditPolicyActionInfo CreateActionInfo() {
    4948      return new DefaultPolicyActionInfo();
    5049    }
Note: See TracChangeset for help on using the changeset viewer.