Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/09/15 14:57:28 (9 years ago)
Author:
gkronber
Message:

#2283 refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies
Files:
1 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs

    r11732 r11742  
    6 6 using System.Threading.Tasks;
    7 7
    8 namespace HeuristicLab.Algorithms.Bandits {
    9   public class UCB1TunedPolicy : IPolicy {
     8namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     9  // policy for k-armed bandit (see Auer et al. 2002)
     10  public class UCB1TunedPolicy : IBanditPolicy {
    10 11
    11     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    12       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>().ToArray(); // TODO: performance
     12    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     13      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    13 14      int bestAction = -1;
    14 15      double bestQ = double.NegativeInfinity;
    15 16      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    16 17
    17       for (int a = 0; a < myActionInfos.Length; a++) {
    18         if (myActionInfos[a].Disabled) continue;
    19         if (myActionInfos[a].Tries == 0) return a;
     18      int aIdx = -1;
     19      foreach (var aInfo in myActionInfos) {
     20        aIdx++;
     21        if (aInfo.Disabled) continue;
     22        if (aInfo.Tries == 0) return aIdx;
    20 23
    21         var sumReward = myActionInfos[a].SumReward;
    22         var tries = myActionInfos[a].Tries;
     24        var sumReward = aInfo.SumReward;
     25        var tries = aInfo.Tries;
    23 26
    24 27        var avgReward = sumReward / tries;
    25         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(myActionInfos[a], totalTries))); // 1/4 is upper bound of bernoulli distributed variable
     28        var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is the upper bound on the variance of a Bernoulli-distributed variable
    26 29        if (q > bestQ) {
    27 30          bestQ = q;
    28           bestAction = a;
     31          bestAction = aIdx;
    29 32        }
    30 33      }
    …
    33 36    }
    34 37
    35     public IPolicyActionInfo CreateActionInfo() {
     38    public IBanditPolicyActionInfo CreateActionInfo() {
    36 39      return new MeanAndVariancePolicyActionInfo();
    37 40    }
Note: See TracChangeset for help on using the changeset viewer.