Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/10/15 14:06:29 (10 years ago)
Author:
gkronber
Message:

#2283: worked on contextual MCTS

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs

    r11742  r11745
      10      10       public class UCB1Policy : IBanditPolicy {
      11      11         public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      12           -       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
              12   +       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
      13      13           int bestAction = -1;
      14      14           double bestQ = double.NegativeInfinity;
      15      15           int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      16      16
      17           -       for (int a = 0; a < myActionInfos.Length; a++) {
      18           -         if (myActionInfos[a].Disabled) continue;
      19           -         if (myActionInfos[a].Tries == 0) return a;
      20           -         var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + Math.Sqrt((2 * Math.Log(totalTries)) / myActionInfos[a].Tries);
              17   +       int aIdx = -1;
              18   +       foreach (var aInfo in myActionInfos) {
              19   +         aIdx++;
              20   +         if (aInfo.Disabled) continue;
              21   +         if (aInfo.Tries == 0) return aIdx;
              22   +         var q = aInfo.SumReward / aInfo.Tries + Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
      21      23             if (q > bestQ) {
      22      24               bestQ = q;
      23           -           bestAction = a;
              25   +           bestAction = aIdx;
      24      26             }
      25      27           }
Note: See TracChangeset for help on using the changeset viewer.