Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/12/15 21:23:01 (9 years ago)
Author:
gkronber
Message:

#2283: implemented test problems for MCTS

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs

    --- UCB1Policy.cs (r11745)
    +++ UCB1Policy.cs (r11747)
    @@ -5,4 +5,5 @@
     using System.Text;
     using System.Threading.Tasks;
    +using HeuristicLab.Common;
     
     namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    @@ -11,21 +12,29 @@
         public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
           var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    -      int bestAction = -1;
           double bestQ = double.NegativeInfinity;
           int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
     
    +      var bestActions = new List<int>();
           int aIdx = -1;
           foreach (var aInfo in myActionInfos) {
             aIdx++;
             if (aInfo.Disabled) continue;
    -        if (aInfo.Tries == 0) return aIdx;
    -        var q = aInfo.SumReward / aInfo.Tries + Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    +        double q;
    +        if (aInfo.Tries == 0) {
    +          q = double.PositiveInfinity;
    +        } else {
    +
    +          q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
    +        }
             if (q > bestQ) {
               bestQ = q;
    -          bestAction = aIdx;
    +          bestActions.Clear();
    +          bestActions.Add(aIdx);
    +        } else if (q == bestQ) {
    +          bestActions.Add(aIdx);
             }
           }
    -      Debug.Assert(bestAction > -1);
    -      return bestAction;
    +      Debug.Assert(bestActions.Any());
    +      return bestActions.SelectRandom(random);
         }
     
Note: See TracChangeset for help on using the changeset viewer.