Ignore:
Timestamp:
01/09/15 14:57:28 (7 years ago)
Author:
gkronber
Message:

#2283 refactoring

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies
Files:
1 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs

    r11732 r11742  
    5 5 using System.Text;
    6 6 using System.Threading.Tasks;
    7
    8 namespace HeuristicLab.Algorithms.Bandits {
     7 namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    9 8   /* Kocsis et al. Bandit based Monte-Carlo Planning */
    10   public class UCTPolicy : IPolicy {
     9   public class UCTPolicy : IBanditPolicy {
    11 10     private readonly double c;
    12 11
     
    16 15
    17 16
    18     public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    19       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
     17     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
     18       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    20 19       int bestAction = -1;
    21 20       double bestQ = double.NegativeInfinity;
    22 21       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    23 22
    24       for (int a = 0; a < myActionInfos.Length; a++) {
    25         if (myActionInfos[a].Disabled) continue;
    26         if (myActionInfos[a].Tries == 0) return a;
    27         var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + 2 * c * Math.Sqrt(Math.Log(totalTries) / myActionInfos[a].Tries);
     23       int aIdx = -1;
     24       foreach (var aInfo in myActionInfos) {
     25         aIdx++;
     26         if (aInfo.Disabled) continue;
     27         if (aInfo.Tries == 0) return aIdx;
     28         var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
    28 29         if (q > bestQ) {
    29 30           bestQ = q;
    30           bestAction = a;
     31           bestAction = aIdx;
    31 32         }
    32 33       }
     
    35 36     }
    36 37
    37     public IPolicyActionInfo CreateActionInfo() {
     38     public IBanditPolicyActionInfo CreateActionInfo() {
    38 39       return new DefaultPolicyActionInfo();
    39 40     }
Note: See TracChangeset for help on using the changeset viewer.