source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs @ 11742

Last change on this file since 11742 was 11742, checked in by gkronber, 6 years ago

#2283 refactoring

File size: 1.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
8  /* Kocsis et al. Bandit based Monte-Carlo Planning */
9  public class UCTPolicy : IBanditPolicy {
10    private readonly double c;
11
12    public UCTPolicy(double c = 1.0) {
13      this.c = c;
14    }
15
16
17    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
18      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
19      int bestAction = -1;
20      double bestQ = double.NegativeInfinity;
21      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
22
23      int aIdx = -1;
24      foreach (var aInfo in myActionInfos) {
25        aIdx++;
26        if (aInfo.Disabled) continue;
27        if (aInfo.Tries == 0) return aIdx;
28        var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
29        if (q > bestQ) {
30          bestQ = q;
31          bestAction = aIdx;
32        }
33      }
34      Debug.Assert(bestAction > -1);
35      return bestAction;
36    }
37
38    public IBanditPolicyActionInfo CreateActionInfo() {
39      return new DefaultPolicyActionInfo();
40    }
41
42    public override string ToString() {
43      return string.Format("UCTPolicy({0:F2})", c);
44    }
45  }
46}
Note: See TracBrowser for help on using the repository browser.