Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs @ 11747

Last change on this file since 11747 was 11747, checked in by gkronber, 10 years ago

#2283: implemented test problems for MCTS

File size: 1.7 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// UCT action selection policy (Kocsis et al., "Bandit based Monte-Carlo Planning").
  /// Each enabled action is scored by its average reward plus an exploration
  /// term that shrinks as the action is tried more often; one of the
  /// top-scoring actions is returned.
  /// </summary>
  public class UCTPolicy : IBanditPolicy {
    // Multiplier of the exploration term in the score computed by SelectAction.
    private readonly double c;

    /// <summary>
    /// Creates a UCT policy.
    /// </summary>
    /// <param name="c">Weight of the exploration term (default 1.0).</param>
    public UCTPolicy(double c = 1.0) {
      this.c = c;
    }

    /// <summary>
    /// Selects the action with the highest UCT score; ties are broken by
    /// drawing one of the tied actions via <c>SelectRandom</c>.
    /// </summary>
    /// <param name="random">Random source passed on to the tie-break.</param>
    /// <param name="actionInfos">
    /// Statistics of the available actions. Only elements of type
    /// <see cref="DefaultPolicyActionInfo"/> are considered.
    /// </param>
    /// <returns>
    /// Zero-based position of the selected action among the
    /// <see cref="DefaultPolicyActionInfo"/> elements of
    /// <paramref name="actionInfos"/> (disabled actions are counted in the
    /// position but are never selected).
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // Materialize once: the infos are traversed twice below (sum + scoring),
      // and OfType() on its own is a deferred query that would re-run each time.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToList();
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      // Loop-invariant; only used for actions with Tries > 0, in which case totalTries > 0 as well.
      double logTotalTries = Math.Log(totalTries);

      double bestQ = double.NegativeInfinity;
      var bestActions = new List<int>();
      for (int aIdx = 0; aIdx < myActionInfos.Count; aIdx++) {
        var aInfo = myActionInfos[aIdx];
        if (aInfo.Disabled) continue;

        double q;
        if (aInfo.Tries == 0) {
          // untried actions always take precedence over tried ones
          q = double.PositiveInfinity;
        } else {
          q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(logTotalTries / aInfo.Tries);
        }

        if (q > bestQ) {
          // strictly better: restart the list of candidates
          bestQ = q;
          bestActions.Clear();
          bestActions.Add(aIdx);
        } else if (q == bestQ) {
          // exact tie (also covers several untried actions at +infinity).
          // Must be 'else if': after the branch above q == bestQ holds as well,
          // and adding the index a second time would bias the random tie-break.
          bestActions.Add(aIdx);
        }
      }
      // NOTE(review): holds only if at least one enabled action was supplied — confirm callers guarantee this.
      Debug.Assert(bestActions.Any());
      return bestActions.SelectRandom(random);
    }

    /// <summary>
    /// Creates the per-action statistics object used by this policy.
    /// </summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the policy name together with the exploration weight, formatted with two decimals.
    /// </summary>
    public override string ToString() {
      return string.Format("UCTPolicy({0:F2})", c);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.