Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCTPolicy.cs @ 13492

Last change on this file since 13492 was 13492, checked in by aballeit, 8 years ago

#2283 UCT parameter c

File size: 1.6 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Bandit policy implementing the UCT selection rule
  /// (Kocsis et al., "Bandit based Monte-Carlo Planning").
  /// Each tried action is scored as
  /// <c>SumReward / Tries + c * sqrt(ln(totalTries) / Tries)</c>;
  /// untried actions are scored as positive infinity so they are selected first.
  /// </summary>
  public class UCTPolicy : IBanditPolicy {
    // Weight of the exploration term in the score.
    private readonly double c;

    /// <summary>
    /// Creates a UCT policy.
    /// </summary>
    /// <param name="c">Weight of the exploration term; the default is sqrt(2).</param>
    public UCTPolicy(double c = 1.41421356237) {
      this.c = c;
    }

    /// <summary>
    /// Selects the action with the highest UCT score; ties are broken randomly.
    /// </summary>
    /// <param name="random">Random number generator handed to the tie-breaking selection.</param>
    /// <param name="actionInfos">
    /// Per-action statistics. Only entries of type <see cref="DefaultPolicyActionInfo"/> are considered.
    /// </param>
    /// <returns>
    /// The zero-based position of the chosen action within the sequence of
    /// <see cref="DefaultPolicyActionInfo"/> entries.
    /// NOTE(review): this equals the position in <paramref name="actionInfos"/> only if every
    /// entry is a DefaultPolicyActionInfo - presumably guaranteed by callers; confirm.
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // Materialize once: the sequence is needed both for the total and for scoring,
      // and a deferred query would otherwise re-enumerate the caller's source twice.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToList();
      int totalTries = myActionInfos.Sum(a => a.Tries);

      // Loop-invariant part of the exploration term. It is only used for actions with
      // Tries > 0, in which case totalTries > 0 as well (given non-negative try counts).
      double logTotalTries = Math.Log(totalTries);

      double bestQ = double.NegativeInfinity;
      var bestActions = new List<int>();
      for (int aIdx = 0; aIdx < myActionInfos.Count; aIdx++) {
        var aInfo = myActionInfos[aIdx];
        double q;
        if (aInfo.Tries == 0) {
          // Untried actions always take precedence over tried ones.
          q = double.PositiveInfinity;
        } else {
          q = aInfo.SumReward / aInfo.Tries + c * Math.Sqrt(logTotalTries / aInfo.Tries);
        }

        if (q > bestQ) {
          // Strictly better score: discard the previous candidates.
          bestActions.Clear();
          bestQ = q;
          bestActions.Add(aIdx);
        } else if (q.IsAlmost(bestQ)) {
          // Score ties with the current best: keep it as an additional candidate.
          bestActions.Add(aIdx);
        }
      }

      Debug.Assert(bestActions.Any());
      return bestActions.SelectRandom(random);
    }

    /// <summary>
    /// Creates a fresh statistics object for one action of this policy.
    /// </summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the policy name together with the exploration weight formatted with two decimals.
    /// </summary>
    public override string ToString() {
      return string.Format("UCTPolicy({0:F2})", c);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.