Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCTPolicy.cs @ 13042

Last change on this file since 13042 was 12503, checked in by aballeit, 9 years ago

#2283 added GUI and charts; fixed MCTS

File size: 1.6 KB
RevLine 
[11730]1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
[11747]7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// UCT bandit policy, after Kocsis et al., "Bandit based Monte-Carlo Planning".
  /// Each tried action is scored as its mean reward plus an exploration bonus of
  /// <c>2 * c * sqrt(ln(totalTries) / tries)</c>; untried actions score +infinity.
  /// </summary>
  public class UCTPolicy : IBanditPolicy {
    // Exploration constant; larger values weight the exploration bonus more heavily.
    private readonly double c;

    /// <summary>Creates a UCT policy.</summary>
    /// <param name="c">Exploration constant used in the bonus term (default 1.0).</param>
    public UCTPolicy(double c = 1.0) {
      this.c = c;
    }

    /// <summary>
    /// Selects the index of the action with the highest UCT score, choosing among
    /// tied best actions via <c>SelectRandom</c>.
    /// </summary>
    /// <param name="random">Random source handed to <c>SelectRandom</c> for tie breaking.</param>
    /// <param name="actionInfos">
    /// Statistics of the available actions. Only entries of type
    /// <see cref="DefaultPolicyActionInfo"/> are considered.
    /// </param>
    /// <returns>
    /// Zero-based index of the selected action, counted over the
    /// <see cref="DefaultPolicyActionInfo"/> entries only.
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // OfType is lazily evaluated; materialize once so that the Sum below and the
      // loop see the same snapshot and the caller's sequence is enumerated only once.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToList();
      int totalTries = myActionInfos.Sum(a => a.Tries);
      // Loop-invariant. Only read in the branch where some action has Tries != 0.
      double logTotalTries = Math.Log(totalTries);

      double bestQ = double.NegativeInfinity;
      var bestActions = new List<int>();
      int aIdx = -1;
      // NOTE(review): aIdx indexes the filtered list; it matches positions in
      // actionInfos only if every element is a DefaultPolicyActionInfo - confirm with callers.
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        double q;
        if (aInfo.Tries == 0) {
          // Untried actions always take precedence over tried ones.
          q = double.PositiveInfinity;
        } else {
          // Mean reward plus exploration bonus.
          q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(logTotalTries / aInfo.Tries);
        }
        if (q > bestQ) {
          // Strictly better: restart the list of tied candidates.
          bestActions.Clear();
          bestQ = q;
          bestActions.Add(aIdx);
        } else if (q.IsAlmost(bestQ)) {
          // Tie (as judged by IsAlmost): keep as an additional candidate.
          bestActions.Add(aIdx);
        }
      }
      Debug.Assert(bestActions.Any());
      return bestActions.SelectRandom(random);
    }

    /// <summary>Creates the per-action statistics object used by this policy.</summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>Returns the policy name with the exploration constant formatted to two decimals.</summary>
    public override string ToString() {
      return string.Format("UCTPolicy({0:F2})", c);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.