Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCTPolicy.cs @ 13847

Last change on this file since 13847 was 13492, checked in by aballeit, 9 years ago

#2283 UCT parameter c

File size: 1.6 KB
RevLine 
[11730]1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
[11747]7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// UCT action selection after Kocsis et al., "Bandit based Monte-Carlo Planning".
  /// Each action is scored as
  ///   SumReward / Tries + c * sqrt(ln(total tries) / Tries)
  /// and one of the top-scoring actions is picked at random.
  /// </summary>
  public class UCTPolicy : IBanditPolicy {
    // Multiplier applied to the sqrt(ln(N) / n) term of the score.
    private readonly double c;

    /// <summary>Creates the policy.</summary>
    /// <param name="c">Multiplier of the square-root term; the default is sqrt(2).</param>
    public UCTPolicy(double c = 1.41421356237) {
      this.c = c;
    }

    /// <summary>
    /// Selects the index of the action with the highest UCT score.
    /// </summary>
    /// <param name="random">Random source handed to SelectRandom to break ties.</param>
    /// <param name="actionInfos">
    /// Statistics of the candidate actions. Only entries of type
    /// <see cref="DefaultPolicyActionInfo"/> are considered.
    /// </param>
    /// <returns>
    /// Zero-based position of the chosen action within the filtered
    /// <see cref="DefaultPolicyActionInfo"/> entries (entries of other types are
    /// skipped and do not consume an index).
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // Materialize once: the sequence is needed for the total and for the scoring
      // pass, and a deferred OfType() query would otherwise re-run the caller's
      // enumerable (and the type filter) for each of them.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray();
      int totalTries = myActionInfos.Sum(a => a.Tries);

      // Loop-invariant. Only read for actions with Tries > 0, in which case totalTries >= 1.
      double logTotalTries = Math.Log(totalTries);

      double bestQ = double.NegativeInfinity;
      var bestActions = new List<int>();
      for (int aIdx = 0; aIdx < myActionInfos.Length; aIdx++) {
        var aInfo = myActionInfos[aIdx];
        double q;
        if (aInfo.Tries == 0) {
          // Untried actions outrank every tried action.
          q = double.PositiveInfinity;
        } else {
          q = aInfo.SumReward / aInfo.Tries + c * Math.Sqrt(logTotalTries / aInfo.Tries);
        }

        if (q > bestQ) {
          // Strictly better: forget the previous candidates.
          bestActions.Clear();
          bestQ = q;
          bestActions.Add(aIdx);
        } else if (q.IsAlmost(bestQ)) {
          // Tie according to IsAlmost (presumably a tolerance comparison from
          // HeuristicLab.Common - confirm): keep it as an additional candidate.
          bestActions.Add(aIdx);
        }
      }

      Debug.Assert(bestActions.Any());
      return bestActions.SelectRandom(random);
    }

    /// <summary>Creates the per-action statistics object this policy works with.</summary>
    /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>Returns the policy name followed by c formatted with two decimals.</summary>
    public override string ToString() {
      return string.Format("UCTPolicy({0:F2})", c);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.