Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/ModifiedUCTPolicy.cs @ 12533

Last change on this file since 12533 was 11806, checked in by gkronber, 10 years ago

#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies

File size: 1.7 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Modified UCT bandit policy; see Coquelin &amp; Munos, Bandit Algorithms for Tree Search, UAI 2007.
  /// An action that has been tried at least once is scored as
  /// <c>SumReward / Tries + c * sqrt(sqrt(totalTries) / Tries)</c>;
  /// an action that has never been tried is scored as positive infinity.
  /// </summary>
  public class ModifiedUCTPolicy : IBanditPolicy {
    // Multiplier applied to the exploration term of the score.
    private readonly double c;

    /// <summary>
    /// Creates the policy.
    /// </summary>
    /// <param name="c">Multiplier applied to the exploration term (default 1.0).</param>
    public ModifiedUCTPolicy(double c = 1.0) {
      this.c = c;
    }

    /// <summary>
    /// Selects the action with the highest score; ties (as decided by <c>IsAlmost</c>)
    /// are broken by <c>SelectRandom</c> using <paramref name="random"/>.
    /// </summary>
    /// <param name="random">Random number generator passed to the tie-breaking selection.</param>
    /// <param name="actionInfos">
    /// Statistics of the available actions. Only elements that are
    /// <c>DefaultPolicyActionInfo</c> instances are considered.
    /// </param>
    /// <returns>
    /// The zero-based position of the selected action within the sequence of
    /// <c>DefaultPolicyActionInfo</c> elements of <paramref name="actionInfos"/>.
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // Materialize once: the filtered sequence is needed both for the total and for the
      // scoring pass, and a deferred query would enumerate the caller's sequence twice.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray();
      int totalTries = myActionInfos.Sum(a => a.Tries);
      // Loop-invariant part of the exploration term.
      double sqrtTotalTries = Math.Sqrt(totalTries);

      double bestQ = double.NegativeInfinity;
      var bestActions = new List<int>();
      for (int aIdx = 0; aIdx < myActionInfos.Length; aIdx++) {
        var aInfo = myActionInfos[aIdx];
        double q;
        if (aInfo.Tries == 0) {
          // untried actions always win against tried ones
          q = double.PositiveInfinity;
        } else {
          // modification in comparison to original UCT
          q = aInfo.SumReward / aInfo.Tries + c * Math.Sqrt(sqrtTotalTries / aInfo.Tries);
        }
        if (q > bestQ) {
          // strictly better: restart the list of candidates
          bestActions.Clear();
          bestQ = q;
          bestActions.Add(aIdx);
        } else if (q.IsAlmost(bestQ)) {
          // tie with the current best: keep as an additional candidate
          bestActions.Add(aIdx);
        }
      }
      Debug.Assert(bestActions.Any());
      return bestActions.SelectRandom(random);
    }

    /// <summary>
    /// Creates a new, empty statistics object for one action.
    /// </summary>
    /// <returns>A new <c>DefaultPolicyActionInfo</c>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    /// <summary>
    /// Returns the policy name together with the value of <c>c</c> formatted with two decimals.
    /// </summary>
    public override string ToString() {
      return string.Format("ModifiedUCTPolicy({0:F2})", c);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.