Context Navigation

BoltzmannExplorationPolicy.cs

Timestamp:

01/12/15 21:23:01 (9 years ago)

Author:

gkronber

Message:

#2283: implemented test problems for MCTS

File:

-                      r11742
+                      r11747
     private readonly Func<DefaultPolicyActionInfo, double> valueFunction;
     public BoltzmannExplorationPolicy(double eps) : this(eps, DefaultPolicyActionInfo.AverageReward) { }
+    public BoltzmannExplorationPolicy(double beta) : this(beta, DefaultPolicyActionInfo.AverageReward) { }
     public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
 …
       // select best
       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
+      Debug.Assert(myActionInfos.Any(a => !a.Disabled));
+      // try any of the untried actions randomly
+      // for RoyalSequence it is much better to select the actions in the order of occurrence (all terminal alternatives first)
+      //if (myActionInfos.Any(aInfo => !aInfo.Disabled && aInfo.Tries == 0)) {
+      //  return myActionInfos
+      //  .Select((aInfo, idx) => new { aInfo, idx })
+      //  .Where(p => !p.aInfo.Disabled)
+      //  .Where(p => p.aInfo.Tries == 0)
+      //  .SelectRandom(random).idx;
+      //}
       var w = from aInfo in myActionInfos

Note: See TracChangeset for help on using the changeset viewer.