Context Navigation

UCTPolicy.cs

Timestamp:

01/12/15 21:23:01 (10 years ago)

Author:

gkronber

Message:

#2283: implemented test problems for MCTS

File:

-                      r11742
+                      r11747
 using System.Text;
 using System.Threading.Tasks;
+using HeuristicLab.Common;
 namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
   /* Kocsis et al. Bandit based Monte-Carlo Planning */
 …
       int aIdx = -1;
+      var bestActions = new List<int>();
       foreach (var aInfo in myActionInfos) {
         aIdx++;
         if (aInfo.Disabled) continue;
+        if (aInfo.Tries == 0) return aIdx;
+        var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
+        double q;
+        if (aInfo.Tries == 0) {
+          q = double.PositiveInfinity;
+        } else {
+          q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
+        }
         if (q > bestQ) {
+          bestActions.Clear();
           bestQ = q;
           bestAction = aIdx;
+          bestActions.Add(aIdx);
+        }
+        if (q == bestQ) {
+          bestActions.Add(aIdx);
+        }
+      }
       Debug.Assert(bestAction > -1);
       return bestAction;
+      Debug.Assert(bestActions.Any());
+      return bestActions.SelectRandom(random);
+    }

Note: See TracChangeset for help on using the changeset viewer.