Ignore:
Timestamp:
01/12/15 21:23:01 (7 years ago)
Author:
gkronber
Message:

#2283: implemented test problems for MCTS

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs

    r11742 r11747  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
     8
    79namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    810  /* Kocsis and Szepesvári, "Bandit based Monte-Carlo Planning" (ECML 2006) */
     
    2224
    2325      int aIdx = -1;
     26      var bestActions = new List<int>();
    2427      foreach (var aInfo in myActionInfos) {
    2528        aIdx++;
    2629        if (aInfo.Disabled) continue;
    27         if (aInfo.Tries == 0) return aIdx;
    28         var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
     30        double q;
     31        if (aInfo.Tries == 0) {
     32          q = double.PositiveInfinity;
     33        } else {
     34          q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries);
     35        }
    2936        if (q > bestQ) {
     37          bestActions.Clear();
    3038          bestQ = q;
    31           bestAction = aIdx;
     39          bestActions.Add(aIdx);
    3240        }
     41        if (q == bestQ) {
     42          bestActions.Add(aIdx);
     43        }
     44
    3345      }
    34       Debug.Assert(bestAction > -1);
    35       return bestAction;
     46      Debug.Assert(bestActions.Any());
     47      return bestActions.SelectRandom(random);
    3648    }
    3749
Note: See TracChangeset for help on using the changeset viewer.