Changeset 11747 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs
- Timestamp:
- 01/12/15 21:23:01 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs
r11742 r11747 5 5 using System.Text; 6 6 using System.Threading.Tasks; 7 using HeuristicLab.Common; 8 7 9 namespace HeuristicLab.Algorithms.Bandits.BanditPolicies { 8 10 /* Kocsis et al. Bandit based Monte-Carlo Planning */ … … 22 24 23 25 int aIdx = -1; 26 var bestActions = new List<int>(); 24 27 foreach (var aInfo in myActionInfos) { 25 28 aIdx++; 26 29 if (aInfo.Disabled) continue; 27 if (aInfo.Tries == 0) return aIdx; 28 var q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries); 30 double q; 31 if (aInfo.Tries == 0) { 32 q = double.PositiveInfinity; 33 } else { 34 q = aInfo.SumReward / aInfo.Tries + 2.0 * c * Math.Sqrt(Math.Log(totalTries) / aInfo.Tries); 35 } 29 36 if (q > bestQ) { 37 bestActions.Clear(); 30 38 bestQ = q; 31 bestAction = aIdx;39 bestActions.Add(aIdx); 32 40 } 41 if (q == bestQ) { 42 bestActions.Add(aIdx); 43 } 44 33 45 } 34 Debug.Assert(bestAction > -1);35 return bestAction ;46 Debug.Assert(bestActions.Any()); 47 return bestActions.SelectRandom(random); 36 48 } 37 49
Note: See TracChangeset
for help on using the changeset viewer.