Changeset 11745 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
- Timestamp:
- 01/10/15 14:06:29 (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
Tabular | Unified branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs ¶
r11742 r11745 10 10 public class UCB1Policy : IBanditPolicy { 11 11 public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) { 12 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>() .ToArray(); // TODO: performance12 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>(); 13 13 int bestAction = -1; 14 14 double bestQ = double.NegativeInfinity; 15 15 int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries); 16 16 17 for (int a = 0; a < myActionInfos.Length; a++) { 18 if (myActionInfos[a].Disabled) continue; 19 if (myActionInfos[a].Tries == 0) return a; 20 var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + Math.Sqrt((2 * Math.Log(totalTries)) / myActionInfos[a].Tries); 17 int aIdx = -1; 18 foreach (var aInfo in myActionInfos) { 19 aIdx++; 20 if (aInfo.Disabled) continue; 21 if (aInfo.Tries == 0) return aIdx; 22 var q = aInfo.SumReward / aInfo.Tries + Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries); 21 23 if (q > bestQ) { 22 24 bestQ = q; 23 bestAction = a ;25 bestAction = aIdx; 24 26 } 25 27 }
Note: See TracChangeset
for help on using the changeset viewer.