Changeset 11747 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs
- Timestamp: 01/12/15 21:23:01 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs
r11745  r11747
     5       5    using System.Text;
     6       6    using System.Threading.Tasks;
             7  + using HeuristicLab.Common;
     7       8
     8       9    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     …       …
    11      12        public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    12      13          var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    13          -       int bestAction = -1;
    14      14          double bestQ = double.NegativeInfinity;
    15      15          int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    16      16
            17  +       var bestActions = new List<int>();
    17      18          int aIdx = -1;
    18      19          foreach (var aInfo in myActionInfos) {
    19      20            aIdx++;
    20      21            if (aInfo.Disabled) continue;
    21          -         if (aInfo.Tries == 0) return aIdx;
    22          -         var q = aInfo.SumReward / aInfo.Tries + Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
            22  +         double q;
            23  +         if (aInfo.Tries == 0) {
            24  +           q = double.PositiveInfinity;
            25  +         } else {
            26  +
            27  +           q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
            28  +         }
    23      29            if (q > bestQ) {
    24      30              bestQ = q;
    25          -           bestAction = aIdx;
            31  +           bestActions.Clear();
            32  +           bestActions.Add(aIdx);
            33  +         } else if (q == bestQ) {
            34  +           bestActions.Add(aIdx);
    26      35            }
    27      36          }
    28          -       Debug.Assert(bestAction > -1);
    29          -       return bestAction;
            37  +       Debug.Assert(bestActions.Any());
            38  +       return bestActions.SelectRandom(random);
    30      39        }
    31      40
Note: See TracChangeset for help on using the changeset viewer.