Changeset 11792 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
- Timestamp: 01/16/15 18:26:35 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
--- r11742
+++ r11792
@@ -5,4 +5,5 @@
 using System.Text;
 using System.Threading.Tasks;
+using HeuristicLab.Common;
 
 namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
@@ -12,26 +13,35 @@
     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
       var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
-      int bestAction = -1;
-      double bestQ = double.NegativeInfinity;
+
       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
 
       int aIdx = -1;
+      double bestQ = double.NegativeInfinity;
+      var bestActions = new List<int>();
       foreach (var aInfo in myActionInfos) {
         aIdx++;
         if (aInfo.Disabled) continue;
-        if (aInfo.Tries == 0) return aIdx;
+        double q;
+        if (aInfo.Tries == 0) {
+          q = double.PositiveInfinity;
+        } else {
+          var sumReward = aInfo.SumReward;
+          var tries = aInfo.Tries;
 
-        var sumReward = aInfo.SumReward;
-        var tries = aInfo.Tries;
-
-        var avgReward = sumReward / tries;
-        var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
+          var avgReward = sumReward / tries;
+          q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries)));
+          // 1/4 is upper bound of bernoulli distributed variable
+        }
         if (q > bestQ) {
           bestQ = q;
-          bestAction = aIdx;
+          bestActions.Clear();
+          bestActions.Add(aIdx);
+        } else if (q == bestQ) {
+          bestActions.Add(aIdx);
         }
       }
-      Debug.Assert(bestAction > -1);
-      return bestAction;
+      Debug.Assert(bestActions.Any());
+
+      return bestActions.SelectRandom(random);
     }
 
Note: See TracChangeset for help on using the changeset viewer.