Changeset 11742 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
- Timestamp:
- 01/09/15 14:57:28 (9 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  // Policy for the k-armed bandit problem: UCB1-Tuned (see Auer et al.,
  // "Finite-time Analysis of the Multiarmed Bandit Problem", Machine Learning 47, 2002).
  public class UCB1TunedPolicy : IBanditPolicy {

    // Selects the next action (arm) to play.
    //
    // random      - unused here (no random tie-breaking); kept for interface compatibility.
    // actionInfos - per-action statistics; only MeanAndVariancePolicyActionInfo entries
    //               are considered (OfType filter), disabled actions are skipped.
    //
    // Returns the index (within the filtered sequence) of the chosen action:
    // any untried action (Tries == 0) is played first; otherwise the action maximizing
    // the UCB1-Tuned bound q = avg + sqrt((ln(n) / n_a) * min(1/4, V_a)) wins.
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // NOTE(review): deferred LINQ query, enumerated twice below (Sum + foreach);
      // r11742 deliberately removed the previous ToArray() -- confirm this is intended.
      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
      int bestAction = -1;
      double bestQ = double.NegativeInfinity;
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);

      int aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
        if (aInfo.Tries == 0) return aIdx; // play every arm once before comparing bounds

        var sumReward = aInfo.SumReward;
        var tries = aInfo.Tries;

        var avgReward = sumReward / tries;
        var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
        if (q > bestQ) {
          bestQ = q;
          bestAction = aIdx;
        }
      }
      // NOTE(review): the changeset diff elides two unchanged lines here ("..." in the
      // viewer); reconstructed as assert + return -- confirm against revision r11742.
      Debug.Assert(bestAction > -1);
      return bestAction;
    }

    // Creates the per-action statistics object this policy requires
    // (tracks mean and variance of observed rewards).
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new MeanAndVariancePolicyActionInfo();
    }

    // NOTE(review): the remainder of the class -- the V(...) variance-bound helper and the
    // closing braces -- is not shown in this changeset excerpt and is not reconstructed here.
Note: see the TracChangeset documentation for help on using the changeset viewer.