Changeset 11727 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs
- Timestamp: 12/29/14 11:02:36 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs
--- EpsGreedyPolicy.cs (r11711)
+++ EpsGreedyPolicy.cs (r11727)
@@ -1,4 +1,5 @@
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
 using System.Text;
 …
@@ -11,36 +12,54 @@
     private readonly int[] tries;
     private readonly double[] sumReward;
+    private readonly RandomPolicy randomPolicy;
+
     public EpsGreedyPolicy(Random random, int numActions, double eps)
       : base(numActions) {
       this.random = random;
       this.eps = eps;
-      this.tries = new int[NumActions];
-      this.sumReward = new double[NumActions];
+      this.randomPolicy = new RandomPolicy(random, numActions);
+      this.tries = new int[numActions];
+      this.sumReward = new double[numActions];
     }
 
     public override int SelectAction() {
+      Debug.Assert(Actions.Any());
       if (random.NextDouble() > eps) {
         // select best
         var maxReward = double.NegativeInfinity;
         int bestAction = -1;
-        for (int i = 0; i < NumActions; i++) {
-          if (tries[i] == 0) return i;
-          var avgReward = sumReward[i] / tries[i];
+        foreach (var a in Actions) {
+          if (tries[a] == 0) return a;
+          var avgReward = sumReward[a] / tries[a];
           if (maxReward < avgReward) {
             maxReward = avgReward;
-            bestAction = i;
+            bestAction = a;
           }
         }
+        Debug.Assert(bestAction >= 0);
         return bestAction;
       } else {
         // select random
-        return random.Next(NumActions);
+        return randomPolicy.SelectAction();
       }
     }
     public override void UpdateReward(int action, double reward) {
+      Debug.Assert(Actions.Contains(action));
+
+      randomPolicy.UpdateReward(action, reward); // does nothing
       tries[action]++;
       sumReward[action] += reward;
     }
+
+    public override void DisableAction(int action) {
+      base.DisableAction(action);
+      randomPolicy.DisableAction(action);
+      sumReward[action] = 0;
+      tries[action] = -1;
+    }
+
     public override void Reset() {
+      base.Reset();
+      randomPolicy.Reset();
       Array.Clear(tries, 0, tries.Length);
       Array.Clear(sumReward, 0, sumReward.Length);
Note: See TracChangeset for help on using the changeset viewer.