Changeset 11727 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs
- Timestamp: 12/29/14 11:02:36 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs
--- branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs (r11711)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs (r11727)
@@ -1,4 +1,5 @@
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
 using System.Text;
@@ -13,7 +14,7 @@
     public UCB1TunedPolicy(int numActions)
       : base(numActions) {
-      this.tries = new int[NumActions];
-      this.sumReward = new double[NumActions];
-      this.sumSqrReward = new double[NumActions];
+      this.tries = new int[numActions];
+      this.sumReward = new double[numActions];
+      this.sumSqrReward = new double[numActions];
     }
 
@@ -25,12 +26,13 @@
 
     public override int SelectAction() {
+      Debug.Assert(Actions.Any());
       int bestAction = -1;
       double bestQ = double.NegativeInfinity;
-      for (int i = 0; i < NumActions; i++) {
-        if (tries[i] == 0) return i;
-        var q = sumReward[i] / tries[i] + Math.Sqrt((Math.Log(totalTries) / tries[i]) * Math.Min(1.0 / 4, V(i))); // 1/4 is upper bound of bernoulli distributed variable
+      foreach (var a in Actions) {
+        if (tries[a] == 0) return a;
+        var q = sumReward[a] / tries[a] + Math.Sqrt((Math.Log(totalTries) / tries[a]) * Math.Min(1.0 / 4, V(a))); // 1/4 is upper bound of bernoulli distributed variable
         if (q > bestQ) {
           bestQ = q;
-          bestAction = i;
+          bestAction = a;
         }
       }
@@ -38,4 +40,5 @@
     }
     public override void UpdateReward(int action, double reward) {
+      Debug.Assert(Actions.Contains(action));
       totalTries++;
       tries[action]++;
@@ -43,5 +46,15 @@
       sumSqrReward[action] += reward * reward;
     }
+
+    public override void DisableAction(int action) {
+      base.DisableAction(action);
+      totalTries -= tries[action];
+      tries[action] = -1;
+      sumReward[action] = 0;
+      sumSqrReward[action] = 0;
+    }
+
     public override void Reset() {
+      base.Reset();
       totalTries = 0;
       Array.Clear(tries, 0, tries.Length);
Note: See TracChangeset for help on using the changeset viewer.