Changeset 11730 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs
- Timestamp: 01/02/15 16:08:21 (10 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs
--- UCBNormalPolicy.cs (r11727)
+++ UCBNormalPolicy.cs (r11730)
@@ -24,8 +24,10 @@
       double bestQ = double.NegativeInfinity;
       foreach (var a in Actions) {
-        if (totalTries == 0 || tries[a] == 0 || tries[a] <Math.Ceiling(8 * Math.Log(totalTries))) return a;
+        if (totalTries <= 1 || tries[a] <= 1 || tries[a] <= Math.Ceiling(8 * Math.Log(totalTries))) return a;
         var avgReward = sumReward[a] / tries[a];
+        var estVariance = 16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]);
+        if (estVariance < 0) estVariance = 0; // numerical problems
         var q = avgReward
-          + Math.Sqrt( 16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]));
+          + Math.Sqrt(estVariance);
         if (q > bestQ) {
           bestQ = q;
 …
@@ -33,4 +35,5 @@
         }
       }
+      Debug.Assert(Actions.Contains(bestAction));
       return bestAction;
     }
 …
@@ -58,4 +61,17 @@
       Array.Clear(sumSqrReward, 0, sumSqrReward.Length);
     }
+    public override void PrintStats() {
+      for (int i = 0; i < sumReward.Length; i++) {
+        if (tries[i] >= 0) {
+          Console.Write("{0,5:F2}", sumReward[i] / tries[i]);
+        } else {
+          Console.Write("{0,5}", "");
+        }
+      }
+      Console.WriteLine();
+    }
+    public override string ToString() {
+      return "UCBNormalPolicy";
+    }
   }
 }
Note: See TracChangeset for help on using the changeset viewer.