Changeset 12893 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/ExtremeHunterActionInfo.cs
- Timestamp: 08/24/15 13:56:27 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/ExtremeHunterActionInfo.cs
r12876  r12893
    22      22     if (minHeap.Count <= 1) return double.PositiveInfinity;
    23      23     double xk = minHeap.GetMin();
    24           - if (xk.IsAlmost(0.0)) return double.NegativeInfinity;
            24   + if (xk.IsAlmost(0.0)) return double.PositiveInfinity;
    25      25     var alpha = 1.0 / (minHeap.Count - 1) * minHeap.Skip(1).Sum(x => Math.Log(x) - Math.Log(xk));
    26      26     Debug.Assert(alpha > 0);
     …       …
    55      55
    56      56     Debug.Assert(minHeap.Count == ((int)Math.Floor(n * R)));
    57           - Debug.Assert(maxHeap.Count == 0 || minHeap.Count == 0 || maxHeap.GetMin() < minHeap.GetMin());
            57   + Debug.Assert(maxHeap.Count == 0 || minHeap.Count == 0 || maxHeap.GetMin() <= minHeap.GetMin());
    58      58     }
    59      59     }
     …       …
    64      64     private OnlineHillEstimator hillEstimator;
    65      65     private List<double> rewards;
    66      66     public double MaxReward { get; private set; }
    67      67     public double Value {
    68      68     get {
     …       …
    76      76     public void UpdateReward(double reward) {
    77      77     if (reward < 0.0) throw new ArgumentException("reward");
            78   + MaxReward = Math.Max(MaxReward, reward);
    78      79     Tries++;
            80   + reward = (1 / (1 - reward)); // transformation from [0..1]
    79      81     rewards.Add(reward);
    80      82     hillEstimator.Update(reward);
     …       …
    82      84
    83      85     public void Reset() {
            86   + MaxReward = double.NegativeInfinity;
            87   +
    84      88     this.hillEstimator = new OnlineHillEstimator();
    85      89     this.rewards = new List<double>();
Note: See TracChangeset for help on using the changeset viewer.