Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/ExtremeHunterActionInfo.cs @ 13234

Last change on this file since 13234 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 3.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10  // stores information that is relevant for most of the policies
11  public class ExtremeHunterActionInfo : IBanditPolicyActionInfo {
12
13    // hills estimator using a fraction r of the largest sample values
14    private class OnlineHillEstimator {
15      private MinHeap<double> minHeap;
16      private MaxHeap<double> maxHeap;
17
18      public double R { get; set; }
19      private long n;
20      public double Alpha {
21        get {
22          if (minHeap.Count <= 1) return double.PositiveInfinity;
23          double xk = minHeap.GetMin();
24          if (xk.IsAlmost(0.0)) return double.PositiveInfinity;
25          var alpha = 1.0 / (minHeap.Count - 1) * minHeap.Skip(1).Sum(x => Math.Log(x) - Math.Log(xk));
26          Debug.Assert(alpha > 0);
27          return alpha;
28        }
29      }
30      public OnlineHillEstimator(double r = 0.1) {
31        this.R = r;
32        this.minHeap = new MinHeap<double>();
33        this.maxHeap = new MaxHeap<double>();
34        n = 0;
35      }
36
37      private double maxNotInHeap = double.NegativeInfinity;
38      public void Update(double reward) {
39        // if we found a new larger reward then the best n*r so far then insert and drop the smallest
40        if (minHeap.Count > 0 && reward > minHeap.GetMin()) {
41          maxHeap.Add(minHeap.ExtractDominating());
42          minHeap.Add(reward);
43        } else {
44          maxHeap.Add(reward);
45        }
46
47        // increase number of observed rewards
48        n++;
49
50        // now it could be possible that we need to increase the size of the heap by 1
51        // => add the largest observed reward so far which is not in the heap
52        if (((int)Math.Floor(n * R)) > minHeap.Count) {
53          minHeap.Add(maxHeap.ExtractDominating());
54        }
55
56        Debug.Assert(minHeap.Count == ((int)Math.Floor(n * R)));
57        Debug.Assert(maxHeap.Count == 0 || minHeap.Count == 0 || maxHeap.GetMin() <= minHeap.GetMin());
58      }
59    }
60
61
62    public int Tries { get; private set; }
63    public IEnumerable<double> Rewards { get { return rewards; } }
64    private OnlineHillEstimator hillEstimator;
65    private List<double> rewards;
66    public double MaxReward { get; private set; }
67    public double Value {
68      get {
69        return hillEstimator.Alpha;
70      }
71    }
72    public ExtremeHunterActionInfo() {
73      Reset();
74    }
75
76    public void UpdateReward(double reward) {
77      if (reward < 0.0) throw new ArgumentException("reward");
78      MaxReward = Math.Max(MaxReward, reward);
79      Tries++;
80      reward = (1 / (1 - reward)); // transformation from [0..1]
81      rewards.Add(reward);
82      hillEstimator.Update(reward);
83    }
84
85    public void Reset() {
86      MaxReward = double.NegativeInfinity;
87
88      this.hillEstimator = new OnlineHillEstimator();
89      this.rewards = new List<double>();
90      this.Tries = 0;
91    }
92
93    public override string ToString() {
94      return string.Format("{0:F3} {1}", Value, Tries);
95    }
96  }
97}
Note: See TracBrowser for help on using the repository browser.