Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/24/15 13:56:27 (9 years ago)
Author:
gkronber
Message:

#2283: experiments on grammatical optimization algorithms (max reward instead of average reward, ...)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/ExtremeHunterActionInfo.cs

    r12876 r12893  
    22 22          if (minHeap.Count <= 1) return double.PositiveInfinity;
    23 23          double xk = minHeap.GetMin();
    24             if (xk.IsAlmost(0.0)) return double.NegativeInfinity;
       24          if (xk.IsAlmost(0.0)) return double.PositiveInfinity;
    25 25          var alpha = 1.0 / (minHeap.Count - 1) * minHeap.Skip(1).Sum(x => Math.Log(x) - Math.Log(xk));
    26 26          Debug.Assert(alpha > 0);
     
    55 55
    56 56        Debug.Assert(minHeap.Count == ((int)Math.Floor(n * R)));
    57           Debug.Assert(maxHeap.Count == 0 || minHeap.Count == 0 || maxHeap.GetMin() < minHeap.GetMin());
       57        Debug.Assert(maxHeap.Count == 0 || minHeap.Count == 0 || maxHeap.GetMin() <= minHeap.GetMin());
    58 58      }
    59 59    }
     
    64 64    private OnlineHillEstimator hillEstimator;
    65 65    private List<double> rewards;
    66
       66    public double MaxReward { get; private set; }
    67 67    public double Value {
    68 68      get {
     
    76 76    public void UpdateReward(double reward) {
    77 77      if (reward < 0.0) throw new ArgumentException("reward");
       78      MaxReward = Math.Max(MaxReward, reward);
    78 79      Tries++;
       80      reward = (1 / (1 - reward)); // transformation from [0..1]
    79 81      rewards.Add(reward);
    80 82      hillEstimator.Update(reward);
     
    82 84
    83 85    public void Reset() {
       86      MaxReward = double.NegativeInfinity;
       87
    84 88      this.hillEstimator = new OnlineHillEstimator();
    85 89      this.rewards = new List<double>();
Note: See TracChangeset for help on using the changeset viewer.