Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs @ 12876

Last change on this file since 12876 was 12876, checked in by gkronber, 8 years ago

#2283: implemented first crude version of extreme hunter algorithm in branch

File size: 1.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  // Per-action statistics that are relevant for most of the policies:
  // the number of tries, the sum and running mean of the observed rewards,
  // and the largest reward observed.
  public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
    // running mean of all rewards passed to UpdateReward since construction or the last Reset
    private double avgValue = 0.0;

    // sum of all rewards observed so far
    public double SumReward { get; private set; }

    // number of times UpdateReward has been called so far
    public int Tries { get; private set; }

    // largest reward observed so far; starts at 0.0, so it is never reported below 0.0
    public double MaxReward { get; private set; }

    public DefaultPolicyActionInfo() {
      // a fresh instance has exactly the state that Reset() establishes
      Reset();
    }

    // Mean reward of this action, or positive infinity while the action is untried.
    // NOTE(review): presumably the infinity makes untried actions rank first - confirm against the policies.
    public double Value {
      get {
        if (Tries > 0) {
          return avgValue;
        }
        return double.PositiveInfinity;
      }
    }

    // Records one observed reward and updates all statistics.
    public void UpdateReward(double reward) {
      Tries++;
      SumReward += reward;
      MaxReward = Math.Max(MaxReward, reward);

      // incremental mean: move the current mean towards the new reward by 1/Tries of the difference
      double stepSize = 1.0 / Tries;
      double deviation = reward - avgValue;
      avgValue += stepSize * deviation;
    }

    // Clears all statistics so that the action counts as untried again.
    public void Reset() {
      Tries = 0;
      SumReward = 0.0;
      avgValue = 0.0;
      MaxReward = 0.0;
    }

    // Formats as "<Value> <MaxReward> <Tries>" with three decimals for the two doubles.
    public override string ToString() {
      object[] parts = { Value, MaxReward, Tries };
      return string.Format("{0:F3} {1:F3} {2}", parts);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.