Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs @ 12406

Last change on this file since 12406 was 12290, checked in by gkronber, 10 years ago

#2283 created a new branch to separate development from aballeit

File size: 1.3 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Stores information that is relevant for most of the policies: the number of
  /// tries, the sum of rewards, the largest reward, and a running estimate of the
  /// action's value.
  /// </summary>
  public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
    // Lower bound for the step size used when updating the value estimate.
    // While 1/Tries is above this bound (the first 100 tries) the estimate is the
    // plain arithmetic mean; afterwards the step size stays constant, so newer
    // rewards are weighted more strongly than older ones.
    private const double MinStepSize = 0.01;

    /// <summary>Sum of all rewards passed to <see cref="UpdateReward"/> since construction or the last <see cref="Reset"/>.</summary>
    public double SumReward { get; private set; }

    /// <summary>Number of rewards passed to <see cref="UpdateReward"/> since construction or the last <see cref="Reset"/>.</summary>
    public int Tries { get; private set; }

    /// <summary>
    /// Largest reward observed since construction or the last <see cref="Reset"/>;
    /// 0.0 while no reward has been observed yet.
    /// </summary>
    public double MaxReward { get; private set; }

    private double avgValue = 0.0;

    /// <summary>
    /// Current value estimate of the action. Returns positive infinity as long as
    /// the action has not been tried. Note that after more than 100 tries this is a
    /// recency-weighted average and can differ from SumReward / Tries.
    /// </summary>
    public double Value {
      get {
        return Tries > 0 ? avgValue : double.PositiveInfinity;
      }
    }

    /// <summary>Creates an action info without any observations.</summary>
    public DefaultPolicyActionInfo() {
      MaxReward = 0.0;
    }

    /// <summary>
    /// Records one observed reward and updates all statistics.
    /// </summary>
    /// <param name="reward">The reward received for the action.</param>
    public void UpdateReward(double reward) {
      Tries++;
      SumReward += reward;

      // The first observation is the maximum by definition. Comparing it against
      // the initial 0.0 would report a maximum that was never observed whenever
      // all rewards are negative.
      MaxReward = Tries == 1 ? reward : Math.Max(MaxReward, reward);

      // Incremental update: estimate += stepSize * (reward - estimate)
      var stepSize = Math.Max(1.0 / Tries, MinStepSize);
      avgValue += stepSize * (reward - avgValue);
    }

    /// <summary>Discards all observations and returns to the untried state.</summary>
    public void Reset() {
      SumReward = 0.0;
      Tries = 0;
      MaxReward = 0.0;
      avgValue = 0.0;
    }

    /// <summary>
    /// Formats the value estimate, the maximum reward (both with three decimals)
    /// and the number of tries, separated by spaces.
    /// </summary>
    public override string ToString() {
      return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.