Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/DefaultPolicyActionInfo.cs @ 13398

Last change on this file since 13398 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 1.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Stores information that is relevant for most of the policies: the number of
  /// tries of an action together with the sum, the running average and the
  /// maximum of the rewards observed for it.
  /// </summary>
  public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
    /// <summary>Sum of all rewards passed to <see cref="UpdateReward"/> since the last reset.</summary>
    public double SumReward { get; private set; }

    /// <summary>Number of calls to <see cref="UpdateReward"/> since the last reset.</summary>
    public int Tries { get; private set; }

    /// <summary>
    /// Largest reward observed since the last reset;
    /// <see cref="double.NegativeInfinity"/> while no reward has been observed.
    /// </summary>
    public double MaxReward { get; private set; }

    // running mean of the observed rewards, updated incrementally in UpdateReward
    private double avgValue = 0.0;

    /// <summary>
    /// Average observed reward, or <see cref="double.PositiveInfinity"/> as long as
    /// the action has never been tried.
    /// </summary>
    public double Value {
      get {
        return Tries > 0 ? avgValue : double.PositiveInfinity;
      }
    }

    /// <summary>
    /// Creates an action info in the same state that <see cref="Reset"/> produces.
    /// </summary>
    public DefaultPolicyActionInfo() {
      // Delegate to Reset() so that a fresh and a reset instance are identical.
      // Previously MaxReward started at 0.0 here but at -infinity after Reset(),
      // and the 0.0 seed hid the true maximum whenever all rewards were negative.
      Reset();
    }

    /// <summary>
    /// Records one observed reward and updates the maximum, the try count, the sum
    /// and the running average.
    /// </summary>
    /// <param name="reward">The reward obtained for this action.</param>
    public void UpdateReward(double reward) {
      MaxReward = Math.Max(MaxReward, reward);
      Tries++;
      SumReward += reward;

      // incremental mean: avg_n = avg_(n-1) + (reward - avg_(n-1)) / n
      var delta = reward - avgValue;
      double alpha = 1.0 / Tries;
      avgValue = avgValue + alpha * delta;
    }

    /// <summary>
    /// Discards all recorded rewards and returns to the untried state.
    /// </summary>
    public void Reset() {
      SumReward = 0.0;
      Tries = 0;
      // -infinity is the neutral element of Math.Max, so the first reward always becomes the maximum
      MaxReward = double.NegativeInfinity;
      avgValue = 0.0;
    }

    /// <summary>
    /// Formats <see cref="Value"/>, <see cref="MaxReward"/> and <see cref="Tries"/>
    /// separated by single spaces; the two floating-point values use three decimals.
    /// </summary>
    public override string ToString() {
      return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.