using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Stores information that is relevant for most of the policies:
  /// the number of tries, the sum of rewards, the maximum reward and the
  /// running average reward of a single action.
  /// </summary>
  public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
    /// <summary>Sum of all rewards passed to <see cref="UpdateReward"/> since construction or the last <see cref="Reset"/>.</summary>
    public double SumReward { get; private set; }

    /// <summary>Number of calls to <see cref="UpdateReward"/> since construction or the last <see cref="Reset"/>.</summary>
    public int Tries { get; private set; }

    /// <summary>
    /// Largest reward observed so far; <see cref="double.NegativeInfinity"/> while no reward
    /// has been recorded.
    /// </summary>
    public double MaxReward { get; private set; }

    // Running mean of the observed rewards, maintained incrementally in UpdateReward.
    private double avgValue = 0.0;

    /// <summary>
    /// Average observed reward, or <see cref="double.PositiveInfinity"/> while
    /// <see cref="Tries"/> is zero.
    /// </summary>
    public double Value {
      get {
        // NOTE(review): presumably +infinity makes untried actions win any greedy comparison - confirm with the policies.
        return Tries > 0 ? avgValue : double.PositiveInfinity;
      }
    }

    /// <summary>
    /// Creates an action info in the same state that <see cref="Reset"/> produces.
    /// </summary>
    public DefaultPolicyActionInfo() {
      // Delegate to Reset so that a fresh instance and a reset instance are indistinguishable.
      // Previously MaxReward started at 0.0 here but at -infinity after Reset, which made the
      // maximum of purely negative rewards come out as 0.0 on fresh instances.
      Reset();
    }

    /// <summary>
    /// Records one observed reward and updates all statistics.
    /// </summary>
    /// <param name="reward">The reward obtained for this action.</param>
    public void UpdateReward(double reward) {
      MaxReward = Math.Max(MaxReward, reward);
      Tries++;
      SumReward += reward;

      // Incremental mean: avg_n = avg_(n-1) + (reward - avg_(n-1)) / n
      var delta = reward - avgValue;
      double alpha = 1.0 / Tries;
      avgValue = avgValue + alpha * delta;
    }

    /// <summary>
    /// Clears all statistics: zero tries, zero sum, zero average and a maximum
    /// reward of <see cref="double.NegativeInfinity"/>.
    /// </summary>
    public void Reset() {
      SumReward = 0.0;
      Tries = 0;
      MaxReward = double.NegativeInfinity; // identity element for Math.Max
      avgValue = 0.0;
    }

    /// <summary>
    /// Formats the statistics as "Value MaxReward Tries", with the two floating-point
    /// entries printed to three decimal places.
    /// </summary>
    public override string ToString() {
      return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
    }
  }
}