using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
    /// <summary>
    /// Stores information that is relevant for most of the policies:
    /// the accumulated reward, the number of tries and the largest reward seen.
    /// </summary>
    public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
        /// <summary>Sum of all rewards passed to <see cref="UpdateReward"/> since construction or the last <see cref="Reset"/>.</summary>
        public double SumReward { get; private set; }

        /// <summary>Number of calls to <see cref="UpdateReward"/> since construction or the last <see cref="Reset"/>.</summary>
        public int Tries { get; private set; }

        /// <summary>
        /// Largest reward observed so far, with 0.0 as the starting value.
        /// </summary>
        /// <remarks>
        /// NOTE(review): because the baseline is 0.0, this stays at 0.0 when only negative rewards
        /// have been recorded. Presumably rewards are expected to be non-negative; confirm against callers.
        /// </remarks>
        public double MaxReward { get; private set; }

        /// <summary>
        /// Mean reward (<see cref="SumReward"/> divided by <see cref="Tries"/>),
        /// or 0.0 when no reward has been recorded yet.
        /// </summary>
        public double Value {
            get { return Tries > 0 ? SumReward / Tries : 0.0; }
        }

        /// <summary>Creates an action info with <see cref="MaxReward"/> set to 0.0.</summary>
        public DefaultPolicyActionInfo() {
            MaxReward = 0.0;
        }

        /// <summary>
        /// Records one observed reward: increments <see cref="Tries"/>, adds the reward to
        /// <see cref="SumReward"/> and raises <see cref="MaxReward"/> if the reward exceeds it.
        /// </summary>
        /// <param name="reward">The reward to record.</param>
        public void UpdateReward(double reward) {
            Tries++;
            SumReward += reward;
            MaxReward = Math.Max(MaxReward, reward);
        }

        /// <summary>Sets <see cref="SumReward"/>, <see cref="Tries"/> and <see cref="MaxReward"/> back to zero.</summary>
        public void Reset() {
            SumReward = 0.0;
            Tries = 0;
            MaxReward = 0.0;
        }

        /// <summary>
        /// Formats the statistics as "Value MaxReward Tries", with the two
        /// floating-point numbers printed with three decimals.
        /// </summary>
        public override string ToString() {
            return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
        }

        /// <summary>
        /// Function that maps an action info to its average reward.
        /// </summary>
        /// <remarks>
        /// Unlike <see cref="Value"/>, which reports 0.0 for an action without tries, this function
        /// returns <see cref="double.PositiveInfinity"/> in that case.
        /// NOTE(review): presumably this makes untried actions win in policies that maximize the
        /// returned value; confirm against callers.
        /// </remarks>
        public static Func<DefaultPolicyActionInfo, double> AverageReward {
            get {
                // Dividing a double by an int already yields a double, so no explicit cast is needed.
                return aInfo => aInfo.Tries == 0
                    ? double.PositiveInfinity
                    : aInfo.SumReward / aInfo.Tries;
            }
        }
    }
}