using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies
{
    /// <summary>
    /// Per-action bookkeeping for a bandit policy. Count, sum, average and variance
    /// are delegated to an <c>OnlineMeanAndVarianceEstimator</c>; the largest reward
    /// seen so far is tracked directly by this class.
    /// </summary>
    public class MeanAndVariancePolicyActionInfo : IBanditPolicyActionInfo
    {
        // NOTE(review): deliberately not marked readonly. The estimator type is declared
        // elsewhere; if it were a struct, readonly would make the mutating calls below
        // operate on a defensive copy.
        private OnlineMeanAndVarianceEstimator estimator = new OnlineMeanAndVarianceEstimator();

        /// <summary>
        /// Creates an action info whose <see cref="MaxReward"/> starts at
        /// <see cref="double.NegativeInfinity"/>, i.e. the same state that
        /// <see cref="Reset"/> establishes.
        /// </summary>
        public MeanAndVariancePolicyActionInfo()
        {
            // Without this, MaxReward would start at default(double) == 0 and
            // Math.Max in UpdateReward would hide the true maximum whenever all
            // observed rewards are negative.
            MaxReward = double.NegativeInfinity;
        }

        /// <summary>The estimator's <c>N</c> value (presumably the number of rewards observed so far).</summary>
        public int Tries
        {
            get { return estimator.N; }
        }

        /// <summary>The estimator's <c>Sum</c> value.</summary>
        public double SumReward
        {
            get { return estimator.Sum; }
        }

        /// <summary>The estimator's <c>Avg</c> value.</summary>
        public double AvgReward
        {
            get { return estimator.Avg; }
        }

        /// <summary>
        /// Largest reward passed to <see cref="UpdateReward"/> since construction or the
        /// last <see cref="Reset"/>; <see cref="double.NegativeInfinity"/> if none yet.
        /// </summary>
        public double MaxReward { get; private set; }

        /// <summary>The estimator's <c>Variance</c> value.</summary>
        public double RewardVariance
        {
            get { return estimator.Variance; }
        }

        /// <summary>The value of this action; this implementation returns <see cref="AvgReward"/>.</summary>
        public double Value
        {
            get { return AvgReward; }
        }

        /// <summary>
        /// Records one observed reward: raises <see cref="MaxReward"/> if needed and
        /// forwards the reward to the estimator.
        /// </summary>
        /// <param name="reward">The observed reward.</param>
        public void UpdateReward(double reward)
        {
            MaxReward = Math.Max(MaxReward, reward);
            estimator.UpdateReward(reward);
        }

        /// <summary>
        /// Sets <see cref="MaxReward"/> back to <see cref="double.NegativeInfinity"/>
        /// and resets the estimator.
        /// </summary>
        public void Reset()
        {
            MaxReward = double.NegativeInfinity;
            estimator.Reset();
        }

        /// <summary>
        /// Formats the average reward (format <c>N3</c>) followed by the try count
        /// right-aligned in a field of width 3.
        /// </summary>
        public override string ToString()
        {
            return string.Format("{0:N3} {1,3}", AvgReward, Tries);
        }
    }
}