using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Per-action statistics that reduce every observed reward to a binary
  /// outcome: a reward greater than zero is counted as a success, anything
  /// else as a failure. The largest raw reward seen is tracked separately.
  /// </summary>
  public class BernoulliPolicyActionInfo : IBanditPolicyActionInfo {
    /// <summary>Number of recorded rewards that were greater than zero.</summary>
    public int NumSuccess { get; private set; }

    /// <summary>Number of recorded rewards that were not greater than zero.</summary>
    public int NumFailure { get; private set; }

    /// <summary>Total number of recorded rewards (successes plus failures).</summary>
    public int Tries { get { return NumSuccess + NumFailure; } }

    /// <summary>
    /// Largest raw reward passed to <see cref="UpdateReward"/> so far.
    /// </summary>
    /// <remarks>
    /// NOTE(review): a newly constructed instance starts with the default
    /// value 0.0, whereas <see cref="Reset"/> sets this to
    /// <see cref="double.NegativeInfinity"/>. Confirm which initial value
    /// callers expect before unifying the two.
    /// </remarks>
    public double MaxReward { get; private set; }

    /// <summary>
    /// Fraction of recorded rewards that were successes, or 0.0 when no
    /// reward has been recorded yet.
    /// </summary>
    public double Value {
      get {
        int tries = Tries;
        // Guard the empty case: 0 / 0.0 would evaluate to NaN, and NaN
        // compares false against every number, which silently breaks any
        // "pick the largest value" comparison done by a caller.
        if (tries == 0) return 0.0;
        return NumSuccess / (double)tries;
      }
    }

    /// <summary>
    /// Records one observed reward.
    /// </summary>
    /// <param name="reward">
    /// The observed reward. It is not validated to be exactly 0 or 1; any
    /// value greater than zero counts as a success.
    /// </param>
    public void UpdateReward(double reward) {
      MaxReward = Math.Max(MaxReward, reward);

      if (reward > 0) {
        NumSuccess++;
      } else {
        NumFailure++;
      }
    }

    /// <summary>
    /// Clears the success and failure counters and sets
    /// <see cref="MaxReward"/> to <see cref="double.NegativeInfinity"/>.
    /// </summary>
    public void Reset() {
      NumSuccess = 0;
      NumFailure = 0;
      MaxReward = double.NegativeInfinity;
    }

    /// <summary>
    /// Writes the current <see cref="Value"/> to the console.
    /// </summary>
    public void PrintStats() {
      Console.WriteLine("expected value {0,5:F2}", Value);
    }
  }
}