using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Bandit action info that uses a statistical model to sample from and update
  /// the posterior distribution p(Reward | Data).
  /// </summary>
  public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
    // Shared source of seeds for the per-instance generators. A time-seeded
    // `new Random()` per instance (or per call) can produce identical sequences
    // when several are created within the same clock tick.
    private static readonly Random seedSource = new Random();
    private static readonly object seedGate = new object();

    private readonly IModel model;

    // Generator used by the Value property; created once so that successive
    // reads of Value draw successive samples instead of re-seeding every time.
    private readonly Random valueRandom;

    /// <summary>
    /// Largest reward passed to <see cref="UpdateReward"/> since construction or
    /// the last <see cref="Reset"/>. Starts at 0.0, so it never drops below zero.
    /// </summary>
    public double MaxReward { get; private set; }

    /// <summary>
    /// A fresh sample drawn from the model on every read (not a cached value).
    /// </summary>
    public double Value {
      get { return model.Sample(valueRandom); }
    }

    /// <summary>
    /// Number of rewards recorded since construction or the last <see cref="Reset"/>.
    /// </summary>
    public int Tries { get; private set; }

    /// <summary>
    /// Creates an action info backed by the given model.
    /// </summary>
    /// <param name="model">Model that is sampled, updated and reset by this instance.</param>
    public ModelPolicyActionInfo(IModel model) {
      this.model = model;

      // System.Random is not thread-safe, so seed generation is serialized.
      int seed;
      lock (seedGate) {
        seed = seedSource.Next();
      }
      valueRandom = new Random(seed);
    }

    /// <summary>
    /// Records one observed reward: counts the try, tracks the maximum reward
    /// and forwards the reward to the model.
    /// </summary>
    /// <param name="reward">The observed reward.</param>
    public void UpdateReward(double reward) {
      Tries++;
      MaxReward = Math.Max(MaxReward, reward);
      model.Update(reward);
    }

    /// <summary>
    /// Draws a sample from the model using the supplied random number generator.
    /// </summary>
    /// <param name="random">Random number generator passed through to the model.</param>
    /// <returns>The value returned by the model's Sample method.</returns>
    public double SampleExpectedReward(Random random) {
      return model.Sample(random);
    }

    /// <summary>
    /// Restores the state the instance had right after construction: no tries,
    /// no maximum reward and a reset model.
    /// </summary>
    public void Reset() {
      Tries = 0;
      // Clear the maximum as well; otherwise a stale value from before the
      // reset would still be reported.
      MaxReward = 0.0;
      model.Reset();
    }

    /// <summary>
    /// Returns "model " followed by the model's string representation.
    /// </summary>
    public override string ToString() {
      // The placeholder index must be 0: only one argument is supplied, and an
      // out-of-range index makes string.Format throw a FormatException.
      return string.Format("model {0}", model);
    }
  }
}