using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Action info that uses a statistical model to sample and update the
  /// posterior distribution p(Reward | Data).
  /// </summary>
  /// <remarks>
  /// The disabled state is encoded in <see cref="Tries"/>: a value of -1 means
  /// the action is disabled.
  /// </remarks>
  public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
    // Source of seeds for the per-thread generators. System.Random is not
    // thread-safe, so every access is guarded by a lock on this instance.
    private static readonly Random seedSource = new Random();

    // One generator per thread, created lazily on first use by the Value getter.
    [ThreadStatic]
    private static Random threadRandom;

    private readonly IModel model;

    /// <summary>True when the action has been disabled (Tries == -1).</summary>
    public bool Disabled {
      get { return Tries == -1; }
    }

    /// <summary>
    /// A sample of the expected reward drawn from the model.
    /// </summary>
    /// <remarks>
    /// Uses a long-lived per-thread generator instead of constructing a new
    /// <see cref="Random"/> on every read: freshly constructed generators may
    /// share a clock-derived seed when created in quick succession, which
    /// would make consecutive samples correlated or identical.
    /// </remarks>
    public double Value {
      get { return model.SampleExpectedReward(GetThreadRandom()); }
    }

    /// <summary>
    /// Number of reward updates since construction or the last
    /// <see cref="Reset"/>; -1 while the action is disabled.
    /// </summary>
    public int Tries { get; private set; }

    /// <summary>Creates the action info around the given model.</summary>
    /// <param name="model">Model that is sampled from and updated with observed rewards.</param>
    public ModelPolicyActionInfo(IModel model) {
      this.model = model;
    }

    /// <summary>Counts one more try and forwards the reward to the model.</summary>
    /// <param name="reward">The observed reward.</param>
    public void UpdateReward(double reward) {
      // NOTE(review): Debug.Assert is only active in DEBUG builds; in other builds
      // updating a disabled action increments Tries from -1 to 0 — confirm callers
      // never update disabled actions.
      Debug.Assert(!Disabled);
      Tries++;
      model.Update(reward);
    }

    /// <summary>Samples the expected reward from the model using the supplied generator.</summary>
    /// <param name="random">Random number generator passed through to the model.</param>
    /// <returns>The value returned by the model's SampleExpectedReward.</returns>
    public double SampleExpectedReward(Random random) {
      return model.SampleExpectedReward(random);
    }

    /// <summary>Marks the action as disabled by setting Tries to -1.</summary>
    public void Disable() {
      this.Tries = -1;
    }

    /// <summary>
    /// Sets Tries back to 0 (which also clears the disabled state) and resets the model.
    /// </summary>
    public void Reset() {
      Tries = 0;
      model.Reset();
    }

    /// <summary>Delegates to the model's PrintStats.</summary>
    public void PrintStats() {
      model.PrintStats();
    }

    /// <summary>Returns the disabled flag and the model formatted into one string.</summary>
    public override string ToString() {
      return string.Format("disabled {0} model {1}", Disabled, model);
    }

    // Returns the calling thread's generator, creating it on first use with a
    // seed drawn from the shared seed source so that threads get distinct sequences.
    private static Random GetThreadRandom() {
      if (threadRandom == null) {
        int seed;
        lock (seedSource) {
          seed = seedSource.Next();
        }
        threadRandom = new Random(seed);
      }
      return threadRandom;
    }
  }
}