using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Bandit policy that draws one sampled expected reward per action and
  /// selects the action with the largest sample. Each action's state is a
  /// <c>ModelPolicyActionInfo</c> wrapping its own clone of the model passed
  /// to the constructor.
  /// </summary>
  public class GenericThompsonSamplingPolicy : IBanditPolicy {
    // Prototype model; never sampled directly here, only cloned per action
    // in CreateActionInfo and printed in ToString.
    private readonly IModel model;

    /// <summary>
    /// Creates the policy around a prototype model.
    /// </summary>
    /// <param name="model">Model that is cloned for every action info created by this policy.</param>
    public GenericThompsonSamplingPolicy(IModel model) {
      this.model = model;
    }

    /// <summary>
    /// Samples an expected reward for every action and returns the index of
    /// the action with the largest sample.
    /// </summary>
    /// <param name="random">Random source handed to each action's <c>SampleExpectedReward</c>.</param>
    /// <param name="actionInfos">Per-action state objects; only <c>ModelPolicyActionInfo</c> instances are considered.</param>
    /// <returns>
    /// Zero-based index of the selected action, counted over the
    /// <c>ModelPolicyActionInfo</c> elements of <paramref name="actionInfos"/>.
    /// Returns -1 when no sample exceeded negative infinity (e.g. no matching
    /// elements); this is only caught by a debug assertion.
    /// </returns>
    // NOTE(review): the generic type arguments below were missing in the
    // reviewed text and are reconstructed from CreateActionInfo (which builds
    // ModelPolicyActionInfo and returns IBanditPolicyActionInfo) - confirm
    // against the IBanditPolicy declaration.
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      // OfType skips elements of any other type, so the index computed below
      // is relative to the filtered sequence, not necessarily to actionInfos.
      var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
      int bestAction = -1;
      double bestQ = double.NegativeInfinity;

      var aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        var q = aInfo.SampleExpectedReward(random);
        // Ties are very unlikely and we don't care: the strict comparison
        // simply keeps the first action that reached the best sample.
        if (q > bestQ) {
          bestQ = q;
          bestAction = aIdx;
        }
      }

      // Debug.Assert is compiled out when DEBUG is not defined, so in such
      // builds -1 is returned as-is.
      Debug.Assert(bestAction > -1);
      return bestAction;
    }

    /// <summary>
    /// Creates the state object for a single action.
    /// </summary>
    /// <returns>A new <c>ModelPolicyActionInfo</c> wrapping a clone of this policy's model.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new ModelPolicyActionInfo((IModel)model.Clone());
    }

    /// <summary>
    /// Returns the policy name followed by the model's string representation in parentheses.
    /// </summary>
    public override string ToString() {
      return string.Format("GenericThompsonSamplingPolicy({0})", model);
    }
  }
}