using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Bandit policy that chooses an action by drawing one sample of the expected
  /// reward from every enabled action's model and picking the action whose
  /// sample is largest.
  /// </summary>
  public class GenericThompsonSamplingPolicy : IPolicy {
    // Prototype model; each action info created by this policy gets its own clone.
    private readonly IModel model;

    /// <summary>
    /// Creates a policy that uses clones of <paramref name="model"/> for its action infos.
    /// </summary>
    /// <param name="model">Prototype model that is cloned in <see cref="CreateActionInfo"/>.</param>
    public GenericThompsonSamplingPolicy(IModel model) {
      this.model = model;
    }

    /// <summary>
    /// Samples an expected reward for every enabled action and returns the index
    /// of the action with the largest sample.
    /// </summary>
    /// <param name="random">Random number generator handed to each action's sampler.</param>
    /// <param name="actionInfos">
    /// Action infos to choose from. Only elements of type
    /// <c>ModelPolicyActionInfo</c> are considered, and the returned index counts
    /// positions within that filtered sequence.
    /// </param>
    /// <returns>
    /// Zero-based index of the selected action, or -1 if no enabled action
    /// produced a sample greater than negative infinity (this case trips the
    /// assertion in debug builds).
    /// </returns>
    // NOTE(review): the element type IPolicyActionInfo is inferred from
    // CreateActionInfo's return type - confirm against the IPolicy declaration.
    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<ModelPolicyActionInfo>();
      int bestAction = -1;
      double bestQ = double.NegativeInfinity;
      int aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        // Disabled actions still occupy an index, so advance before skipping.
        aIdx++;
        if (aInfo.Disabled) {
          continue;
        }

        // Untried actions get no special treatment; they are sampled like all others.
        double q = aInfo.SampleExpectedReward(random);

        // Strict comparison: on ties the earliest action wins.
        if (q > bestQ) {
          bestQ = q;
          bestAction = aIdx;
        }
      }

      Debug.Assert(bestAction > -1, "SelectAction found no enabled action to select.");
      return bestAction;
    }

    /// <summary>
    /// Creates the per-action state for this policy, backed by a fresh clone of the prototype model.
    /// </summary>
    /// <returns>A new <c>ModelPolicyActionInfo</c> wrapping the cloned model.</returns>
    public IPolicyActionInfo CreateActionInfo() {
      return new ModelPolicyActionInfo((IModel)model.Clone());
    }

    /// <summary>
    /// Returns the policy name followed by the string form of its model in parentheses.
    /// </summary>
    public override string ToString() {
      return string.Format("GenericThompsonSamplingPolicy({0})", model);
    }
  }
}