source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BernoulliThompsonSamplingPolicy.cs @ 11732

Last change on this file since 11732 was 11732, checked in by gkronber, 5 years ago

#2283: refactoring and bug fixes

File size: 1.3 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Bandit policy that draws one random value per enabled action via <c>Rand.BetaRand</c>
  /// (presumably a Beta-distributed sample; confirm against <c>Rand</c>) and selects the action
  /// with the largest draw.
  /// </summary>
  public class BernoulliThompsonSamplingPolicy : IPolicy {
    // parameters of beta prior distribution
    private readonly double alpha = 1.0;
    private readonly double beta = 1.0;

    /// <summary>
    /// Selects the index of the action with the largest sampled value.
    /// </summary>
    /// <param name="random">Random number generator handed to <c>Rand.BetaRand</c>.</param>
    /// <param name="actionInfos">
    /// Per-action statistics. Entries that are not <see cref="BernoulliPolicyActionInfo"/>
    /// (including null) and entries flagged as disabled are never selected.
    /// </param>
    /// <returns>
    /// Zero-based position of the selected action within <paramref name="actionInfos"/>,
    /// or -1 if no entry was selectable (an assertion fires in that case in debug builds).
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
      int bestAction = -1;
      double maxTheta = double.NegativeInfinity;
      var aIdx = -1;

      // Iterate the caller's sequence directly (instead of a type-filtered view) so that aIdx
      // always is the position in actionInfos, even if some entries are skipped.
      foreach (var actionInfo in actionInfos) {
        aIdx++;
        var aInfo = actionInfo as BernoulliPolicyActionInfo;
        if (aInfo == null || aInfo.Disabled) continue;

        // observed successes / failures are added to the prior parameters
        var theta = Rand.BetaRand(random, aInfo.NumSuccess + alpha, aInfo.NumFailure + beta);
        if (theta > maxTheta) {
          maxTheta = theta;
          bestAction = aIdx;
        }
      }
      Debug.Assert(bestAction > -1);
      return bestAction;
    }

    /// <summary>
    /// Creates a fresh action-info object of the type this policy works with.
    /// </summary>
    public IPolicyActionInfo CreateActionInfo() {
      return new BernoulliPolicyActionInfo();
    }


    /// <summary>
    /// Returns the fixed display name of this policy.
    /// </summary>
    public override string ToString() {
      return "BernoulliThompsonSamplingPolicy";
    }
  }
}
Note: See TracBrowser for help on using the repository browser.