Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BernoulliThompsonSamplingPolicy.cs @ 12214

Last change on this file since 12214 was 11832, checked in by gkronber, 10 years ago

linear value function approximation and good results for poly-10 benchmark

File size: 1.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Common;
8
9namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10  public class BernoulliThompsonSamplingPolicy : IBanditPolicy {
11    // parameters of beta prior distribution
12    private readonly double alpha = 1.0;
13    private readonly double beta = 1.0;
14
15    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
16      var myActionInfos = actionInfos.OfType<BernoulliPolicyActionInfo>();
17      int bestAction = -1;
18      double maxTheta = double.NegativeInfinity;
19      var aIdx = -1;
20
21      foreach (var aInfo in myActionInfos) {
22        aIdx++;
23        var theta = Rand.BetaRand(random, aInfo.NumSuccess + alpha, aInfo.NumFailure + beta);
24        if (theta > maxTheta) {
25          maxTheta = theta;
26          bestAction = aIdx;
27        }
28      }
29      Debug.Assert(bestAction > -1);
30      return bestAction;
31    }
32
33    public IBanditPolicyActionInfo CreateActionInfo() {
34      return new BernoulliPolicyActionInfo();
35    }
36
37
38    public override string ToString() {
39      return "BernoulliThompsonSamplingPolicy";
40    }
41  }
42}
Note: See TracBrowser for help on using the repository browser.