Context Navigation

BernoulliThompsonSamplingPolicy.cs @ 11849

Visit:

Last change on this file since 11849 was 11832, checked in by gkronber, 10 years ago
linear value function approximation and good results for poly-10 benchmark
File size: 1.2 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Bandit policy that draws one random value per action via <c>Rand.BetaRand</c>
  /// and selects the action with the largest drawn value.
  /// </summary>
  public class BernoulliThompsonSamplingPolicy : IBanditPolicy {
    // parameters of beta prior distribution
    private readonly double alpha = 1.0;
    private readonly double beta = 1.0;

    /// <summary>
    /// Draws a value for every <see cref="BernoulliPolicyActionInfo"/> in
    /// <paramref name="actionInfos"/> and returns the position of the largest draw.
    /// </summary>
    /// <param name="random">Random number source handed to <c>Rand.BetaRand</c>.</param>
    /// <param name="actionInfos">
    /// Per-action statistics. Elements that are not <see cref="BernoulliPolicyActionInfo"/>
    /// are skipped and are not counted when computing the returned index.
    /// </param>
    /// <returns>
    /// Zero-based position (within the filtered sequence) of the action with the largest draw;
    /// on equal draws the earliest action wins. If no draw exceeds negative infinity
    /// (e.g. the filtered sequence is empty) the debug assertion fails; because
    /// <c>Debug.Assert</c> is compiled out of release builds, -1 is returned there.
    /// </returns>
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var candidates = actionInfos.OfType<BernoulliPolicyActionInfo>();
      var largestSample = double.NegativeInfinity;
      var selected = -1;
      var position = 0;

      foreach (var candidate in candidates) {
        // presumably a Beta(successes + alpha, failures + beta) sample -- confirm against Rand.BetaRand
        var sample = Rand.BetaRand(random, candidate.NumSuccess + alpha, candidate.NumFailure + beta);
        if (sample > largestSample) {
          // strict comparison: a later action with an equal draw does not replace the current pick
          largestSample = sample;
          selected = position;
        }
        position++;
      }

      Debug.Assert(selected > -1);
      return selected;
    }

    /// <summary>
    /// Creates the per-action statistics object this policy operates on.
    /// </summary>
    /// <returns>A new <see cref="BernoulliPolicyActionInfo"/>.</returns>
    public IBanditPolicyActionInfo CreateActionInfo() {
      return new BernoulliPolicyActionInfo();
    }

    /// <summary>
    /// Returns the fixed display name of this policy.
    /// </summary>
    public override string ToString() {
      return "BernoulliThompsonSamplingPolicy";
    }
  }
}

Note: See TracBrowser for help on using the repository browser.