Context Navigation

GenericThompsonSamplingPolicy.cs @ 11849

Visit:

Last change on this file since 11849 was 11806, checked in by gkronber, 9 years ago
#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies
File size: 1.2 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
9	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
/// <summary>
/// Bandit policy that draws one sample of the expected reward for every action
/// and selects the action whose sample is largest.
/// </summary>
public class GenericThompsonSamplingPolicy : IBanditPolicy {
  // Prototype model; every action info created by this policy receives its own clone.
  private readonly IModel model;

  /// <summary>
  /// Creates a policy that uses clones of <paramref name="model"/> for its action infos.
  /// </summary>
  /// <param name="model">Model that is cloned for each action info in <see cref="CreateActionInfo"/>.</param>
  public GenericThompsonSamplingPolicy(IModel model) {
    this.model = model;
  }

  /// <summary>
  /// Samples an expected reward for each <see cref="ModelPolicyActionInfo"/> in
  /// <paramref name="actionInfos"/> and returns the position of the largest sample.
  /// </summary>
  /// <param name="random">Random number generator handed to each action info for sampling.</param>
  /// <param name="actionInfos">Action infos to choose from.</param>
  /// <returns>
  /// Zero-based index, relative to <paramref name="actionInfos"/>, of the selected action.
  /// Ties keep the earliest action. If no action could be sampled the result is -1
  /// (debug builds trip an assertion first).
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="actionInfos"/> is null.</exception>
  public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    if (actionInfos == null) throw new ArgumentNullException("actionInfos");

    int bestAction = -1;
    double bestQ = double.NegativeInfinity;
    int aIdx = -1;
    foreach (var info in actionInfos) {
      // Count every element, including ones that are skipped below, so the returned
      // index always refers to the caller's sequence and not to a filtered view of it.
      aIdx++;
      if (!(info is ModelPolicyActionInfo)) continue; // null or foreign entries cannot be sampled

      var q = ((ModelPolicyActionInfo)info).SampleExpectedReward(random);
      // Strict comparison: the first of several equal samples wins, and a NaN sample
      // is never selected because it does not compare greater than anything.
      if (q > bestQ) {
        bestQ = q;
        bestAction = aIdx;
      }
    }
    Debug.Assert(bestAction > -1);
    return bestAction;
  }

  /// <summary>
  /// Creates the per-action state for this policy.
  /// </summary>
  /// <returns>A new <see cref="ModelPolicyActionInfo"/> wrapping a clone of the policy's model.</returns>
  public IBanditPolicyActionInfo CreateActionInfo() {
    return new ModelPolicyActionInfo((IModel)model.Clone());
  }

  /// <summary>
  /// Returns the policy name followed by the string representation of its model in parentheses.
  /// </summary>
  public override string ToString() {
    return string.Format("GenericThompsonSamplingPolicy({0})", model);
  }
}
42	}

Note: See TracBrowser for help on using the repository browser.