Context Navigation

GenericThompsonSamplingPolicy.cs @ 11730

Visit:

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago
#2283: several major extensions for grammatical optimization
File size: 1.5 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
namespace HeuristicLab.Algorithms.Bandits {
  /// <summary>
  /// Bandit policy that asks an <see cref="IModel"/> for one sampled expected reward per action
  /// and plays the action with the largest sample. Reward updates, disabling, resetting and
  /// statistics output are all forwarded to that model.
  /// </summary>
  public class GenericThompsonSamplingPolicy : BanditPolicy {
    // Random source handed to the model every time rewards are sampled.
    private readonly Random rng;
    // Model that produces the sampled rewards and receives all updates.
    private readonly IModel rewardModel;

    /// <summary>
    /// Creates the policy and stores the random source and the model for later use.
    /// </summary>
    /// <param name="random">Random source passed to the model when sampling rewards.</param>
    /// <param name="numActions">Number of actions; passed through to the base class constructor.</param>
    /// <param name="model">Model that samples expected rewards and is updated with observed rewards.</param>
    public GenericThompsonSamplingPolicy(Random random, int numActions, IModel model)
      : base(numActions) {
      rng = random;
      rewardModel = model;
    }

    /// <summary>
    /// Samples expected rewards from the model once and returns the action in <c>Actions</c>
    /// whose sample is the largest. On equal samples the action enumerated first wins.
    /// </summary>
    /// <returns>
    /// The selected action, or -1 when no sample is strictly greater than negative infinity
    /// (this includes the case that <c>Actions</c> is empty in a build without debug assertions).
    /// </returns>
    public override int SelectAction() {
      Debug.Assert(Actions.Any());

      var sampledRewards = rewardModel.SampleExpectedRewards(rng);
      int chosen = -1;
      var highest = double.NegativeInfinity;

      foreach (var candidate in Actions) {
        var sampled = sampledRewards[candidate];
        // Deliberately the negation of '>' rather than '<=': a NaN sample compares false
        // with '>' and therefore has to be skipped like any non-improving sample.
        if (!(sampled > highest)) {
          continue;
        }
        highest = sampled;
        chosen = candidate;
      }

      return chosen;
    }

    /// <summary>
    /// Passes an observed reward for the given action on to the model.
    /// </summary>
    /// <param name="action">Action that was played; asserted (debug builds only) to be contained in <c>Actions</c>.</param>
    /// <param name="reward">Reward observed for that action.</param>
    public override void UpdateReward(int action, double reward) {
      Debug.Assert(Actions.Contains(action));
      rewardModel.Update(action, reward);
    }

    /// <summary>
    /// Disables the action in the base policy first and then in the model.
    /// </summary>
    /// <param name="action">Action to disable.</param>
    public override void DisableAction(int action) {
      base.DisableAction(action);
      rewardModel.Disable(action);
    }

    /// <summary>
    /// Resets the base policy first and then the model.
    /// </summary>
    public override void Reset() {
      base.Reset();
      rewardModel.Reset();
    }

    /// <summary>
    /// Lets the model print its statistics; the base implementation is not called.
    /// </summary>
    public override void PrintStats() {
      rewardModel.PrintStats();
    }

    /// <summary>
    /// Returns the policy name followed by the model's string representation in parentheses.
    /// </summary>
    public override string ToString() {
      const string format = "GenericThompsonSamplingPolicy({0})";
      return string.Format(format, rewardModel);
    }
  }
}

Note: See TracBrowser for help on using the repository browser.