Context Navigation

ModelPolicyActionInfo.cs @ 13777

Visit:

Last change on this file since 13777 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 1.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
9	// uses a statistical model to sample and update posterior distribution p(Reward \| Data)
/// <summary>
/// Action info for a bandit policy that delegates reward estimation to a statistical model.
/// Observed rewards are forwarded to the model, and estimates are obtained by sampling from it.
/// </summary>
public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
  // One shared generator for the Value property. Creating a new Random on every access
  // seeds it from the system clock on .NET Framework, so reads in quick succession would
  // use the same seed and return identical samples.
  private static readonly Random valueRandom = new Random();
  // System.Random is not thread-safe; serialize access to the shared instance.
  private static readonly object valueRandomLock = new object();

  private readonly IModel model;

  /// <summary>
  /// Largest reward passed to <see cref="UpdateReward"/> since construction or the last
  /// <see cref="Reset"/>. Starts at 0.0, so it never reports a value below zero.
  /// </summary>
  public double MaxReward { get; private set; }

  /// <summary>
  /// A fresh sample drawn from the model on every read (the value is not cached).
  /// </summary>
  public double Value {
    get {
      lock (valueRandomLock) {
        return model.Sample(valueRandom);
      }
    }
  }

  /// <summary>Number of rewards recorded since construction or the last <see cref="Reset"/>.</summary>
  public int Tries { get; private set; }

  /// <summary>Creates an action info that samples from and updates the given model.</summary>
  /// <param name="model">Model used for sampling and updated with each observed reward.</param>
  public ModelPolicyActionInfo(IModel model) {
    this.model = model;
  }

  /// <summary>Records one observed reward: counts the try, tracks the maximum and updates the model.</summary>
  /// <param name="reward">The observed reward.</param>
  public void UpdateReward(double reward) {
    Tries++;
    MaxReward = Math.Max(MaxReward, reward);
    model.Update(reward);
  }

  /// <summary>Draws a sample from the model using the caller-supplied random number generator.</summary>
  /// <param name="random">Random number generator passed through to the model.</param>
  /// <returns>The value sampled from the model.</returns>
  public double SampleExpectedReward(Random random) {
    return model.Sample(random);
  }

  /// <summary>Restores the state the object had directly after construction and resets the model.</summary>
  public void Reset() {
    Tries = 0;
    // Also clear the running maximum; otherwise it would leak into the next run.
    MaxReward = 0.0;
    model.Reset();
  }

  /// <summary>Returns "model " followed by the model's string representation.</summary>
  public override string ToString() {
    // Placeholder index must be 0: only one argument is supplied ({1} throws FormatException).
    return string.Format("model {0}", model);
  }
}
43	}

Note: See TracBrowser for help on using the repository browser.