Context Navigation

ModelPolicyActionInfo.cs @ 13042

Visit:

Last change on this file since 13042 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 1.1 KB

Rev	Line
[11732]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Diagnostics;
	4	using System.Linq;
	5	using System.Text;
	6	using System.Threading.Tasks;
	7
namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Action info that uses a statistical model to sample from, and update,
  /// the posterior distribution p(Reward | Data).
  /// </summary>
  public class ModelPolicyActionInfo : IBanditPolicyActionInfo {
    // One generator shared by every read of Value. Creating a fresh Random per
    // read can replay the same sequence when reads happen close together,
    // because the default seed is derived from the system clock on .NET Framework.
    private static readonly Random sharedRandom = new Random();
    // System.Random is not thread-safe, so access to the shared instance is serialized.
    private static readonly object sharedRandomLock = new object();

    private readonly IModel model;

    /// <summary>
    /// Largest reward passed to <see cref="UpdateReward"/> since construction or the
    /// last <see cref="Reset"/>. Starts at 0.0, so it never drops below zero.
    /// </summary>
    public double MaxReward { get; private set; }

    /// <summary>
    /// A fresh sample drawn from the model on every read.
    /// </summary>
    public double Value {
      get {
        lock (sharedRandomLock) {
          return model.Sample(sharedRandom);
        }
      }
    }

    /// <summary>
    /// Number of calls to <see cref="UpdateReward"/> since construction or the
    /// last <see cref="Reset"/>.
    /// </summary>
    public int Tries { get; private set; }

    /// <summary>
    /// Creates an action info backed by the given model.
    /// </summary>
    /// <param name="model">Model that is sampled, updated and reset by this instance.</param>
    /// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
    public ModelPolicyActionInfo(IModel model) {
      if (model == null) throw new ArgumentNullException("model");
      this.model = model;
    }

    /// <summary>
    /// Records one observed reward: increments <see cref="Tries"/>, raises
    /// <see cref="MaxReward"/> if necessary and forwards the reward to the model.
    /// </summary>
    /// <param name="reward">The observed reward.</param>
    public void UpdateReward(double reward) {
      Tries++;
      MaxReward = Math.Max(MaxReward, reward);
      model.Update(reward);
    }

    /// <summary>
    /// Draws a sample from the model using the supplied random number generator.
    /// </summary>
    /// <param name="random">Generator handed to the model for sampling.</param>
    /// <returns>The value returned by the model's Sample method.</returns>
    public double SampleExpectedReward(Random random) {
      return model.Sample(random);
    }

    /// <summary>
    /// Restores the initial state: zero tries, zero maximum reward and a reset model.
    /// </summary>
    public void Reset() {
      Tries = 0;
      // MaxReward must be cleared too; otherwise the maximum of an earlier run
      // would leak into the statistics collected after the reset.
      MaxReward = 0.0;
      model.Reset();
    }

    /// <summary>
    /// Returns "model " followed by the model's string representation.
    /// </summary>
    public override string ToString() {
      // Index {0}: exactly one argument is supplied. The former "{1}" made
      // string.Format throw a FormatException on every call.
      return string.Format("model {0}", model);
    }
  }
}

Note: See TracBrowser for help on using the repository browser.