Context Navigation

DefaultPolicyActionInfo.cs @ 13777

Visit:

Last change on this file since 13777 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 1.2 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
/// <summary>
/// Stores the per-action statistics that are relevant for most of the bandit policies:
/// number of tries, sum of rewards, largest observed reward and the running average reward.
/// </summary>
public class DefaultPolicyActionInfo : IBanditPolicyActionInfo {
  /// <summary>Sum of all rewards recorded since construction or the last call to <see cref="Reset"/>.</summary>
  public double SumReward { get; private set; }

  /// <summary>Number of rewards recorded since construction or the last call to <see cref="Reset"/>.</summary>
  public int Tries { get; private set; }

  /// <summary>
  /// Largest reward recorded so far; <see cref="double.NegativeInfinity"/> while no reward has been recorded.
  /// </summary>
  public double MaxReward { get; private set; }

  // running average of all recorded rewards (updated incrementally in UpdateReward)
  private double avgValue = 0.0;

  /// <summary>
  /// Average of the recorded rewards, or <see cref="double.PositiveInfinity"/> when no reward has been recorded yet.
  /// </summary>
  public double Value {
    get {
      return Tries > 0 ? avgValue : double.PositiveInfinity;
    }
  }

  /// <summary>Creates an instance in the same empty state that <see cref="Reset"/> produces.</summary>
  public DefaultPolicyActionInfo() {
    // Share the initial state with Reset(). Previously MaxReward started at 0.0 here but at
    // negative infinity after Reset(), so a fresh instance reported a maximum of 0.0 even
    // when only negative rewards had been observed.
    Reset();
  }

  /// <summary>Records one observed reward and updates all statistics.</summary>
  /// <param name="reward">The observed reward.</param>
  public void UpdateReward(double reward) {
    MaxReward = Math.Max(MaxReward, reward);
    Tries++;
    SumReward += reward;
    // incremental mean: avg_n = avg_(n-1) + (reward - avg_(n-1)) / n
    var delta = reward - avgValue;
    double alpha = 1.0 / Tries;
    avgValue = avgValue + alpha * delta;
  }

  /// <summary>Discards all recorded rewards and returns to the empty state.</summary>
  public void Reset() {
    SumReward = 0.0;
    Tries = 0;
    // identity element of Math.Max, so the first recorded reward always becomes the maximum
    MaxReward = double.NegativeInfinity;
    avgValue = 0.0;
  }

  /// <summary>Returns <see cref="Value"/>, <see cref="MaxReward"/> and <see cref="Tries"/> separated by single spaces.</summary>
  public override string ToString() {
    return string.Format("{0:F3} {1:F3} {2}", Value, MaxReward, Tries);
  }
}
46	}

Note: See TracBrowser for help on using the repository browser.