Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.Bandits/ActionInfos/ExtremeHunterActionInfo.cs @ 13777

Visit:

Last change on this file since 13777 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 3.2 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
9	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10	// stores information that is relevant for most of the policies
11	public class ExtremeHunterActionInfo : IBanditPolicyActionInfo {
12
13	// hills estimator using a fraction r of the largest sample values
14	private class OnlineHillEstimator {
15	private MinHeap<double> minHeap;
16	private MaxHeap<double> maxHeap;
17
18	public double R { get; set; }
19	private long n;
20	public double Alpha {
21	get {
22	if (minHeap.Count <= 1) return double.PositiveInfinity;
23	double xk = minHeap.GetMin();
24	if (xk.IsAlmost(0.0)) return double.PositiveInfinity;
25	var alpha = 1.0 / (minHeap.Count - 1) * minHeap.Skip(1).Sum(x => Math.Log(x) - Math.Log(xk));
26	Debug.Assert(alpha > 0);
27	return alpha;
28	}
29	}
30	public OnlineHillEstimator(double r = 0.1) {
31	this.R = r;
32	this.minHeap = new MinHeap<double>();
33	this.maxHeap = new MaxHeap<double>();
34	n = 0;
35	}
36
37	private double maxNotInHeap = double.NegativeInfinity;
38	public void Update(double reward) {
39	// if we found a new larger reward then the best n*r so far then insert and drop the smallest
40	if (minHeap.Count > 0 && reward > minHeap.GetMin()) {
41	maxHeap.Add(minHeap.ExtractDominating());
42	minHeap.Add(reward);
43	} else {
44	maxHeap.Add(reward);
45	}
46
47	// increase number of observed rewards
48	n++;
49
50	// now it could be possible that we need to increase the size of the heap by 1
51	// => add the largest observed reward so far which is not in the heap
52	if (((int)Math.Floor(n * R)) > minHeap.Count) {
53	minHeap.Add(maxHeap.ExtractDominating());
54	}
55
56	Debug.Assert(minHeap.Count == ((int)Math.Floor(n * R)));
57	Debug.Assert(maxHeap.Count == 0 \|\| minHeap.Count == 0 \|\| maxHeap.GetMin() <= minHeap.GetMin());
58	}
59	}
60
61
62	public int Tries { get; private set; }
63	public IEnumerable<double> Rewards { get { return rewards; } }
64	private OnlineHillEstimator hillEstimator;
65	private List<double> rewards;
66	public double MaxReward { get; private set; }
67	public double Value {
68	get {
69	return hillEstimator.Alpha;
70	}
71	}
72	public ExtremeHunterActionInfo() {
73	Reset();
74	}
75
76	public void UpdateReward(double reward) {
77	if (reward < 0.0) throw new ArgumentException("reward");
78	MaxReward = Math.Max(MaxReward, reward);
79	Tries++;
80	reward = (1 / (1 - reward)); // transformation from [0..1]
81	rewards.Add(reward);
82	hillEstimator.Update(reward);
83	}
84
85	public void Reset() {
86	MaxReward = double.NegativeInfinity;
87
88	this.hillEstimator = new OnlineHillEstimator();
89	this.rewards = new List<double>();
90	this.Tries = 0;
91	}
92
93	public override string ToString() {
94	return string.Format("{0:F3} {1}", Value, Tries);
95	}
96	}
97	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences