Context Navigation

UCBPolicy.cs @ 12966

Visit:

Last change on this file since 12966 was 12876, checked in by gkronber, 9 years ago
#2283: implemented first crude version of extreme hunter algorithm in branch
File size: 1.6 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
9	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10	// UCB policy as described in Powell, Approximate Dynamic Programming, section 12.3.6, page 467.
/// <summary>
/// Upper-confidence-bound bandit policy. Each action is scored with its average
/// reward plus an exploration bonus, and the action with the highest score is chosen.
/// </summary>
public class UCBPolicy : IBanditPolicy {
  // Factor that scales the exploration bonus; assigned once in the constructor.
  // NOTE(review): presumably the largest reward an action can yield - confirm with callers.
  private readonly double maxReward;

  /// <summary>Creates the policy.</summary>
  /// <param name="maxReward">Multiplier applied to the exploration term (defaults to 1.0).</param>
  public UCBPolicy(double maxReward = 1.0) {
    this.maxReward = maxReward;
  }

  /// <summary>
  /// Scores every <see cref="DefaultPolicyActionInfo"/> in <paramref name="actionInfos"/> with
  /// <c>SumReward / Tries + maxReward * sqrt(2 * ln(totalTries) / Tries)</c> and picks the best one.
  /// Actions that have never been tried score positive infinity, so they are preferred.
  /// </summary>
  /// <param name="random">Random source handed to the tie-breaking selection.</param>
  /// <param name="actionInfos">
  /// Statistics of the available actions. Elements that are not
  /// <see cref="DefaultPolicyActionInfo"/> instances are skipped.
  /// </param>
  /// <returns>
  /// Zero-based position of the selected action within the sequence of
  /// <see cref="DefaultPolicyActionInfo"/> elements of <paramref name="actionInfos"/>.
  /// </returns>
  public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    // Materialize once: the filtered sequence is needed both for the total number of
    // tries and for the scoring loop, and a deferred query would be evaluated twice.
    var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToList();
    double bestQ = double.NegativeInfinity;
    int totalTries = myActionInfos.Sum(a => a.Tries);

    // Loop-invariant numerator of the exploration term. It is only read for actions
    // with Tries > 0, in which case totalTries is positive as well.
    double explorationNumerator = 2 * Math.Log(totalTries);

    var bestActions = new List<int>();
    for (int aIdx = 0; aIdx < myActionInfos.Count; aIdx++) {
      var aInfo = myActionInfos[aIdx];
      double q;
      if (aInfo.Tries == 0) {
        // Untried actions get the highest possible score.
        q = double.PositiveInfinity;
      } else {
        q = aInfo.SumReward / aInfo.Tries + maxReward * Math.Sqrt(explorationNumerator / aInfo.Tries);
      }
      if (q > bestQ) {
        // Strictly better score: forget all previous candidates.
        bestQ = q;
        bestActions.Clear();
        bestActions.Add(aIdx);
      } else if (q.IsAlmost(bestQ)) {
        // Score ties with the current best: keep it as an additional candidate.
        bestActions.Add(aIdx);
      }
    }
    Debug.Assert(bestActions.Any());
    // One of the tied candidates is picked via the SelectRandom helper (defined elsewhere).
    return bestActions.SelectRandom(random);
  }

  /// <summary>Creates an empty statistics object for one action of this policy.</summary>
  /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
  public IBanditPolicyActionInfo CreateActionInfo() {
    return new DefaultPolicyActionInfo();
  }

  /// <summary>Returns the display name of this policy.</summary>
  public override string ToString() {
    return "UCBPolicy(Powell)";
  }
}
52	}

Note: See TracBrowser for help on using the repository browser.