using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  // policy for k-armed bandit (see Auer et al. 2002)
  public class UCB1Policy : IBanditPolicy {
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
      double bestQ = double.NegativeInfinity;
      // total number of tries over all enabled actions (n in the UCB1 formula)
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);

      var bestActions = new List<int>();
      int aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
        double q;
        if (aInfo.Tries == 0) {
          // untried actions get an infinite score so that each action is tried at least once
          q = double.PositiveInfinity;
        } else {
          // UCB1 value: average reward plus exploration bonus (scaled by 0.5 in this policy)
          q = aInfo.SumReward / aInfo.Tries + 0.5 * Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
        }
        if (q > bestQ) {
          bestQ = q;
          bestActions.Clear();
          bestActions.Add(aIdx);
        } else if (q == bestQ) {
          bestActions.Add(aIdx);
        }
      }
      Debug.Assert(bestActions.Any());
      // break ties among equally good actions uniformly at random
      return bestActions.SelectRandom(random);
    }

    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }
    public override string ToString() {
      return "UCB1Policy";
    }
  }
}
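For context, the selection rule above is the UCB1 index of Auer et al. (2002): each arm's score is its empirical mean reward plus an exploration bonus sqrt(2 ln n / n_i), where n is the total number of tries and n_i the tries of arm i (this file additionally scales the bonus by 0.5). The standalone sketch below illustrates the same rule outside of HeuristicLab; all names in it (ArmStats, SimpleUcb1, Demo) are made up for illustration and are not part of the HeuristicLab API.

using System;
using System.Collections.Generic;
using System.Linq;

// Hypothetical, self-contained illustration of the UCB1 rule used in UCB1Policy above.
public class ArmStats {
  public int Tries;
  public double SumReward;
}

public static class SimpleUcb1 {
  // Returns the index of the arm with the highest UCB1 score, breaking ties at random.
  public static int Select(IList<ArmStats> arms, Random random) {
    int totalTries = arms.Sum(a => a.Tries);
    var best = new List<int>();
    double bestQ = double.NegativeInfinity;
    for (int i = 0; i < arms.Count; i++) {
      // untried arms get an infinite score so each arm is sampled at least once
      double q = arms[i].Tries == 0
        ? double.PositiveInfinity
        : arms[i].SumReward / arms[i].Tries
          + 0.5 * Math.Sqrt(2 * Math.Log(totalTries) / arms[i].Tries);
      if (q > bestQ) { bestQ = q; best.Clear(); best.Add(i); }
      else if (q == bestQ) best.Add(i);
    }
    return best[random.Next(best.Count)];
  }
}

public static class Demo {
  public static void Main() {
    var random = new Random(0);
    var arms = new List<ArmStats> {
      new ArmStats { Tries = 10, SumReward = 6.0 },  // mean 0.6
      new ArmStats { Tries = 3,  SumReward = 2.4 },  // mean 0.8, high uncertainty
      new ArmStats { Tries = 0,  SumReward = 0.0 }   // untried
    };
    Console.WriteLine(SimpleUcb1.Select(arms, random)); // prints 2: the untried arm wins
  }
}

As in UCB1Policy, an arm with zero tries dominates every finite score, so the policy first samples each enabled arm once before the mean-plus-bonus trade-off takes over.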