Context Navigation

UCTPolicy.cs @ 11732

Visit:

Last change on this file since 11732 was 11732, checked in by gkronber, 9 years ago
#2283: refactoring and bug fixes
File size: 1.4 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7
8	namespace HeuristicLab.Algorithms.Bandits {
9	/* Kocsis et al. Bandit based Monte-Carlo Planning */
/// <summary>
/// UCT action-selection policy (Kocsis and Szepesvari, "Bandit based Monte-Carlo Planning").
/// Every enabled action is scored as its mean reward plus the exploration bonus
/// 2 * c * sqrt(ln(totalTries) / tries), and the action with the highest score is selected.
/// </summary>
public class UCTPolicy : IPolicy {
  // Weight of the exploration bonus in the score; larger values favor rarely tried actions.
  private readonly double c;

  /// <summary>Creates a UCT policy.</summary>
  /// <param name="c">Weight applied to the exploration term of the score (default 1.0).</param>
  public UCTPolicy(double c = 1.0) {
    this.c = c;
  }

  /// <summary>
  /// Selects the index of the action to try next.
  /// </summary>
  /// <param name="random">Not used by this policy; ties are resolved in favor of the lowest index.</param>
  /// <param name="actionInfos">
  /// Statistics of the available actions. Only entries of type <see cref="DefaultPolicyActionInfo"/>
  /// that are not disabled are considered.
  /// </param>
  /// <returns>
  /// The position (within <paramref name="actionInfos"/>) of the first enabled action that has
  /// never been tried, or otherwise of the enabled action with the highest UCT score.
  /// </returns>
  /// <exception cref="InvalidOperationException">
  /// Thrown when no action can be selected, e.g. the sequence is empty or all actions are disabled.
  /// </exception>
  public int SelectAction(Random random, IEnumerable<IPolicyActionInfo> actionInfos) {
    // Materialize once without filtering, so that array positions are exactly the positions
    // of the actions in the caller's sequence (filtering first would shift the indices).
    IPolicyActionInfo[] allInfos = actionInfos.ToArray();

    // Only enabled actions contribute to the total number of tries used in the bonus term.
    int totalTries = allInfos.OfType<DefaultPolicyActionInfo>().Where(a => !a.Disabled).Sum(a => a.Tries);

    int bestAction = -1;
    double bestQ = double.NegativeInfinity;
    for (int a = 0; a < allInfos.Length; a++) {
      // Skip entries of a foreign type (or null) in place instead of removing them.
      if (!(allInfos[a] is DefaultPolicyActionInfo)) continue;
      var info = (DefaultPolicyActionInfo)allInfos[a];
      if (info.Disabled) continue;

      // An untried action is always selected first; this also avoids a division by zero below.
      if (info.Tries == 0) return a;

      var q = info.SumReward / info.Tries + 2 * c * Math.Sqrt(Math.Log(totalTries) / info.Tries);
      if (q > bestQ) {
        bestQ = q;
        bestAction = a;
      }
    }

    // Fail loudly in every build configuration instead of handing an invalid index (-1) to the caller.
    if (bestAction < 0) {
      throw new InvalidOperationException("UCTPolicy could not select an action: no enabled action with a comparable score is available.");
    }
    return bestAction;
  }

  /// <summary>Creates an empty statistics record for a single action.</summary>
  /// <returns>A new <see cref="DefaultPolicyActionInfo"/>.</returns>
  public IPolicyActionInfo CreateActionInfo() {
    return new DefaultPolicyActionInfo();
  }

  /// <summary>Returns the policy name together with the exploration weight formatted with two decimals.</summary>
  public override string ToString() {
    return string.Format("UCTPolicy({0:F2})", c);
  }
}
45	}

Note: See TracBrowser for help on using the repository browser.