Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs @ 11747

Visit:

Last change on this file since 11747 was 11747, checked in by gkronber, 10 years ago
#2283: implemented test problems for MCTS
File size: 2.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Common;
8
9	namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
10	// also called softmax policy
// Boltzmann exploration (softmax) bandit policy.
// Every enabled action is sampled with probability proportional to
// exp(beta * value(action)); disabled actions get weight zero.
// beta = 0 gives equal weights to all enabled actions; larger beta concentrates
// the probability mass on the actions with the highest value.
public class BoltzmannExplorationPolicy : IBanditPolicy {
  // Non-negative scaling factor applied to the action value before exponentiation.
  private readonly double beta;
  // Maps the statistics of an action to the scalar value used in the exponent.
  private readonly Func<DefaultPolicyActionInfo, double> valueFunction;

  // Creates a policy that uses DefaultPolicyActionInfo.AverageReward as the action value.
  public BoltzmannExplorationPolicy(double beta) : this(beta, DefaultPolicyActionInfo.AverageReward) { }

  // Creates a policy with a custom value function.
  // Throws ArgumentException when beta is negative and
  // ArgumentNullException (an ArgumentException) when valueFunction is null.
  public BoltzmannExplorationPolicy(double beta, Func<DefaultPolicyActionInfo, double> valueFunction) {
    if (beta < 0) throw new ArgumentException("beta must be non-negative.", "beta");
    // fail early instead of with a NullReferenceException on the first SelectAction call
    if (valueFunction == null) throw new ArgumentNullException("valueFunction");
    this.beta = beta;
    this.valueFunction = valueFunction;
  }

  // Samples one action index with probability proportional to exp(beta * value).
  // The returned index is the position within the DefaultPolicyActionInfo elements of actionInfos.
  // NOTE(review): elements of any other type are dropped by OfType, which would shift the
  // indices relative to actionInfos - presumably callers only pass DefaultPolicyActionInfo; confirm.
  public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    Debug.Assert(actionInfos.Any());

    // materialize once so the source sequence is not re-enumerated for every pass below
    var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray();

    // try any of the untried actions randomly
    // for RoyalSequence it is much better to select the actions in the order of occurrence (all terminal alternatives first)
    //if (myActionInfos.Any(aInfo => !aInfo.Disabled && aInfo.Tries == 0)) {
    //  return myActionInfos
    //    .Select((aInfo, idx) => new { aInfo, idx })
    //    .Where(p => !p.aInfo.Disabled)
    //    .Where(p => p.aInfo.Tries == 0)
    //    .SelectRandom(random).idx;
    //}

    // exponents beta * value for the enabled actions (entries of disabled actions stay unused)
    var exponents = new double[myActionInfos.Length];
    var maxExponent = double.NegativeInfinity;
    for (int i = 0; i < myActionInfos.Length; i++) {
      if (myActionInfos[i].Disabled) continue;
      exponents[i] = beta * valueFunction(myActionInfos[i]);
      if (exponents[i] > maxExponent) maxExponent = exponents[i];
    }

    // Subtracting the largest exponent rescales all weights by the same positive factor, so the
    // selection probabilities are unchanged, but Math.Exp can no longer overflow to infinity
    // (large beta * value) and the best action can no longer underflow to weight zero.
    // Without a finite maximum (no enabled action or infinite values) keep the unshifted weights.
    if (double.IsInfinity(maxExponent)) maxExponent = 0.0;

    var w = new double[myActionInfos.Length];
    for (int i = 0; i < myActionInfos.Length; i++) {
      w[i] = myActionInfos[i].Disabled
        ? 0.0
        : Math.Exp(exponents[i] - maxExponent);
    }

    var bestAction = myActionInfos
      .Select((aInfo, idx) => new { aInfo, idx })
      .SampleProportional(random, w)
      .Select(p => p.idx)
      .First();
    Debug.Assert(bestAction >= 0);
    return bestAction;
  }

  // Creates the per-action statistics object used by this policy.
  public IBanditPolicyActionInfo CreateActionInfo() {
    return new DefaultPolicyActionInfo();
  }

  // Returns the policy name together with beta formatted with two decimals.
  public override string ToString() {
    return string.Format("BoltzmannExplorationPolicy({0:F2})", beta);
  }
}
60	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences