Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs @ 11747

Visit:

Last change on this file since 11747 was 11747, checked in by gkronber, 10 years ago
#2283: implemented test problems for MCTS
File size: 7.4 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using HeuristicLab.Algorithms.Bandits;
7	using HeuristicLab.Common;
8	using HeuristicLab.Problems.GrammaticalOptimization;
9
namespace HeuristicLab.Algorithms.GrammaticalOptimization {
  /// <summary>
  /// Samples sentences of a grammar using a search tree whose branches are selected by a bandit policy.
  /// Each tree node stands for one alternative that replaced the first non-terminal of the phrase; a node
  /// is first completed randomly <c>randomTries</c> times before its children are created.
  /// </summary>
  public class MctsSampler {
    /// <summary>
    /// A node of the search tree. The action info used by the bandit policy is assigned by the
    /// sampler after construction.
    /// </summary>
    private class TreeNode {
      public string ident;
      public int randomTries; // number of random completions already started from this node
      public IBanditPolicyActionInfo actionInfo;
      public TreeNode parent;
      public TreeNode[] children; // null until the node is expanded
      public bool done = false;

      public TreeNode(string id, TreeNode parent) {
        this.ident = id;
        this.parent = parent;
      }

      public override string ToString() {
        // actionInfo is set after construction, so it can still be null (e.g. when inspected in a debugger)
        if (actionInfo == null) {
          return string.Format("Node({0} done: {1}, policy: <none>)", ident, done);
        }
        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, actionInfo.Tries, done, actionInfo);
      }
    }


    /// <summary>Raised with (sentence, quality) whenever a sampled sentence is better than all previous ones of the current run.</summary>
    public event Action<string, double> FoundNewBestSolution;
    /// <summary>Raised with (sentence, quality) for every sampled and evaluated sentence.</summary>
    public event Action<string, double> SolutionEvaluated;

    private readonly int maxLen;
    private readonly IProblem problem;
    private readonly Random random;
    private readonly int randomTries;
    private readonly IBanditPolicy policy;

    private TreeNode lastNode; // the bottom node in one episode
    private TreeNode rootNode;

    public int treeDepth;
    public int treeSize;
    private double bestQuality;

    /// <summary>
    /// Creates a new sampler.
    /// </summary>
    /// <param name="problem">Problem that supplies the grammar and evaluates sentences.</param>
    /// <param name="maxLen">Maximum length of a sampled sentence.</param>
    /// <param name="random">Random number generator used for random completion and by the policy.</param>
    /// <param name="randomTries">Number of random completions from a node before the node is expanded.</param>
    /// <param name="policy">Bandit policy that creates the action infos and selects among the children of a node.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="problem"/>, <paramref name="random"/> or <paramref name="policy"/> is null.</exception>
    public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
      if (problem == null) throw new ArgumentNullException("problem");
      if (random == null) throw new ArgumentNullException("random");
      if (policy == null) throw new ArgumentNullException("policy");

      this.maxLen = maxLen;
      this.problem = problem;
      this.random = random;
      this.randomTries = randomTries;
      this.policy = policy;
    }

    /// <summary>
    /// Samples and evaluates sentences until the root node is done or <paramref name="maxIterations"/>
    /// sentences have been sampled. The quality passed to the events is the evaluation result divided by
    /// <c>problem.BestKnownQuality(maxLen)</c>. The tree is discarded again at the end of the run.
    /// </summary>
    /// <param name="maxIterations">Upper bound for the number of sampled sentences.</param>
    public void Run(int maxIterations) {
      bestQuality = double.MinValue;
      InitPolicies(problem.Grammar);
      for (int i = 0; !rootNode.done && i < maxIterations; i++) {
        var sentence = SampleSentence(problem.Grammar).ToString();
        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
        Debug.Assert(quality >= 0 && quality <= 1.0);
        DistributeReward(quality);

        RaiseSolutionEvaluated(sentence, quality);

        if (quality > bestQuality) {
          bestQuality = quality;
          RaiseFoundNewBestSolution(sentence, quality);
        }
      }

      // clean up: replace the tree by a fresh root and force a collection of the discarded nodes
      InitPolicies(problem.Grammar);
      GC.Collect();
    }

    /// <summary>
    /// Writes tree statistics to the console: a summary line, followed by one table per level
    /// (identifier, value * 10 and tries of every child), descending into the not yet finished child
    /// with the highest value. Does nothing when no tree exists yet.
    /// </summary>
    public void PrintStats() {
      var n = rootNode;
      if (n == null) return; // Run() has not created a tree yet

      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.actionInfo.Tries, n.actionInfo.Value, bestQuality);
      // n becomes null as soon as all children of the current node are done
      while (n != null && n.children != null && n.children.Length > 0) {
        var children = n.children;
        Console.WriteLine("{0,-30}", n.ident);
        double maxVForRow = children.Max(ch => ch.actionInfo.Value);
        if (maxVForRow == 0) maxVForRow = 1.0; // avoid a division by zero when scaling the colors

        WriteRow(children, maxVForRow, ch => string.Format("{0,5}", ch.ident));
        Console.WriteLine();
        WriteRow(children, maxVForRow, ch => string.Format("{0,5:F2}", ch.actionInfo.Value * 10));
        Console.WriteLine();
        WriteRow(children, maxVForRow, ch => string.Format("{0,5}", ch.done ? "X" : ch.actionInfo.Tries.ToString()));
        Console.ForegroundColor = ConsoleColor.White;
        Console.WriteLine();

        // FirstOrDefault instead of First: when every child is done there is nothing left to descend into
        n = children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).FirstOrDefault();
      }
      Console.WriteLine("-----------------------");
    }

    // Writes one colored cell per child (without a trailing line break).
    private void WriteRow(TreeNode[] children, double maxVForRow, Func<TreeNode, string> cellText) {
      foreach (var ch in children) {
        SetColorForChild(ch, maxVForRow);
        Console.Write(cellText(ch));
      }
    }

    // Sets the console foreground color according to the child's value relative to the best value of its row.
    private void SetColorForChild(TreeNode ch, double maxVForRow) {
      Console.ForegroundColor = ConsoleEx.ColorForValue(ch.actionInfo.Value / maxVForRow);
    }

    // Replaces the tree by a single root node for the sentence symbol and resets the tree statistics.
    private void InitPolicies(IGrammar grammar) {
      rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), null);
      rootNode.actionInfo = policy.CreateActionInfo();
      treeDepth = 0;
      treeSize = 0;
    }

    // Samples one sentence, starting from the sentence symbol of the grammar.
    private Sequence SampleSentence(IGrammar grammar) {
      lastNode = null;
      var startPhrase = new Sequence(grammar.SentenceSymbol);
      return CompleteSentence(grammar, startPhrase);
    }

    /// <summary>
    /// Completes <paramref name="phrase"/> to a sentence by walking down the tree from the root. At a node
    /// with fewer than <c>randomTries</c> random completions the rest of the phrase is completed randomly;
    /// otherwise the node is expanded (if necessary) and the bandit policy selects the alternative that
    /// replaces the first non-terminal. The last visited node is stored in <c>lastNode</c>.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when the phrase is, or can only become, longer than <c>maxLen</c>.</exception>
    private Sequence CompleteSentence(IGrammar g, Sequence phrase) {
      if (phrase.Length > maxLen) throw new ArgumentException("The phrase is longer than the maximum sentence length.", "phrase");
      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException("The phrase cannot be completed within the maximum sentence length.", "phrase");
      TreeNode n = rootNode;
      var curDepth = 0;
      while (!phrase.IsTerminal) {
        if (n.randomTries < randomTries) {
          n.randomTries++;
          treeDepth = Math.Max(treeDepth, curDepth);
          lastNode = n;
          return g.CompleteSentenceRandomly(random, phrase, maxLen);
        }

        char nt = phrase.FirstNonTerminal;

        int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
        Debug.Assert(maxLenOfReplacement > 0);

        // materialize once: the query is needed for creating the children and for looking up the selected alternative
        var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement).ToArray();

        if (n.children == null) {
          // create a new node for each alternative; children[i] corresponds to alts[i]
          n.children = alts.Select(alt => new TreeNode(alt.ToString(), n)).ToArray();
          foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
          treeSize += n.children.Length;
        }

        // => select using bandit policy
        int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
        Sequence selectedAlt = alts[selectedAltIdx];

        // replace nt with alt
        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);

        curDepth++;

        // prepare for next iteration
        n = n.children[selectedAltIdx];
      } // while

      lastNode = n;

      // the last node is a leaf node (sentence is done), so we never need to visit this node again
      n.done = true;

      treeDepth = Math.Max(treeDepth, curDepth);
      return phrase;
    }

    /// <summary>
    /// Passes <paramref name="reward"/> from <c>lastNode</c> up to the root. Nodes that are done are
    /// disabled instead of updated; a node whose children are all done becomes done itself and is
    /// disabled with the best value of its children.
    /// </summary>
    private void DistributeReward(double reward) {
      // iterate in reverse order (bottom up)
      var node = lastNode;
      while (node != null) {
        if (node.done) node.actionInfo.Disable(reward);
        if (node.children != null && node.children.All(c => c.done)) {
          node.done = true;
          var bestActionValue = node.children.Select(c => c.actionInfo.Value).Max();
          node.actionInfo.Disable(bestActionValue);
        }
        if (!node.done) {
          node.actionInfo.UpdateReward(reward);
        }
        node = node.parent;
      }
    }

    private void RaiseSolutionEvaluated(string sentence, double quality) {
      // copy to a local so that the null check and the call see the same delegate
      var handler = SolutionEvaluated;
      if (handler != null) handler(sentence, quality);
    }

    private void RaiseFoundNewBestSolution(string sentence, double quality) {
      // copy to a local so that the null check and the call see the same delegate
      var handler = FoundNewBestSolution;
      if (handler != null) handler(sentence, quality);
    }
  }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences