Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs @ 13234

Visit:

Last change on this file since 13234 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 9.5 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Resources;
6	using System.Runtime.InteropServices;
7	using System.Text;
8	using System.Windows.Markup;
9	using HeuristicLab.Algorithms.Bandits;
10	using HeuristicLab.Algorithms.Bandits.BanditPolicies;
11	using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
12	using HeuristicLab.Common;
13	using HeuristicLab.Problems.GrammaticalOptimization;
14
15	namespace HeuristicLab.Algorithms.GrammaticalOptimization {
16	// a search procedure that uses a policy to generate sentences and updates the policy (online RL)
17	// 1) Start with phrase = sentence symbol of grammar
18	// 2) Repeat
19	// a) generate derived phrases using left-canonical derivation and grammar rules
20	// b) keep only the phrases which are allowed (sentence length limit)
21	// c) if the set of phrases is empty restart with 1)
22	// d) otherwise use policy to select one of the possible derived phrases as active phrase
23	// the policy has the option to fail (for instance if all derived phrases are terminal and should not be visited again), in this case we restart at 1
24	// ... until phrase is terminal
25	// 3) Collect reward and update policy (feedback: the chain of states visited in step 2)
26	public class SequentialSearch : SolverBase {
// Node of the search tree. The tree only stores states (phrases) together with the alternative that produced them,
// so that it is not necessary to allocate new state strings whenever a follow state is selected using the policy.
private class TreeNode {
  // number of random sentence completions that have been started from this node so far
  public int randomTries;
  // phrase (state) represented by this node; only set in the constructor
  public readonly string phrase;
  // alternative that is inserted for the first non-terminal of the parent phrase when this node is selected; only set in the constructor
  public readonly Sequence alternative;
  // follow states of this node; stays null until they are generated on the first visit
  public TreeNode[] children;

  public TreeNode(string phrase, Sequence alternative) {
    this.alternative = alternative;
    this.phrase = phrase;
  }
}
39
40
// maximal length of the phrases that are generated
private readonly int maxLen;
// problem that supplies the grammar and evaluates the sampled sentences
private readonly IProblem problem;
// random source handed to the policy and to random sentence completion
private readonly System.Random random;
// number of random completions performed from a tree node before the policy is used to select a follow state
private readonly int randomTries;
// policy that selects follow states and receives the rewards
private readonly IGrammarPolicy behaviourPolicy;
// root of the search tree; (re-)created in Reset()
private TreeNode rootNode;

// number of sentences evaluated since the last reset
private int tries;
// largest tree depth reached while generating a sentence since the last reset
private int maxSearchDepth;

// sentence for which an improved quality was observed most recently
private string bestPhrase;
// phrases of the tree nodes visited while generating the current sentence
private readonly List<string> stateChain;

// Creates a solver for the given problem.
//   problem:         problem to solve, must not be null
//   maxLen:          maximal phrase length
//   random:          random source that is passed on to the policy and the grammar
//   randomTries:     number of random completions per tree node before the policy takes over
//   behaviourPolicy: policy used to select follow states, must not be null
// Throws ArgumentNullException for a null problem or policy; both are dereferenced unconditionally
// as soon as the solver is run, so failing here reports the error where it is caused.
public SequentialSearch(IProblem problem, int maxLen, System.Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
  if (problem == null) throw new ArgumentNullException("problem");
  if (behaviourPolicy == null) throw new ArgumentNullException("behaviourPolicy");

  this.maxLen = maxLen;
  this.problem = problem;
  this.random = random;
  this.randomTries = randomTries;
  this.behaviourPolicy = behaviourPolicy;
  this.stateChain = new List<string>();
}
62
// Cooperative stop flag: Run() checks it before every iteration and Reset() clears it.
public bool StopRequested { get; set; }
67
// Resets the solver and then samples and evaluates sentences until a stop is requested,
// Done() reports true, or maxIterations iterations have been performed.
public override void Run(int maxIterations) {
  Reset();

  for (int iteration = 0; !StopRequested && !Done() && iteration < maxIterations; iteration++) {
    Sequence sampled = SampleSentence(problem.Grammar);
    // sampling can fail on the last sentence; a non-terminal phrase is not evaluated
    if (!sampled.IsTerminal) continue;

    string sentence = sampled.ToString();
    tries++;

    // quality relative to the best known quality for maxLen; NaN counts as zero
    double ratio = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    double quality = double.IsNaN(ratio) ? 0.0 : ratio;
    Debug.Assert(quality >= 0 && quality <= 1.0);

    if (quality > bestQuality) bestPhrase = sentence;

    OnSolutionEvaluated(sentence, quality);
    DistributeReward(quality);
  }
}
91
92
// Starts from the root phrase and tries to derive a sentence, restarting from the root whenever
// the attempt fails. When Done() reports true the (possibly non-terminal) phrase is returned as it is.
private Sequence SampleSentence(IGrammar grammar) {
  while (true) {
    // every attempt records its own chain of visited states
    stateChain.Clear();
    Sequence candidate = new Sequence(rootNode.phrase);
    if (Done() || TryCompleteSentence(grammar, ref candidate)) return candidate;
  }
}
101
// Derives a sentence from phrase by walking down the search tree from the root node.
// At every node either a random completion is started (while the node has had fewer than randomTries of them)
// or the policy selects one of the follow states. The phrases of all visited nodes are appended to stateChain.
// Returns false when the policy fails to select a follow state (the caller then restarts), true otherwise.
// Throws ArgumentException when phrase is longer than maxLen or cannot be completed within maxLen.
private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
  if (phrase.Length > maxLen || g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();

  int depth = 0;
  TreeNode node = rootNode;
  stateChain.Add(node.phrase);

  while (!phrase.IsTerminal) {
    if (node.randomTries < randomTries) {
      // this node has not used up its random tries => complete the sentence randomly
      node.randomTries++;
      maxSearchDepth = Math.Max(maxSearchDepth, depth);
      g.CompleteSentenceRandomly(random, phrase, maxLen);
      return true;
    }

    // => select using bandit policy (the child nodes of node are created on demand)
    IEnumerable<string> followStates = GenerateFollowStates(node);
    int chosenIdx;
    // failure means we simply restart
    if (!behaviourPolicy.TrySelect(random, node.phrase, followStates, out chosenIdx)) return false;

    TreeNode next = node.children[chosenIdx];
    phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, next.alternative);

    // prepare for next iteration
    node = next;
    stateChain.Add(node.phrase);
    depth++;
  }

  maxSearchDepth = Math.Max(maxSearchDepth, depth);
  return true;
}
138
139
// Returns the phrases of the follow states of node n. The child nodes are created on the first visit:
// one child for every alternative of the first non-terminal of n.phrase that still fits into maxLen.
private IEnumerable<string> GenerateFollowStates(TreeNode n) {
  // create children on the first visit
  if (n.children == null) {
    var g = problem.Grammar;
    // tree is only used for easily retrieving the follow-states of a state
    var phrase = new Sequence(n.phrase);
    char nt = phrase.FirstNonTerminal;

    int maxLenOfReplacement = maxLen - (phrase.Length - 1);
    // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
    Debug.Assert(maxLenOfReplacement > 0);

    // the phrase string and the position of the replaced symbol are the same for all alternatives
    string phraseStr = phrase.ToString();
    int ntIdx = phrase.FirstNonTerminalIndex;

    // since we are not using a sequence later on we replace directly in the string;
    // ToArray() runs the filter exactly once instead of once for counting and once for creating the nodes
    n.children = g.GetAlternatives(nt)
      .Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement)
      .Select(alt => new TreeNode(phraseStr.Remove(ntIdx, 1).Insert(ntIdx, alt.ToString()), alt))
      .ToArray();
  }
  return n.children.Select(ch => ch.phrase);
}
171
172
173
// Passes the reward on to the policy together with the states visited while generating the current sentence.
private void DistributeReward(double reward) {
  var visitedStates = stateChain;
  behaviourPolicy.UpdateReward(visitedStates, reward);
}
177
178
179
// Puts the solver back into its initial state: clears the stop flag, resets the policy,
// discards the statistics of a previous run and creates a fresh root node for the search tree.
private void Reset() {
  StopRequested = false;
  behaviourPolicy.Reset();

  maxSearchDepth = 0;
  bestQuality = 0.0;
  // the best phrase belongs to bestQuality and must not survive from a previous run
  bestPhrase = null;
  tries = 0;

  // the root holds the sentence symbol of the grammar;
  // NOTE(review): "$" looks like a placeholder alternative because the root is not produced by any rule - confirm
  string startPhrase = problem.Grammar.SentenceSymbol.ToString();
  rootNode = new TreeNode(startPhrase, new ReadonlySequence("$"));
}
189
// Reports whether the search is finished, i.e. whether the policy fails to select a follow state of the root node.
// The check creates the children of the root node if necessary and performs a regular TrySelect call
// with the solver's random source.
public bool Done() {
  IEnumerable<string> rootFollowStates = GenerateFollowStates(rootNode);
  int ignoredIdx;
  bool canSelect = behaviourPolicy.TrySelect(random, rootNode.phrase, rootFollowStates, out ignoredIdx);
  return !canSelect;
}
194
195	#region introspection
// Writes a summary line (search depth, tries, best phrase, best quality) to the console and then follows
// the currently preferred path through the search tree. For every node on the path the phrase is printed,
// followed by three rows for its children: the last (up to three) characters of each child phrase,
// the policy values (scaled by 10, "Inf" for infinite values) and the policy tries.
// The child with the highest value is followed; ties are broken by the higher number of tries.
public void PrintStats() {
  Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);

  // use behaviour strategy to generate the currently prefered sentence
  var policy = behaviourPolicy;

  var n = rootNode;
  int lvl = 0;
  while (n != null) {
    Console.ForegroundColor = ConsoleColor.White;

    // the output is limited to the first levels of the tree
    if (lvl++ > 10) return;

    Console.WriteLine("{0,-30}", n.phrase);
    var children = n.children;
    if (children == null || children.Length == 0) break;

    // query the policy once per child; the arrays are used for printing and for selecting the next node
    var childValues = children.Select(ch => policy.GetValue(ch.phrase)).ToArray();
    var childTries = children.Select(ch => policy.GetTries(ch.phrase)).ToArray();

    // write phrases
    foreach (var ch in children) {
      Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
    }
    Console.WriteLine();

    // write values
    foreach (var value in childValues) {
      if (!double.IsInfinity(value))
        Console.Write(" {0:F2}", value * 10.0);
      else
        Console.Write(" Inf ");
    }
    Console.WriteLine();

    // write tries
    foreach (var t in childTries) {
      Console.Write(" {0,4}", t);
    }
    Console.WriteLine();

    // follow the child with the highest value; the tie-break has to use the tries
    // (it used the values a second time before, which made it a no-op)
    int selectedChildIdx = Enumerable.Range(0, children.Length)
      .OrderByDescending(i => childValues[i])
      .ThenByDescending(i => childTries[i])
      .First();

    n = children[selectedChildIdx];
  }

  Console.ForegroundColor = ConsoleColor.White;
  Console.WriteLine("-------------------");
}
254
// Sets the console foreground colour to the colour that ConsoleEx.ColorForValue returns for v.
private void SetColorForValue(double v) {
  var color = ConsoleEx.ColorForValue(v);
  Console.ForegroundColor = color;
}
258	#endregion
259
260	}
261	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences