Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs @ 11977

Visit:

Last change on this file since 11977 was 11977, checked in by gkronber, 9 years ago
#2283 commit for 'realistic' (same settings for ant and symbreg) experiment
File size: 9.0 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Resources;
6	using System.Runtime.InteropServices;
7	using System.Text;
8	using HeuristicLab.Algorithms.Bandits;
9	using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10	using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11	using HeuristicLab.Common;
12	using HeuristicLab.Problems.GrammaticalOptimization;
13
namespace HeuristicLab.Algorithms.GrammaticalOptimization {
  /// <summary>
  /// A search procedure that uses a policy to generate sentences and updates the policy (online RL).
  /// </summary>
  /// <remarks>
  /// 1) Start with phrase = sentence symbol of the grammar.
  /// 2) Repeat
  ///    a) generate derived phrases using left-canonical derivation and the grammar rules,
  ///    b) keep only the phrases which are allowed (sentence length limit),
  ///    c) if the set of phrases is empty restart with 1),
  ///    d) otherwise use the policy to select one of the possible derived phrases as active phrase;
  ///       the policy has the option to fail (for instance if all derived phrases are terminal and
  ///       should not be visited again), in this case we restart at 1)
  ///    ... until the phrase is terminal.
  /// 3) Collect the reward and update the policy (feedback: the chain of states visited in step 2).
  ///
  /// The search tree is created by <see cref="Run"/>; <see cref="Done"/> dereferences the root node
  /// and therefore must not be called before <see cref="Run"/> has been started at least once.
  /// </remarks>
  public class SequentialSearch : SolverBase {
    // Only for storing states so that it is not necessary to allocate new state strings
    // whenever we select a follow state using the policy.
    private class TreeNode {
      // number of times a sentence was completed randomly starting from this node
      public int randomTries;
      // the (partially derived) phrase represented by this node, used as the state handed to the policy
      public string phrase;
      // the grammar alternative that was inserted into the parent phrase to obtain this phrase
      public Sequence alternative;
      // follow states; null until the node is expanded by GenerateFollowStates
      public TreeNode[] children;

      public TreeNode(string phrase, Sequence alternative) {
        this.alternative = alternative;
        this.phrase = phrase;
      }
    }


    // maximum length of generated sentences
    private readonly int maxLen;
    private readonly IProblem problem;
    private readonly Random random;
    // number of random completions performed at a node before the policy is used to descend from it
    private readonly int randomTries;
    // policy that selects follow states during the search and receives the rewards
    private readonly IGrammarPolicy behaviourPolicy;
    // constructed and reset together with the behaviour policy, but never queried or updated in this class
    private readonly IGrammarPolicy greedyPolicy;
    private TreeNode rootNode;

    // number of evaluated (terminal) sentences since the last reset
    private int tries;
    // largest tree depth reached while sampling since the last reset
    private int maxSearchDepth;

    // sentence for which the best quality was observed since the last reset
    private string bestPhrase;
    // phrases of the tree nodes visited while sampling the current sentence (root first)
    private readonly List<string> stateChain;

    /// <summary>Creates a new sequential search for the given problem.</summary>
    /// <param name="problem">Problem providing the grammar and the evaluation function.</param>
    /// <param name="maxLen">Maximum length of generated sentences.</param>
    /// <param name="random">Random number generator handed to the policy and to random sentence completion.</param>
    /// <param name="randomTries">Number of random completions per tree node before the policy is used at that node.</param>
    /// <param name="behaviourPolicy">Policy used to select follow states; it receives the reward of each evaluated sentence.</param>
    public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
      this.maxLen = maxLen;
      this.problem = problem;
      this.random = random;
      this.randomTries = randomTries;
      this.behaviourPolicy = behaviourPolicy;
      this.greedyPolicy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.0), false);
      this.stateChain = new List<string>();
    }

    /// <summary>
    /// Set to <c>true</c> to stop <see cref="Run"/> before the next iteration.
    /// The flag is cleared again at the start of every <see cref="Run"/>.
    /// </summary>
    public bool StopRequested {
      get;
      set;
    }

    /// <summary>
    /// Resets the search state and then samples and evaluates sentences until
    /// <paramref name="maxIterations"/> iterations have been performed, a stop has been requested,
    /// or <see cref="Done"/> reports that the policy cannot select a follow state of the root anymore.
    /// </summary>
    /// <param name="maxIterations">Upper bound for the number of sampling iterations.</param>
    public override void Run(int maxIterations) {
      Reset();

      for (int i = 0; !StopRequested && !Done() && i < maxIterations; i++) {
        var phrase = SampleSentence(problem.Grammar);
        // sampling can fail on the last sentence, in which case the phrase is not terminal and is skipped
        if (!phrase.IsTerminal) continue;

        var sentence = phrase.ToString();
        tries++;
        // normalize by the best known quality; a NaN result is treated as the worst quality
        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
        if (double.IsNaN(quality)) quality = 0.0;
        Debug.Assert(quality >= 0 && quality <= 1.0);

        // NOTE(review): bestQuality is not declared or assigned in this class apart from Reset();
        // presumably it is a SolverBase member updated by OnSolutionEvaluated - confirm.
        // The comparison must therefore happen before OnSolutionEvaluated is called.
        if (quality > bestQuality) {
          bestPhrase = sentence;
        }

        OnSolutionEvaluated(sentence, quality);
        DistributeReward(quality);
      }
    }


    /// <summary>
    /// Repeatedly starts at the root phrase and tries to derive a complete sentence
    /// until an attempt succeeds or <see cref="Done"/> returns <c>true</c>.
    /// </summary>
    /// <returns>The phrase of the last attempt; it is not terminal if the search finished without success.</returns>
    private Sequence SampleSentence(IGrammar grammar) {
      Sequence phrase;
      do {
        // every attempt starts with an empty state chain and a fresh copy of the root phrase
        stateChain.Clear();
        phrase = new Sequence(rootNode.phrase);
      } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
      return phrase;
    }

    /// <summary>
    /// Walks down the search tree from the root, letting the behaviour policy pick a follow state at
    /// each node, and applies the selected alternatives to <paramref name="phrase"/>. At the first node
    /// that has been used for fewer than <c>randomTries</c> random completions the grammar is asked to
    /// complete the phrase randomly instead.
    /// </summary>
    /// <param name="g">Grammar used for length checks and random completion.</param>
    /// <param name="phrase">Phrase to complete; it is modified through its own methods, the reference itself is never reassigned.</param>
    /// <returns><c>false</c> if the policy failed to select a follow state (the caller restarts), otherwise <c>true</c>.</returns>
    /// <exception cref="ArgumentException">The phrase is longer than the maximum length or cannot be completed within it.</exception>
    private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
      if (phrase.Length > maxLen) throw new ArgumentException("The phrase is longer than the maximum sentence length.", "phrase");
      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException("The phrase cannot be completed within the maximum sentence length.", "phrase");
      var curDepth = 0;
      var n = rootNode;
      stateChain.Add(n.phrase);

      while (!phrase.IsTerminal) {
        if (n.randomTries < randomTries) {
          // this node has not been sampled often enough yet => complete the sentence randomly
          n.randomTries++;
          maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
          g.CompleteSentenceRandomly(random, phrase, maxLen);
          return true;
        }

        // => select using the bandit policy; failure means we simply restart
        var followStates = GenerateFollowStates(n); // creates the child nodes of n on the first visit

        int selectedChildIdx;
        if (!behaviourPolicy.TrySelect(random, n.phrase, followStates, out selectedChildIdx)) {
          return false;
        }
        var selectedChild = n.children[selectedChildIdx];
        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedChild.alternative);

        // prepare for the next iteration
        n = selectedChild;
        stateChain.Add(n.phrase);
        curDepth++;
      }

      maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
      return true;
    }


    /// <summary>
    /// Returns the phrases of the follow states of <paramref name="n"/>. On the first visit the child
    /// nodes are created by replacing the first non-terminal symbol of the node's phrase with every
    /// alternative whose minimal phrase length still fits into the sentence length limit.
    /// </summary>
    private IEnumerable<string> GenerateFollowStates(TreeNode n) {
      // create children on the first visit
      if (n.children == null) {
        var g = problem.Grammar;
        // the tree is only used for easily retrieving the follow-states of a state
        var phrase = new Sequence(n.phrase);
        char nt = phrase.FirstNonTerminal;

        // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
        int maxLenOfReplacement = maxLen - (phrase.Length - 1);
        Debug.Assert(maxLenOfReplacement > 0);

        // Child nodes only store strings, so the replacement is done directly on the string
        // representation. Both values below are the same for every alternative.
        var phraseStr = phrase.ToString();
        int ntIdx = phrase.FirstNonTerminalIndex;

        // ToArray() materializes the filtered alternatives exactly once
        n.children = g.GetAlternatives(nt)
          .Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement)
          .Select(alt => new TreeNode(phraseStr.Remove(ntIdx, 1).Insert(ntIdx, alt.ToString()), alt))
          .ToArray();
      }
      return n.children.Select(ch => ch.phrase);
    }

    /// <summary>Reports the reward of the last evaluated sentence for the chain of visited states.</summary>
    private void DistributeReward(double reward) {
      // only the behaviour policy learns from the reward; the greedy policy is intentionally not updated
      behaviourPolicy.UpdateReward(stateChain, reward);
    }


    /// <summary>Clears all search state (policies, statistics, best solution and search tree).</summary>
    private void Reset() {
      StopRequested = false;
      behaviourPolicy.Reset();
      greedyPolicy.Reset();
      maxSearchDepth = 0;
      bestQuality = 0.0;
      // keep the best phrase consistent with the best quality that was just reset
      bestPhrase = null;
      tries = 0;
      stateChain.Clear();
      rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
    }

    /// <summary>
    /// Returns <c>true</c> when the behaviour policy cannot select any follow state of the root node.
    /// </summary>
    /// <remarks>The check performs an actual selection using the shared random number generator.</remarks>
    public bool Done() {
      int selectedStateIdx;
      return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx);
    }

    #region introspection
    /// <summary>
    /// Writes the search statistics to the console and then follows the behaviour policy from the root
    /// through the already expanded part of the tree, printing for every visited node the phrase
    /// endings, values and tries of its children.
    /// </summary>
    /// <remarks>The selections made here use the shared random number generator.</remarks>
    public void PrintStats() {
      Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);

      // use the behaviour strategy to generate the currently preferred sentence
      var policy = behaviourPolicy;

      var n = rootNode;

      while (n != null) {
        var phrase = n.phrase;
        Console.ForegroundColor = ConsoleColor.White;
        Console.WriteLine("{0,-30}", phrase);
        var children = n.children;
        if (children == null || children.Length == 0) break;

        // query the value of every child once and reuse it for coloring and output
        var values = children.Select(ch => policy.GetValue(ch.phrase)).ToArray();
        var maxValue = values.Max();
        if (maxValue == 0) maxValue = 1.0; // avoid a division by zero when scaling the colors

        // write phrases (only the last three characters)
        for (int i = 0; i < children.Length; i++) {
          var childPhrase = children[i].phrase;
          SetColorForValue(values[i] / maxValue);
          Console.Write(" {0,-4}", childPhrase.Substring(Math.Max(0, childPhrase.Length - 3), Math.Min(3, childPhrase.Length)));
        }
        Console.WriteLine();

        // write values
        for (int i = 0; i < children.Length; i++) {
          SetColorForValue(values[i] / maxValue);
          Console.Write(" {0:F2}", values[i] * 10.0);
        }
        Console.WriteLine();

        // write tries
        for (int i = 0; i < children.Length; i++) {
          SetColorForValue(values[i] / maxValue);
          Console.Write(" {0,4}", policy.GetTries(children[i].phrase));
        }
        Console.WriteLine();

        int selectedChildIdx;
        if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
          break;
        }
        n = children[selectedChildIdx];
      }

      Console.ForegroundColor = ConsoleColor.White;
      Console.WriteLine("-------------------");
    }

    /// <summary>Sets the console foreground color that ConsoleEx associates with the value <paramref name="v"/>.</summary>
    private void SetColorForValue(double v) {
      Console.ForegroundColor = ConsoleEx.ColorForValue(v);
    }
    #endregion

  }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences