Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs @ 11795

Visit:

Last change on this file since 11795 was 11793, checked in by gkronber, 9 years ago
#2283 fixed compile errors and refactoring
File size: 9.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Resources;
6	using System.Runtime.InteropServices;
7	using System.Text;
8	using HeuristicLab.Algorithms.Bandits;
9	using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10	using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11	using HeuristicLab.Common;
12	using HeuristicLab.Problems.GrammaticalOptimization;
13
14	namespace HeuristicLab.Algorithms.GrammaticalOptimization {
15	// a search procedure that uses a policy to generate sentences and updates the policy (online RL)
16	// 1) Start with phrase = sentence symbol of grammar
17	// 2) Repeat
18	// a) generate derived phrases using left-canonical derivation and grammar rules
19	// b) keep only the phrases which are allowed (sentence length limit)
20	// c) if the set of phrases is empty restart with 1)
21	// d) otherwise use policy to select one of the possible derived phrases as active phrase
22	// the policy has the option to fail (for instance if all derived phrases are terminal and should not be visited again), in this case we restart at 1
23	// ... until phrase is terminal
24	// 3) Collect reward and update policy (feedback: the chain of states visited in step 2 together with the reward)
/// <summary>
/// Sequential search that repeatedly derives a sentence from the grammar's sentence symbol,
/// choosing among the possible follow-phrases with a grammar policy, evaluates the finished
/// sentence with the problem and reports the resulting quality back to the policy.
/// </summary>
public class SequentialSearch {
  // Node of the derivation tree. The tree only caches the follow-states of a state so that it is
  // not necessary to allocate new state strings whenever a follow state is selected by the policy.
  private class TreeNode {
    // Not read anywhere in this class at the moment (the random-completion phase is disabled).
    public int randomTries;
    // String form of the phrase represented by this node; used as the state key for the policies.
    public string phrase;
    // The grammar alternative that was substituted into the parent phrase to obtain this phrase.
    public Sequence alternative;
    // Follow-states; null until GenerateFollowStates has been called for this node.
    public TreeNode[] children;

    public TreeNode(string phrase, Sequence alternative) {
      this.alternative = alternative;
      this.phrase = phrase;
    }
  }

  /// <summary>
  /// Raised from <see cref="Run"/> when an evaluated sentence has a strictly higher quality than
  /// the best one seen so far in the current run. Arguments: sentence, normalized quality.
  /// </summary>
  public event Action<string, double> FoundNewBestSolution;

  /// <summary>
  /// Raised from <see cref="Run"/> for every sentence that has been evaluated.
  /// Arguments: sentence, normalized quality.
  /// </summary>
  public event Action<string, double> SolutionEvaluated;

  private readonly int maxLen;
  private readonly IProblem problem;
  private readonly Random random;
  // Stored but currently unused: the random-completion phase in TryCompleteSentence is disabled.
  private readonly int randomTries;
  private readonly IGrammarPolicy behaviourPolicy;
  // Receives the same reward updates as the behaviour policy; it is never used for selection here.
  private readonly IGrammarPolicy greedyPolicy;
  private TreeNode rootNode;

  private int tries;
  private int maxSearchDepth;

  private double bestQuality;
  private string bestPhrase;
  // Phrases (states) visited while deriving the current sentence, root first.
  private readonly List<string> stateChain;

  /// <summary>
  /// Creates a new search instance. The search tree itself is only created by <see cref="Run"/>.
  /// </summary>
  /// <param name="problem">Problem that supplies the grammar and evaluates sentences.</param>
  /// <param name="maxLen">Maximum allowed sentence length.</param>
  /// <param name="random">Random number generator handed to the policy on every selection.</param>
  /// <param name="randomTries">Stored, but not used by the current implementation.</param>
  /// <param name="behaviourPolicy">Policy used to select the next phrase during derivation.</param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="problem"/> or <paramref name="behaviourPolicy"/> is null.
  /// </exception>
  public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
    // Both are dereferenced directly by this class, so fail early with a descriptive exception.
    if (problem == null) throw new ArgumentNullException("problem");
    if (behaviourPolicy == null) throw new ArgumentNullException("behaviourPolicy");

    this.maxLen = maxLen;
    this.problem = problem;
    this.random = random;
    this.randomTries = randomTries;
    this.behaviourPolicy = behaviourPolicy;
    this.greedyPolicy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.0), false);
    this.stateChain = new List<string>();
  }

  /// <summary>
  /// Resets the policies and the search state and then samples and evaluates sentences until
  /// the best normalized quality reaches 1.0, the policy cannot select a follow-state of the
  /// root any more (<see cref="Done"/>), or <paramref name="maxIterations"/> iterations have
  /// been performed.
  /// </summary>
  /// <param name="maxIterations">Upper bound on the number of sampling iterations.</param>
  public void Run(int maxIterations) {
    // Reset() initializes bestQuality to 0.0 (together with all other per-run state).
    Reset();

    // The order of the loop conditions is significant: Done() queries the policy with the shared
    // random number generator, so it must only be evaluated when bestQuality is still below 1.0.
    for (int i = 0; bestQuality < 1.0 && !Done() && i < maxIterations; i++) {
      var phrase = SampleSentence(problem.Grammar);
      // sampling can fail on the last sentence, in which case the phrase is not terminal
      if (!phrase.IsTerminal) continue;

      var sentence = phrase.ToString();
      tries++;
      // normalize by the best known quality so that the reward is expected to lie in [0, 1]
      var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
      Debug.Assert(quality >= 0 && quality <= 1.0);
      DistributeReward(quality);

      RaiseSolutionEvaluated(sentence, quality);

      if (quality > bestQuality) {
        bestQuality = quality;
        bestPhrase = sentence;
        RaiseFoundNewBestSolution(sentence, quality);
      }
    }
  }

  // Restarts the derivation from the root phrase until either a complete sentence has been
  // derived or Done() reports that the policy cannot select from the root any more.
  // In the latter case the returned phrase is not terminal.
  private Sequence SampleSentence(IGrammar grammar) {
    Sequence phrase;
    do {
      stateChain.Clear();
      phrase = new Sequence(rootNode.phrase);
    } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
    return phrase;
  }

  // Derives the phrase step by step (always replacing the first non-terminal) until it is
  // terminal, letting the behaviour policy pick one of the follow-states in every step.
  // Every visited state is appended to stateChain.
  // Returns false when the policy fails to select a follow-state; the caller then restarts.
  // NOTE: an optional phase that completed sentences randomly for the first `randomTries`
  // visits of a node is disabled; selection always uses the bandit policy.
  private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
    if (phrase.Length > maxLen) {
      throw new ArgumentException("The phrase is longer than the maximum sentence length.", "phrase");
    }
    if (g.MinPhraseLength(phrase) > maxLen) {
      throw new ArgumentException("The phrase cannot be completed within the maximum sentence length.", "phrase");
    }

    var curDepth = 0;
    var n = rootNode;
    stateChain.Add(n.phrase);

    while (!phrase.IsTerminal) {
      // creates the child nodes of n on the first visit and yields their phrases
      var followStates = GenerateFollowStates(n);

      int selectedChildIdx;
      if (!behaviourPolicy.TrySelect(random, n.phrase, followStates, out selectedChildIdx)) {
        // failure means we simply restart
        return false;
      }
      var selectedChild = n.children[selectedChildIdx];
      phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedChild.alternative);

      // prepare for next iteration
      n = selectedChild;
      stateChain.Add(n.phrase);
      curDepth++;
    }

    maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
    return true;
  }

  // Returns the phrases of all follow-states of node n. The child nodes are created on the first
  // visit by replacing the first non-terminal of the node's phrase with every alternative whose
  // minimal phrase length still fits into the sentence length limit.
  private IEnumerable<string> GenerateFollowStates(TreeNode n) {
    if (n.children == null) {
      var g = problem.Grammar;
      // tree is only used for easily retrieving the follow-states of a state
      var phrase = new Sequence(n.phrase);
      char nt = phrase.FirstNonTerminal;

      // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
      int maxLenOfReplacement = maxLen - (phrase.Length - 1);
      Debug.Assert(maxLenOfReplacement > 0);

      // Materialize once: the filtered query would otherwise be evaluated twice
      // (once for counting and once for creating the child nodes).
      var alts = g.GetAlternatives(nt)
        .Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement)
        .ToArray();

      var children = new TreeNode[alts.Length];
      for (int idx = 0; idx < alts.Length; idx++) {
        var newPhrase = new Sequence(phrase); // clone
        newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alts[idx]);
        children[idx] = new TreeNode(newPhrase.ToString(), alts[idx]);
      }
      n.children = children;
    }
    return n.children.Select(ch => ch.phrase);
  }

  // Reports the reward for the chain of states visited for the last sentence to both policies.
  private void DistributeReward(double reward) {
    behaviourPolicy.UpdateReward(stateChain, reward);
    greedyPolicy.UpdateReward(stateChain, reward);
  }

  // Brings policies, statistics and the search tree back to their initial state.
  private void Reset() {
    behaviourPolicy.Reset();
    greedyPolicy.Reset();
    maxSearchDepth = 0;
    bestQuality = 0.0;
    // also forget the best phrase and state chain of a previous run so that they cannot be
    // reported together with the statistics of the new run
    bestPhrase = null;
    stateChain.Clear();
    tries = 0;
    rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
  }

  /// <summary>
  /// Returns true when the behaviour policy fails to select a follow-state of the root phrase.
  /// The policy is queried with the shared random number generator. The search tree is only
  /// created by <see cref="Run"/>, so this method must not be called before the first run.
  /// </summary>
  public bool Done() {
    int selectedStateIdx;
    return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx);
  }

  #region introspection
  /// <summary>
  /// Writes the search statistics to the console and then walks down the tree along the
  /// states selected by the behaviour policy, printing for every visited node the endings of the
  /// child phrases, their policy values (scaled by 10) and their number of tries.
  /// The selection uses the shared random number generator.
  /// </summary>
  public void PrintStats() {
    Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);

    // use behaviour strategy to generate the currently preferred sentence
    var policy = behaviourPolicy;

    var n = rootNode;

    while (n != null) {
      var phrase = n.phrase;
      Console.ForegroundColor = ConsoleColor.White;
      Console.WriteLine("{0,-30}", phrase);
      var children = n.children;
      if (children == null || children.Length == 0) break;

      // query the policy once per child; the values are reused for coloring and printing
      // NOTE(review): assumes GetValue is a side-effect-free lookup - confirm against the policy
      var values = children.Select(ch => policy.GetValue(ch.phrase)).ToArray();
      var maxValue = values.Max();
      // avoid a division by zero when all values are zero
      if (maxValue == 0) maxValue = 1.0;

      // write phrases (only the last three characters)
      for (int i = 0; i < children.Length; i++) {
        var childPhrase = children[i].phrase;
        SetColorForValue(values[i] / maxValue);
        Console.Write(" {0,-4}", childPhrase.Substring(Math.Max(0, childPhrase.Length - 3), Math.Min(3, childPhrase.Length)));
      }
      Console.WriteLine();

      // write values
      for (int i = 0; i < children.Length; i++) {
        SetColorForValue(values[i] / maxValue);
        Console.Write(" {0:F2}", values[i] * 10.0);
      }
      Console.WriteLine();

      // write tries
      for (int i = 0; i < children.Length; i++) {
        SetColorForValue(values[i] / maxValue);
        Console.Write(" {0,4}", policy.GetTries(children[i].phrase));
      }
      Console.WriteLine();

      int selectedChildIdx;
      if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
        break;
      }
      n = children[selectedChildIdx];
    }

    Console.ForegroundColor = ConsoleColor.White;
    Console.WriteLine("-------------------");
  }

  // Sets the console foreground color that ConsoleEx associates with the given value.
  private void SetColorForValue(double v) {
    Console.ForegroundColor = ConsoleEx.ColorForValue(v);
  }
  #endregion

  // Copies the delegate to a local first so that the null check and the invocation
  // operate on the same delegate instance.
  private void RaiseSolutionEvaluated(string sentence, double quality) {
    var handler = SolutionEvaluated;
    if (handler != null) handler(sentence, quality);
  }

  private void RaiseFoundNewBestSolution(string sentence, double quality) {
    var handler = FoundNewBestSolution;
    if (handler != null) handler(sentence, quality);
  }
}
252	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences