Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs @ 11806

Visit:

Last change on this file since 11806 was 11806, checked in by gkronber, 9 years ago
#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies
File size: 9.5 KB

Rev	Line
[11770]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Diagnostics;
	4	using System.Linq;
	5	using System.Resources;
	6	using System.Runtime.InteropServices;
	7	using System.Text;
	8	using HeuristicLab.Algorithms.Bandits;
	9	using HeuristicLab.Algorithms.Bandits.BanditPolicies;
	10	using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
	11	using HeuristicLab.Common;
	12	using HeuristicLab.Problems.GrammaticalOptimization;
	13
	14	namespace HeuristicLab.Algorithms.GrammaticalOptimization {
	15	// a search procedure that uses a policy to generate sentences and updates the policy (online RL)
	16	// 1) Start with phrase = sentence symbol of grammar
	17	// 2) Repeat
	18	// a) generate derived phrases using left-canonical derivation and grammar rules
	19	// b) keep only the phrases which are allowed (sentence length limit)
	20	// c) if the set of phrases is empty restart with 1)
	21	// d) otherwise use policy to select one of the possible derived phrases as active phrase
	22	// the policy has the option to fail (for instance if all derived phrases are terminal and should not be visited again), in this case we restart at 1
	23	// ... until phrase is terminal
	24	// 3) Collect reward and update policy (feedback: state of visited rewards from step 2)
	25	public class SequentialSearch {
[11793]	26	// only for storing states so that it is not necessary to allocate new state strings whenever we select a follow state using the policy
	27	private class TreeNode {
	28	public int randomTries;
	29	public string phrase;
	30	public Sequence alternative;
	31	public TreeNode[] children;
[11770]	32
[11793]	33	public TreeNode(string phrase, Sequence alternative) {
	34	this.alternative = alternative;
	35	this.phrase = phrase;
	36	}
	37	}
	38
	39
[11770]	40	public event Action<string, double> FoundNewBestSolution;
	41	public event Action<string, double> SolutionEvaluated;
	42
	43	private readonly int maxLen;
	44	private readonly IProblem problem;
	45	private readonly Random random;
	46	private readonly int randomTries;
	47	private readonly IGrammarPolicy behaviourPolicy;
	48	private readonly IGrammarPolicy greedyPolicy;
[11793]	49	private TreeNode rootNode;
	50
	51	private int tries;
[11770]	52	private int maxSearchDepth;
	53
	54	private double bestQuality;
	55	private string bestPhrase;
[11793]	56	private readonly List<string> stateChain;
[11770]	57
	58	public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
	59	this.maxLen = maxLen;
	60	this.problem = problem;
	61	this.random = random;
	62	this.randomTries = randomTries;
	63	this.behaviourPolicy = behaviourPolicy;
[11793]	64	this.greedyPolicy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.0), false);
	65	this.stateChain = new List<string>();
[11770]	66	}
	67
	68	public void Run(int maxIterations) {
	69	bestQuality = double.MinValue;
	70	Reset();
	71
[11799]	72	for (int i = 0; /!bestQuality.IsAlmost(1.0) && /!Done() && i < maxIterations; i++) {
[11770]	73	var phrase = SampleSentence(problem.Grammar);
	74	// can fail on the last sentence
	75	if (phrase.IsTerminal) {
	76	var sentence = phrase.ToString();
	77	tries++;
	78	var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
[11806]	79	if (double.IsNaN(quality)) quality = 0.0;
[11770]	80	Debug.Assert(quality >= 0 && quality <= 1.0);
	81	DistributeReward(quality);
	82
	83	RaiseSolutionEvaluated(sentence, quality);
	84
	85	if (quality > bestQuality) {
	86	bestQuality = quality;
	87	bestPhrase = sentence;
	88	RaiseFoundNewBestSolution(sentence, quality);
	89	}
	90	}
	91	}
	92	}
	93
	94
[11793]	95	private Sequence SampleSentence(IGrammar grammar) {
	96	Sequence phrase;
[11770]	97	do {
	98	stateChain.Clear();
[11793]	99	phrase = new Sequence(rootNode.phrase);
[11770]	100	} while (!Done() && !TryCompleteSentence(grammar, ref phrase));
	101	return phrase;
	102	}
	103
[11793]	104	private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
[11770]	105	if (phrase.Length > maxLen) throw new ArgumentException();
	106	if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
	107	var curDepth = 0;
[11793]	108	var n = rootNode;
	109	stateChain.Add(n.phrase);
[11770]	110
	111	while (!phrase.IsTerminal) {
[11799]	112	if (n.randomTries < randomTries) {
	113	n.randomTries++;
	114	maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
	115	g.CompleteSentenceRandomly(random, phrase, maxLen);
	116	return true;
	117	} else {
	118	// => select using bandit policy
	119	// failure means we simply restart
	120	GenerateFollowStates(n); // creates child nodes for node n
[11770]	121
[11799]	122	int selectedChildIdx;
	123	if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
	124	return false;
	125	}
	126	phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
[11793]	127
[11799]	128	// prepare for next iteration
	129	n = n.children[selectedChildIdx];
	130	stateChain.Add(n.phrase);
	131	curDepth++;
[11793]	132	}
[11770]	133	} // while
	134
	135	maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
	136	return true;
	137	}
	138
	139
[11793]	140	private IEnumerable<string> GenerateFollowStates(TreeNode n) {
	141	// create children on the first visit
	142	if (n.children == null) {
	143	var g = problem.Grammar;
	144	// tree is only used for easily retrieving the follow-states of a state
	145	var phrase = new Sequence(n.phrase);
[11770]	146	char nt = phrase.FirstNonTerminal;
	147
	148	int maxLenOfReplacement = maxLen - (phrase.Length - 1);
	149	// replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
	150	Debug.Assert(maxLenOfReplacement > 0);
	151
	152	var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
	153
[11793]	154	var children = new TreeNode[alts.Count()];
[11770]	155	int idx = 0;
	156	foreach (var alt in alts) {
[11799]	157	// var newPhrase = new Sequence(phrase); // clone
	158	// newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
	159	// children[idx++] = new TreeNode(newPhrase.ToString(), alt);
	160
	161	// since we are not using a sequence later on we might directly transform the current sequence to a string and replace there
	162	var phraseStr = phrase.ToString();
	163	var sb = new StringBuilder(phraseStr);
	164	sb.Remove(phrase.FirstNonTerminalIndex, 1).Insert(phrase.FirstNonTerminalIndex, alt.ToString());
	165	children[idx++] = new TreeNode(sb.ToString(), alt);
[11770]	166	}
[11793]	167	n.children = children;
	168	}
	169	return n.children.Select(ch => ch.phrase);
[11770]	170	}
	171
	172	private void DistributeReward(double reward) {
	173	behaviourPolicy.UpdateReward(stateChain, reward);
	174	greedyPolicy.UpdateReward(stateChain, reward);
	175	}
	176
	177
	178	private void Reset() {
	179	behaviourPolicy.Reset();
	180	greedyPolicy.Reset();
	181	maxSearchDepth = 0;
	182	bestQuality = 0.0;
	183	tries = 0;
[11793]	184	rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
[11770]	185	}
	186
	187	public bool Done() {
[11793]	188	int selectedStateIdx;
	189	return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx);
[11770]	190	}
	191
	192	#region introspection
	193	public void PrintStats() {
	194	Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
	195
[11793]	196	// use behaviour strategy to generate the currently prefered sentence
[11770]	197	var policy = behaviourPolicy;
[11793]	198
	199	var n = rootNode;
	200
	201	while (n != null) {
	202	var phrase = n.phrase;
[11770]	203	Console.ForegroundColor = ConsoleColor.White;
	204	Console.WriteLine("{0,-30}", phrase);
[11793]	205	var children = n.children;
	206	if (children == null \|\| !children.Any()) break;
	207	var values = children.Select(ch => policy.GetValue(ch.phrase));
[11770]	208	var maxValue = values.Max();
	209	if (maxValue == 0) maxValue = 1.0;
	210
	211	// write phrases
[11793]	212	foreach (var ch in children) {
	213	SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
	214	Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
[11770]	215	}
	216	Console.WriteLine();
	217
	218	// write values
[11793]	219	foreach (var ch in children) {
	220	SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
	221	Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
[11770]	222	}
	223	Console.WriteLine();
	224
	225	// write tries
[11793]	226	foreach (var ch in children) {
	227	SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
	228	Console.Write(" {0,4}", policy.GetTries(ch.phrase));
[11770]	229	}
	230	Console.WriteLine();
[11793]	231	int selectedChildIdx;
	232	if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
[11770]	233	break;
	234	}
[11793]	235	n = n.children[selectedChildIdx];
[11770]	236	}
	237
	238	Console.ForegroundColor = ConsoleColor.White;
	239	Console.WriteLine("-------------------");
	240	}
	241
	242	private void SetColorForValue(double v) {
	243	Console.ForegroundColor = ConsoleEx.ColorForValue(v);
	244	}
	245	#endregion
	246
	247	private void RaiseSolutionEvaluated(string sentence, double quality) {
	248	var handler = SolutionEvaluated;
	249	if (handler != null) handler(sentence, quality);
	250	}
	251	private void RaiseFoundNewBestSolution(string sentence, double quality) {
	252	var handler = FoundNewBestSolution;
	253	if (handler != null) handler(sentence, quality);
	254	}
	255	}
	256	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences