Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/TemporalDifferenceTreeSearchSampler.cs @ 11744

Visit:

Last change on this file since 11744 was 11744, checked in by gkronber, 10 years ago
#2283 worked on TD, and models for MCTS
File size: 7.1 KB

Rev	Line
[11744]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Diagnostics;
	4	using System.Linq;
	5	using System.Text;
	6	using HeuristicLab.Algorithms.Bandits;
	7	using HeuristicLab.Common;
	8	using HeuristicLab.Problems.GrammaticalOptimization;
	9
	10	namespace HeuristicLab.Algorithms.GrammaticalOptimization {
	11	// SARSA (fig. 6.9 in Sutton & Barto)
	12	public class TemporalDifferenceTreeSearchSampler {
	// Node of the search tree. The root is labelled with the grammar's sentence symbol,
	// every other node with the alternative that was applied to reach it.
	private class TreeNode {
	  // label of this node (printed by PrintStats and ToString)
	  public string ident;
	  // value estimate maintained by DistributeReward
	  public double q;
	  // number of times DistributeReward has updated this node
	  public int tries;
	  // number of random rollouts started from this node so far
	  public int randomTries;
	  // set once nothing below this node is left to sample
	  public bool done;
	  // one child per admissible alternative; stays null until the node is expanded
	  public TreeNode[] children;

	  public TreeNode(string id) {
	    ident = id;
	  }

	  public override string ToString() {
	    var args = new object[] { ident, tries, done };
	    return string.Format("Node({0} tries: {1}, done: {2})", args);
	  }
	}
	29
	30
	// --- events ---
	// raised by Run for every sampled sentence together with its normalized quality
	public event Action<string, double> SolutionEvaluated;
	// raised by Run whenever a sentence is better than the best quality seen so far
	public event Action<string, double> FoundNewBestSolution;

	// --- configuration, fixed by the constructor ---
	private readonly IProblem problem;
	private readonly int maxLen;
	private readonly int randomTries;
	private readonly Random random;
	private readonly IBanditPolicy policy; // stored only; nothing in this class reads it

	// --- search state ---
	private TreeNode rootNode;
	private List<TreeNode> updateChain; // nodes visited while sampling the current sentence
	private double bestQuality;

	// --- tree statistics ---
	public int treeSize;  // number of child nodes created so far
	public int treeDepth; // deepest level reached while sampling
	46
	47
	// Creates a sampler for the given problem.
	//   problem:     supplies the grammar, the evaluation function and the best known quality
	//   maxLen:      maximum length of the sampled sentences
	//   random:      random number source for rollouts and action selection
	//   randomTries: number of random rollouts started from a node before it is expanded
	//   policy:      kept in a field, but not used by this class
	public TemporalDifferenceTreeSearchSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
	  this.problem = problem;
	  this.policy = policy;
	  this.random = random;
	  this.randomTries = randomTries;
	  this.maxLen = maxLen;
	}
	55
	// Samples and evaluates sentences until either maxIterations sentences have been drawn
	// or the search tree is exhausted (rootNode.done).
	// Each quality is normalized by problem.BestKnownQuality(maxLen) and propagated through the tree.
	// SolutionEvaluated is raised for every sentence, FoundNewBestSolution for every improvement.
	// The tree and the tree statistics are reset again before the method returns.
	public void Run(int maxIterations) {
	  InitPolicies(problem.Grammar);

	  // the normalization constant does not depend on the iteration, so it is looked up only once
	  var bestKnownQuality = problem.BestKnownQuality(maxLen);

	  for (int i = 0; !rootNode.done && i < maxIterations; i++) {
	    var sentence = SampleSentence(problem.Grammar).ToString();
	    var quality = problem.Evaluate(sentence) / bestKnownQuality;
	    Debug.Assert(quality >= 0 && quality <= 1.0);
	    DistributeReward(quality);

	    RaiseSolutionEvaluated(sentence, quality);

	    if (quality > bestQuality) {
	      bestQuality = quality;
	      RaiseFoundNewBestSolution(sentence, quality);
	    }
	  }

	  // clean up: replacing the root drops all references to the tree, which makes it
	  // collectable; no forced GC.Collect() is needed for that
	  InitPolicies(problem.Grammar);
	}
	75
	// Writes the tree statistics to the console, followed by one section per level along the
	// path that always descends into the not-yet-done child with the highest q.
	// Each section lists the children's labels, their q values (scaled by 10) and their try
	// counts ("X" for children that are done).
	public void PrintStats() {
	  var n = rootNode;
	  Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, n.q, bestQuality);
	  while (n != null && n.children != null) {
	    Console.WriteLine();
	    Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
	    Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.q * 10))));
	    Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.tries.ToString()))));
	    // FirstOrDefault (instead of First) returns null when every child is already done,
	    // e.g. once the whole tree is exhausted; the loop then ends instead of throwing
	    n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.q).FirstOrDefault();
	  }
	}
	89
	// Resets the search state: a new empty update chain, a new root node labelled with the
	// grammar's sentence symbol, and zeroed tree statistics.
	private void InitPolicies(IGrammar grammar) {
	  updateChain = new List<TreeNode>();

	  var startSymbol = grammar.SentenceSymbol.ToString();
	  rootNode = new TreeNode(startSymbol);

	  treeDepth = treeSize = 0;
	}
	97
	// Draws one sentence: clears the chain of visited nodes and completes a phrase that
	// consists only of the grammar's sentence symbol.
	private Sequence SampleSentence(IGrammar grammar) {
	  updateChain.Clear();
	  return CompleteSentence(grammar, new Sequence(grammar.SentenceSymbol));
	}
	103
	// Expands the phrase into a sentence by walking down the search tree from the root.
	// Every visited node is appended to updateChain. A node that has not yet used up its
	// randomTries budget ends the walk with a random completion of the current phrase.
	// Otherwise the first non-terminal is replaced by the alternative chosen by SelectAction
	// and the walk continues in the corresponding child.
	// Throws ArgumentException if the phrase is, or can only become, longer than maxLen.
	private Sequence CompleteSentence(IGrammar g, Sequence phrase) {
	  if (phrase.Length > maxLen) throw new ArgumentException("The phrase is longer than the maximum sentence length.", "phrase");
	  if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException("The phrase cannot be completed within the maximum sentence length.", "phrase");
	  TreeNode n = rootNode;
	  var curDepth = 0;
	  while (!phrase.IsTerminal) {
	    updateChain.Add(n);

	    if (n.randomTries < randomTries) {
	      // still in the rollout phase for this node: complete the phrase randomly and stop here
	      n.randomTries++;
	      treeDepth = Math.Max(treeDepth, curDepth);
	      return g.CompleteSentenceRandomly(random, phrase, maxLen);
	    }

	    char nt = phrase.FirstNonTerminal;

	    int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
	    Debug.Assert(maxLenOfReplacement > 0);

	    // materialize the admissible alternatives once; the query is needed both for creating
	    // the children and for looking up the selected alternative, and re-enumerating it
	    // would re-run the MinPhraseLength filter every time
	    var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement).ToArray();

	    if (n.randomTries == randomTries && n.children == null) {
	      n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
	      treeSize += n.children.Length;
	    }

	    // children and alts share the same order, so the selected index is valid for both
	    int selectedAltIdx = SelectAction(random, n.children);
	    Sequence selectedAlt = alts[selectedAltIdx];

	    // replace nt with alt
	    phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);

	    curDepth++;

	    // prepare for next iteration
	    n = n.children[selectedAltIdx];
	  }

	  updateChain.Add(n);

	  // the last node is a leaf node (sentence is done), so we never need to visit this node again
	  n.done = true;

	  treeDepth = Math.Max(treeDepth, curDepth);
	  return phrase;
	}
	151
	152
	// Epsilon-greedy selection among the children that are not done.
	// Returns the index of the selected child within the children array.
	private int SelectAction(Random random, TreeNode[] children) {
	  const double epsilon = 0.1;

	  bool explore = random.NextDouble() < epsilon;
	  if (explore) {
	    // exploration: let SelectRandom pick one of the (child, index) pairs that are still open
	    var openChildren = children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done);
	    return openChildren.SelectRandom(random).Item2;
	  }

	  // exploitation: the first open child without any tries wins immediately,
	  // otherwise the open child with the largest q (the earliest one on ties)
	  var greedyIdx = -1;
	  var greedyQ = double.NegativeInfinity;
	  for (int idx = 0; idx < children.Length; idx++) {
	    var child = children[idx];
	    if (child.done) continue;
	    if (child.tries == 0) return idx;
	    if (child.q > greedyQ) {
	      greedyQ = child.q;
	      greedyIdx = idx;
	    }
	  }
	  Debug.Assert(greedyIdx > -1);
	  return greedyIdx;
	}
	173
	// Updates all nodes visited for the current sample, from the last visited node back to the root.
	// Each node's try counter is incremented, nodes whose children are all done are marked done,
	// and q is moved by the step size alpha towards (immediate reward + gamma * q of the node
	// updated just before it).
	private void DistributeReward(double reward) {
	  const double alpha = 0.1;
	  const double gamma = 1;

	  // bottom-up order: after reversing, index 0 holds the node that was visited last
	  updateChain.Reverse();
	  var lastVisited = updateChain.Count > 0 ? updateChain[0] : null;

	  var successorQ = 0.0;
	  for (int i = 0; i < updateChain.Count; i++) {
	    var node = updateChain[i];
	    node.tries++;

	    bool allChildrenDone = node.children != null && node.children.All(c => c.done);
	    if (allChildrenDone) node.done = true;

	    // the reward is received only for the last action; every earlier step gets 0
	    double immediateReward = node == lastVisited ? reward : 0;
	    node.q = node.q + alpha * (immediateReward + gamma * successorQ - node.q);
	    successorQ = node.q;
	  }
	}
	196
	// Notifies the subscribers of SolutionEvaluated, if there are any.
	private void RaiseSolutionEvaluated(string sentence, double quality) {
	  var subscribers = SolutionEvaluated;
	  if (subscribers == null) return;
	  subscribers.Invoke(sentence, quality);
	}
	// Notifies the subscribers of FoundNewBestSolution, if there are any.
	private void RaiseFoundNewBestSolution(string sentence, double quality) {
	  var subscribers = FoundNewBestSolution;
	  if (subscribers == null) return;
	  subscribers.Invoke(sentence, quality);
	}
	205	}
	206	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences