Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs @ 11730

Visit:

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago
#2283: several major extensions for grammatical optimization
File size: 4.5 KB

Rev	Line
[11727]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Diagnostics;
	4	using System.Linq;
	5	using System.Text;
	6	using HeuristicLab.Algorithms.Bandits;
	7	using HeuristicLab.Common;
	8	using HeuristicLab.Problems.GrammaticalOptimization;
	9
	10	namespace HeuristicLab.Algorithms.GrammaticalOptimization {
	/// <summary>
	/// Samples sentences of a grammar by repeatedly replacing the first non-terminal symbol of a phrase
	/// with one of its alternatives. The alternative is chosen by a bandit policy that is looked up by
	/// the (hashed) context to the left of the non-terminal; the quality of each finished sentence is
	/// fed back as reward to every policy that took part in producing it.
	/// </summary>
	public class AlternativesContextSampler {
	  /// <summary>Raised with (sentence, quality) whenever a sentence beats the best quality seen so far in the current run.</summary>
	  public event Action<string, double> FoundNewBestSolution;
	  /// <summary>Raised with (sentence, quality) after every evaluated sentence.</summary>
	  public event Action<string, double> SolutionEvaluated;

	  private readonly int maxLen;
	  private readonly IProblem problem;
	  private readonly Random random;
	  private readonly int contextLen;
	  private readonly Func<Random, int, IPolicy> policyFactory;

	  // One policy per hashed context. Created eagerly so that the public CompleteSentence
	  // can be called without a preceding call to Run.
	  private readonly Dictionary<string, IPolicy> ntPolicy = new Dictionary<string, IPolicy>();
	  // (context key, selected alternative index) for every policy decision of the current sentence.
	  private readonly List<Tuple<string, int>> updateChain = new List<Tuple<string, int>>();

	  /// <summary>
	  /// Creates a new sampler.
	  /// </summary>
	  /// <param name="problem">Problem that supplies the grammar, the evaluation function and the context hash.</param>
	  /// <param name="random">Random number generator used for restricted choices and handed to the policy factory.</param>
	  /// <param name="maxLen">Maximum length of a sampled sentence.</param>
	  /// <param name="contextLen">Number of symbols to the left of the non-terminal that are used as context.</param>
	  /// <param name="policyFactory">Creates a policy from the random number generator and the number of alternatives.</param>
	  /// <exception cref="ArgumentNullException">If problem, random or policyFactory is null.</exception>
	  public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, Func<Random, int, IPolicy> policyFactory) {
	    if (problem == null) throw new ArgumentNullException("problem");
	    if (random == null) throw new ArgumentNullException("random");
	    if (policyFactory == null) throw new ArgumentNullException("policyFactory");
	    this.maxLen = maxLen;
	    this.problem = problem;
	    this.random = random;
	    this.contextLen = contextLen;
	    this.policyFactory = policyFactory;
	  }

	  /// <summary>
	  /// Samples and evaluates <paramref name="maxIterations"/> sentences. All policies are discarded at the
	  /// start of the run. The quality reported to the events and to the policies is the evaluation result
	  /// divided by the best known quality for maxLen.
	  /// </summary>
	  /// <param name="maxIterations">Number of sentences to sample.</param>
	  public void Run(int maxIterations) {
	    double bestQuality = double.MinValue;
	    InitPolicies(problem.Grammar);
	    for (int i = 0; i < maxIterations; i++) {
	      var sentence = SampleSentence(problem.Grammar).ToString();
	      var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
	      DistributeReward(quality);

	      RaiseSolutionEvaluated(sentence, quality);

	      if (quality > bestQuality) {
	        bestQuality = quality;
	        RaiseFoundNewBestSolution(sentence, quality);
	      }
	    }
	  }

	  // Forgets all learned policies and pending updates (the grammar argument is currently not used).
	  private void InitPolicies(IGrammar grammar) {
	    ntPolicy.Clear();
	    updateChain.Clear();
	  }

	  // Samples a complete sentence starting from the sentence symbol of the grammar.
	  private Sequence SampleSentence(IGrammar grammar) {
	    updateChain.Clear();
	    return CompleteSentence(grammar, new Sequence(grammar.SentenceSymbol));
	  }

	  /// <summary>
	  /// Replaces non-terminals in <paramref name="phrase"/> (always the first one) until the phrase is terminal.
	  /// The replacement is applied to the passed phrase object, which is also the return value.
	  /// Each decision taken by a policy is appended to the internal update chain; the chain is only
	  /// cleared when a sentence is sampled from within <see cref="Run"/>.
	  /// </summary>
	  /// <param name="g">Grammar that supplies the alternatives and minimal phrase lengths.</param>
	  /// <param name="phrase">Phrase to complete.</param>
	  /// <returns>The completed (terminal) phrase.</returns>
	  /// <exception cref="ArgumentException">If the phrase is longer than maxLen or cannot be completed within maxLen symbols.</exception>
	  public Sequence CompleteSentence(IGrammar g, Sequence phrase) {
	    if (phrase.Length > maxLen) throw new ArgumentException("The phrase is longer than the maximum sentence length.", "phrase");
	    if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException("The phrase cannot be completed within the maximum sentence length.", "phrase");

	    while (!phrase.IsTerminal) { // terminal phrase means we are done
	      char nt = phrase.FirstNonTerminal;

	      int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
	      Debug.Assert(maxLenOfReplacement > 0);

	      var alts = g.GetAlternatives(nt);
	      Sequence selectedAlt;
	      // if the choice is restricted then one of the allowed alternatives is selected randomly
	      if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
	        var allowedAlts = alts.Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
	        Debug.Assert(allowedAlts.Any());
	        // replace nt with random alternative (no policy is involved, so nothing is added to the update chain)
	        selectedAlt = allowedAlts.SelectRandom(random);
	      } else {
	        // all alts are allowed => select using bandit policy
	        var ntIdx = phrase.FirstNonTerminalIndex;
	        // the context consists of up to contextLen symbols left of the non-terminal plus the non-terminal itself
	        var startIdx = Math.Max(0, ntIdx - contextLen);
	        var lft = phrase.Subsequence(startIdx, ntIdx - startIdx + 1).ToString();
	        lft = problem.Hash(lft);

	        IPolicy policy;
	        if (!ntPolicy.TryGetValue(lft, out policy)) {
	          // first visit of this context => create a policy with one action per alternative
	          policy = policyFactory(random, alts.Count());
	          ntPolicy.Add(lft, policy);
	        }
	        var selectedAltIdx = policy.SelectAction();
	        selectedAlt = alts.ElementAt(selectedAltIdx);
	        updateChain.Add(Tuple.Create(lft, selectedAltIdx));
	      }

	      // replace nt with alt
	      phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
	    }
	    return phrase;
	  }

	  // Passes the reward to every (context, action) pair recorded for the current sentence.
	  private void DistributeReward(double reward) {
	    foreach (var e in updateChain) {
	      var lft = e.Item1;
	      var action = e.Item2;
	      ntPolicy[lft].UpdateReward(action, reward);
	    }
	  }

	  private void RaiseSolutionEvaluated(string sentence, double quality) {
	    // copy to a local so that the null check and the invocation see the same delegate
	    var handler = SolutionEvaluated;
	    if (handler != null) handler(sentence, quality);
	  }

	  private void RaiseFoundNewBestSolution(string sentence, double quality) {
	    // copy to a local so that the null check and the invocation see the same delegate
	    var handler = FoundNewBestSolution;
	    if (handler != null) handler(sentence, quality);
	  }
	}
	118	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences