Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs @ 11742

Visit:

Last change on this file since 11742 was 11742, checked in by gkronber, 10 years ago
#2283 refactoring
File size: 4.6 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using HeuristicLab.Algorithms.Bandits;
7	using HeuristicLab.Common;
8	using HeuristicLab.Problems.GrammaticalOptimization;
9
namespace HeuristicLab.Algorithms.GrammaticalOptimization {
  /// <summary>
  /// Samples sentences of a grammar by repeatedly replacing the first non-terminal symbol of a phrase
  /// with one of its alternatives. When every alternative fits into the remaining length budget the
  /// alternative is chosen by a bandit policy; a separate set of bandit statistics is kept for each
  /// context, where the context is the canonical representation of up to <c>contextLen</c> symbols to
  /// the left of the non-terminal followed by the non-terminal itself. When at least one alternative
  /// does not fit, one of the fitting alternatives is selected via <c>SelectRandom</c> instead.
  /// </summary>
  public class AlternativesContextSampler {
    /// <summary>
    /// Raised by <see cref="Run"/> with (sentence, quality) whenever a sentence has a strictly higher
    /// quality than every sentence evaluated earlier in the same run.
    /// </summary>
    public event Action<string, double> FoundNewBestSolution;

    /// <summary>
    /// Raised by <see cref="Run"/> with (sentence, quality) for every evaluated sentence, after the
    /// reward has been distributed to the bandit statistics.
    /// </summary>
    public event Action<string, double> SolutionEvaluated;

    private readonly int maxLen;
    private readonly IProblem problem;
    private readonly Random random;
    private readonly int contextLen;
    private readonly IBanditPolicy policy;

    // bandit statistics per context: one action info for each alternative of the context's non-terminal
    private readonly Dictionary<string, IBanditPolicyActionInfo[]> contextActionInfos;
    // (context, index of selected alternative) for every bandit decision recorded since the last reset
    private readonly List<Tuple<string, int>> updateChain;

    /// <summary>Creates a new sampler.</summary>
    /// <param name="problem">Supplies the grammar, the evaluation function and the canonical representation of contexts.</param>
    /// <param name="random">Random number generator passed to the policy and used for random selection.</param>
    /// <param name="maxLen">Maximum length of a sampled sentence.</param>
    /// <param name="contextLen">Number of symbols to the left of the non-terminal that are part of the context.</param>
    /// <param name="policy">Bandit policy used to create action infos and to select alternatives.</param>
    public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IBanditPolicy policy) {
      this.maxLen = maxLen;
      this.problem = problem;
      this.random = random;
      this.contextLen = contextLen;
      this.policy = policy;
      // created here (not only in Run) so that the public CompleteSentence can be called on a fresh instance
      this.contextActionInfos = new Dictionary<string, IBanditPolicyActionInfo[]>();
      this.updateChain = new List<Tuple<string, int>>();
    }

    /// <summary>
    /// Discards all bandit statistics and then samples and evaluates <paramref name="maxIterations"/>
    /// sentences. The quality of a sentence is its evaluation result divided by the best known quality
    /// for <c>maxLen</c>; this quality is used as the reward for all bandit decisions that led to the
    /// sentence and is reported through the events of this class.
    /// </summary>
    /// <param name="maxIterations">Number of sentences to sample and evaluate.</param>
    public void Run(int maxIterations) {
      double bestQuality = double.MinValue;
      InitPolicies();
      for (int i = 0; i < maxIterations; i++) {
        var sentence = SampleSentence(problem.Grammar).ToString();
        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
        DistributeReward(quality);

        RaiseSolutionEvaluated(sentence, quality);

        if (quality > bestQuality) {
          bestQuality = quality;
          RaiseFoundNewBestSolution(sentence, quality);
        }
      }
    }

    // forgets everything learned so far so that each run starts from scratch
    private void InitPolicies() {
      contextActionInfos.Clear();
      updateChain.Clear();
    }

    // samples one complete sentence starting from the sentence symbol of the grammar
    private Sequence SampleSentence(IGrammar grammar) {
      // only the decisions made for this sentence must be rewarded afterwards
      updateChain.Clear();
      return CompleteSentence(grammar, new Sequence(grammar.SentenceSymbol));
    }

    /// <summary>
    /// Expands the first non-terminal of <paramref name="phrase"/> repeatedly until the phrase is
    /// terminal. The phrase is modified in place and also returned. Every alternative selected by the
    /// bandit policy is recorded so that it can be rewarded later; alternatives selected while the
    /// choice is restricted by the length budget are not recorded.
    /// </summary>
    /// <param name="g">Grammar that provides the alternatives and the minimal phrase lengths.</param>
    /// <param name="phrase">Phrase to complete.</param>
    /// <returns>The completed <paramref name="phrase"/>.</returns>
    /// <exception cref="ArgumentException">
    /// The phrase is longer than <c>maxLen</c> or its minimal phrase length exceeds <c>maxLen</c>.
    /// </exception>
    public Sequence CompleteSentence(IGrammar g, Sequence phrase) {
      if (phrase.Length > maxLen)
        throw new ArgumentException("The phrase is longer than the maximum sentence length.", "phrase");
      if (g.MinPhraseLength(phrase) > maxLen)
        throw new ArgumentException("The phrase cannot be completed within the maximum sentence length.", "phrase");

      while (!phrase.IsTerminal) { // terminal phrase means we are done
        char nt = phrase.FirstNonTerminal;
        int ntIdx = phrase.FirstNonTerminalIndex;

        // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
        int maxLenOfReplacement = maxLen - (phrase.Length - 1);
        Debug.Assert(maxLenOfReplacement > 0);

        var alts = g.GetAlternatives(nt);
        Sequence selectedAlt;
        if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
          // the choice is restricted => one of the allowed alternatives is selected randomly
          var allowedAlts = alts.Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
          Debug.Assert(allowedAlts.Any());
          selectedAlt = allowedAlts.SelectRandom(random);
        } else {
          // all alts are allowed => select using bandit policy
          // the context spans from at most contextLen symbols left of the non-terminal
          // up to and including the non-terminal itself
          var startIdx = Math.Max(0, ntIdx - contextLen);
          var lft = phrase.Subsequence(startIdx, ntIdx - startIdx + 1).ToString();
          lft = problem.CanonicalRepresentation(lft);

          IBanditPolicyActionInfo[] actionInfos;
          if (!contextActionInfos.TryGetValue(lft, out actionInfos)) {
            // first visit of this context: one fresh action info per alternative
            actionInfos = alts.Select(_ => policy.CreateActionInfo()).ToArray();
            contextActionInfos.Add(lft, actionInfos);
          }

          var selectedAltIdx = policy.SelectAction(random, actionInfos);
          selectedAlt = alts.ElementAt(selectedAltIdx);
          updateChain.Add(Tuple.Create(lft, selectedAltIdx));
        }

        // replace nt with alt
        phrase.ReplaceAt(ntIdx, 1, selectedAlt);
      }
      return phrase;
    }

    // passes the reward to the action info of every recorded (context, alternative) decision
    private void DistributeReward(double reward) {
      foreach (var e in updateChain) {
        var lft = e.Item1;
        var action = e.Item2;
        contextActionInfos[lft][action].UpdateReward(reward);
      }
    }

    private void RaiseSolutionEvaluated(string sentence, double quality) {
      // copy to a local so that the null check and the invocation see the same delegate
      var handler = SolutionEvaluated;
      if (handler != null) handler(sentence, quality);
    }

    private void RaiseFoundNewBestSolution(string sentence, double quality) {
      // copy to a local so that the null check and the invocation see the same delegate
      var handler = FoundNewBestSolution;
      if (handler != null) handler(sentence, quality);
    }
  }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences