Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs @ 11727

Visit:

Last change on this file since 11727 was 11727, checked in by gkronber, 9 years ago
#2283: worked on grammatical optimization problem solvers (simple MCTS done)
File size: 4.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Linq;
5	using System.Text;
6	using HeuristicLab.Algorithms.Bandits;
7	using HeuristicLab.Common;
8	using HeuristicLab.Problems.GrammaticalOptimization;
9
namespace HeuristicLab.Algorithms.GrammaticalOptimization {
  /// <summary>
  /// Samples sentences of a grammar by repeatedly replacing the first non-terminal symbol of a phrase
  /// with one of its alternatives. One bandit policy per non-terminal symbol chooses the alternative
  /// whenever all alternatives fit into the remaining length budget; the (normalized) quality of the
  /// finished sentence is fed back as reward to every policy decision that produced it.
  /// </summary>
  public class AlternativesSampler {
    // Fixed seed of the pseudo random number generator: every instance draws the same random sequence.
    private const int RandomSeed = 31415;
    // Third constructor argument of EpsGreedyPolicy (presumably the exploration rate epsilon -- TODO confirm).
    private const double Epsilon = 0.1;

    /// <summary>Raised by <see cref="Run"/> when a sentence has a higher quality than all sentences before it (sentence, quality).</summary>
    public event Action<string, double> FoundNewBestSolution;
    /// <summary>Raised by <see cref="Run"/> for every evaluated sentence (sentence, quality).</summary>
    public event Action<string, double> SolutionEvaluated;

    private readonly int maxLen;
    private readonly Random random;
    private readonly IProblem problem;

    // One policy per non-terminal symbol; created by InitPolicies.
    private Dictionary<char, IPolicy> ntPolicy;
    // (non-terminal, index of chosen alternative) for every policy decision of the current sentence.
    private List<Tuple<char, int>> updateChain;

    /// <summary>Creates a sampler for the given problem.</summary>
    /// <param name="problem">Problem providing the grammar and the evaluation function.</param>
    /// <param name="maxLen">Maximum length of sampled sentences.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problem"/> is null.</exception>
    public AlternativesSampler(IProblem problem, int maxLen) {
      if (problem == null) throw new ArgumentNullException("problem");
      this.problem = problem;
      this.maxLen = maxLen;
      this.random = new Random(RandomSeed);
    }

    /// <summary>
    /// Resets all policies and then samples and evaluates <paramref name="maxIterations"/> sentences.
    /// The quality of a sentence is its evaluation result divided by the best known quality for
    /// the maximum sentence length.
    /// </summary>
    /// <param name="maxIterations">Number of sentences to sample; nothing is sampled for values &lt;= 0.</param>
    public void Run(int maxIterations) {
      InitPolicies(problem.Grammar);
      if (maxIterations <= 0) return;

      // loop invariant, therefore queried only once
      var bestKnownQuality = problem.GetBestKnownQuality(maxLen);
      double bestQuality = double.MinValue;
      for (int i = 0; i < maxIterations; i++) {
        var sentence = SampleSentence(problem.Grammar);
        var quality = problem.Evaluate(sentence) / bestKnownQuality;
        DistributeReward(quality);

        RaiseSolutionEvaluated(sentence, quality);

        if (quality > bestQuality) {
          bestQuality = quality;
          RaiseFoundNewBestSolution(sentence, quality);
        }
      }
    }

    // Creates a fresh policy for every non-terminal symbol (one action per alternative) and an empty update chain.
    private void InitPolicies(IGrammar grammar) {
      this.ntPolicy = new Dictionary<char, IPolicy>();
      this.updateChain = new List<Tuple<char, int>>();
      foreach (var nt in grammar.NonTerminalSymbols) {
        ntPolicy.Add(nt, new EpsGreedyPolicy(random, grammar.GetAlternatives(nt).Count(), Epsilon));
      }
    }

    // Samples a complete sentence starting from the sentence symbol; forgets the decisions of the previous sentence.
    private string SampleSentence(IGrammar grammar) {
      updateChain.Clear();
      return CompleteSentence(grammar, grammar.SentenceSymbol.ToString());
    }

    /// <summary>
    /// Expands <paramref name="phrase"/> until it contains only terminal symbols. In each step the first
    /// non-terminal symbol is replaced by one of its alternatives. Policy decisions are appended to the
    /// internal update chain; the chain is not cleared by this method.
    /// </summary>
    /// <param name="g">Grammar used for the expansion.</param>
    /// <param name="phrase">Phrase to complete.</param>
    /// <returns>The completed phrase.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="g"/> or <paramref name="phrase"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="phrase"/> or its minimal phrase length (as reported by the grammar) exceeds the maximum sentence length.
    /// </exception>
    public string CompleteSentence(IGrammar g, string phrase) {
      if (g == null) throw new ArgumentNullException("g");
      if (phrase == null) throw new ArgumentNullException("phrase");
      if (phrase.Length > maxLen)
        throw new ArgumentException("The phrase is longer than the maximum sentence length.", "phrase");
      // NOTE(review): assumes MinPhraseLength is the sum of the minimal lengths of the symbols of a phrase -- confirm against Grammar.
      int minLen = g.MinPhraseLength(phrase);
      if (minLen > maxLen)
        throw new ArgumentException("The phrase cannot be completed within the maximum sentence length.", "phrase");

      // this method is public and can be called before Run() has created the policies
      if (ntPolicy == null) InitPolicies(g);

      bool done = phrase.All(g.IsTerminal); // terminal phrase means we are done
      while (!done) {
        int ntIdx; char nt;
        Grammar.FindFirstNonTerminal(g, phrase, out nt, out ntIdx);

        var alts = g.GetAlternatives(nt);

        // The replacement may use whatever the rest of the phrase does not need at minimum.
        // Using phrase.Length here would count every other non-terminal as a single character and
        // could select an alternative that makes it impossible to finish within maxLen.
        // For grammars where every non-terminal has a minimal length of one this is the same as
        // maxLen - (phrase.Length - 1).
        int ntMinLen = alts.Min(alt => g.MinPhraseLength(alt));
        int maxLenOfReplacement = maxLen - (minLen - ntMinLen);
        Debug.Assert(maxLenOfReplacement > 0);

        string selectedAlt;
        // if the choice is restricted then one of the allowed alternatives is selected randomly
        if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
          var allowedAlts = alts.Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
          Debug.Assert(allowedAlts.Any());
          // replace nt with random alternative (not recorded, the policy had no say in this decision)
          selectedAlt = allowedAlts.SelectRandom(random);
        } else {
          // all alts are allowed => select using bandit policy
          var selectedAltIdx = ntPolicy[nt].SelectAction();
          selectedAlt = alts.ElementAt(selectedAltIdx);
          updateChain.Add(Tuple.Create(nt, selectedAltIdx));
        }

        // keep the minimal length of the phrase up to date
        minLen += g.MinPhraseLength(selectedAlt) - ntMinLen;

        // replace nt with alt
        phrase = phrase.Remove(ntIdx, 1);
        phrase = phrase.Insert(ntIdx, selectedAlt);

        done = phrase.All(g.IsTerminal); // terminal phrase means we are done
      }
      return phrase;
    }

    // Reports the reward to every policy decision recorded for the current sentence.
    private void DistributeReward(double reward) {
      foreach (var e in updateChain) {
        var nt = e.Item1;
        var action = e.Item2;
        ntPolicy[nt].UpdateReward(action, reward);
      }
    }

    private void RaiseSolutionEvaluated(string sentence, double quality) {
      var handler = SolutionEvaluated; // local copy: the subscriber list may change concurrently
      if (handler != null) handler(sentence, quality);
    }

    private void RaiseFoundNewBestSolution(string sentence, double quality) {
      var handler = FoundNewBestSolution; // local copy: the subscriber list may change concurrently
      if (handler != null) handler(sentence, quality);
    }
  }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences