Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs @ 11806

Last change on this file since 11806 was 11806, checked in by gkronber, 9 years ago

#2283: separated value-states from done-states in GenericGrammarPolicy and removed disabling of actions from bandit policies

File size: 9.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Resources;
6using System.Runtime.InteropServices;
7using System.Text;
8using HeuristicLab.Algorithms.Bandits;
9using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11using HeuristicLab.Common;
12using HeuristicLab.Problems.GrammaticalOptimization;
13
14namespace HeuristicLab.Algorithms.GrammaticalOptimization {
15  // a search procedure that uses a policy to generate sentences and updates the policy (online RL)
16  // 1) Start with phrase = sentence symbol of grammar
17  // 2) Repeat
18  //    a) generate derived phrases using left-canonical derivation and grammar rules
19  //    b) keep only the phrases which are allowed (sentence length limit)
20  //    c) if the set of phrases is empty restart with 1)
21  //    d) otherwise use policy to select one of the possible derived phrases as active phrase
22  //       the policy has the option to fail (for instance if all derived phrases are terminal and should not be visited again), in this case we restart at 1
23  //    ... until phrase is terminal
24  // 3) Collect reward and update policy (feedback: state of visited rewards from step 2)
25  public class SequentialSearch {
26    // only for storing states so that it is not necessary to allocate new state strings whenever we select a follow state using the policy
27    private class TreeNode {
28      public int randomTries;
29      public string phrase;
30      public Sequence alternative;
31      public TreeNode[] children;
32
33      public TreeNode(string phrase, Sequence alternative) {
34        this.alternative = alternative;
35        this.phrase = phrase;
36      }
37    }
38
39
40    public event Action<string, double> FoundNewBestSolution;
41    public event Action<string, double> SolutionEvaluated;
42
43    private readonly int maxLen;
44    private readonly IProblem problem;
45    private readonly Random random;
46    private readonly int randomTries;
47    private readonly IGrammarPolicy behaviourPolicy;
48    private readonly IGrammarPolicy greedyPolicy;
49    private TreeNode rootNode;
50
51    private int tries;
52    private int maxSearchDepth;
53
54    private double bestQuality;
55    private string bestPhrase;
56    private readonly List<string> stateChain;
57
58    public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
59      this.maxLen = maxLen;
60      this.problem = problem;
61      this.random = random;
62      this.randomTries = randomTries;
63      this.behaviourPolicy = behaviourPolicy;
64      this.greedyPolicy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.0), false);
65      this.stateChain = new List<string>();
66    }
67
68    public void Run(int maxIterations) {
69      bestQuality = double.MinValue;
70      Reset();
71
72      for (int i = 0; /*!bestQuality.IsAlmost(1.0) && */!Done() && i < maxIterations; i++) {
73        var phrase = SampleSentence(problem.Grammar);
74        // can fail on the last sentence
75        if (phrase.IsTerminal) {
76          var sentence = phrase.ToString();
77          tries++;
78          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
79          if (double.IsNaN(quality)) quality = 0.0;
80          Debug.Assert(quality >= 0 && quality <= 1.0);
81          DistributeReward(quality);
82
83          RaiseSolutionEvaluated(sentence, quality);
84
85          if (quality > bestQuality) {
86            bestQuality = quality;
87            bestPhrase = sentence;
88            RaiseFoundNewBestSolution(sentence, quality);
89          }
90        }
91      }
92    }
93
94
95    private Sequence SampleSentence(IGrammar grammar) {
96      Sequence phrase;
97      do {
98        stateChain.Clear();
99        phrase = new Sequence(rootNode.phrase);
100      } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
101      return phrase;
102    }
103
104    private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
105      if (phrase.Length > maxLen) throw new ArgumentException();
106      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
107      var curDepth = 0;
108      var n = rootNode;
109      stateChain.Add(n.phrase);
110
111      while (!phrase.IsTerminal) {
112        if (n.randomTries < randomTries) {
113          n.randomTries++;
114          maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
115          g.CompleteSentenceRandomly(random, phrase, maxLen);
116          return true;
117        } else {
118          // => select using bandit policy
119          // failure means we simply restart
120          GenerateFollowStates(n); // creates child nodes for node n
121
122          int selectedChildIdx;
123          if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
124            return false;
125          }
126          phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
127
128          // prepare for next iteration
129          n = n.children[selectedChildIdx];
130          stateChain.Add(n.phrase);
131          curDepth++;
132        }
133      } // while
134
135      maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
136      return true;
137    }
138
139
140    private IEnumerable<string> GenerateFollowStates(TreeNode n) {
141      // create children on the first visit
142      if (n.children == null) {
143        var g = problem.Grammar;
144        // tree is only used for easily retrieving the follow-states of a state
145        var phrase = new Sequence(n.phrase);
146        char nt = phrase.FirstNonTerminal;
147
148        int maxLenOfReplacement = maxLen - (phrase.Length - 1);
149        // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
150        Debug.Assert(maxLenOfReplacement > 0);
151
152        var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
153
154        var children = new TreeNode[alts.Count()];
155        int idx = 0;
156        foreach (var alt in alts) {
157          // var newPhrase = new Sequence(phrase); // clone
158          // newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
159          // children[idx++] = new TreeNode(newPhrase.ToString(), alt);
160
161          // since we are not using a sequence later on we might directly transform the current sequence to a string and replace there
162          var phraseStr = phrase.ToString();
163          var sb = new StringBuilder(phraseStr);
164          sb.Remove(phrase.FirstNonTerminalIndex, 1).Insert(phrase.FirstNonTerminalIndex, alt.ToString());
165          children[idx++] = new TreeNode(sb.ToString(), alt);
166        }
167        n.children = children;
168      }
169      return n.children.Select(ch => ch.phrase);
170    }
171
172    private void DistributeReward(double reward) {
173      behaviourPolicy.UpdateReward(stateChain, reward);
174      greedyPolicy.UpdateReward(stateChain, reward);
175    }
176
177
178    private void Reset() {
179      behaviourPolicy.Reset();
180      greedyPolicy.Reset();
181      maxSearchDepth = 0;
182      bestQuality = 0.0;
183      tries = 0;
184      rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
185    }
186
187    public bool Done() {
188      int selectedStateIdx;
189      return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx);
190    }
191
192    #region introspection
193    public void PrintStats() {
194      Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
195
196      // use behaviour strategy to generate the currently prefered sentence
197      var policy = behaviourPolicy;
198
199      var n = rootNode;
200
201      while (n != null) {
202        var phrase = n.phrase;
203        Console.ForegroundColor = ConsoleColor.White;
204        Console.WriteLine("{0,-30}", phrase);
205        var children = n.children;
206        if (children == null || !children.Any()) break;
207        var values = children.Select(ch => policy.GetValue(ch.phrase));
208        var maxValue = values.Max();
209        if (maxValue == 0) maxValue = 1.0;
210
211        // write phrases
212        foreach (var ch in children) {
213          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
214          Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
215        }
216        Console.WriteLine();
217
218        // write values
219        foreach (var ch in children) {
220          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
221          Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
222        }
223        Console.WriteLine();
224
225        // write tries
226        foreach (var ch in children) {
227          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
228          Console.Write(" {0,4}", policy.GetTries(ch.phrase));
229        }
230        Console.WriteLine();
231        int selectedChildIdx;
232        if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
233          break;
234        }
235        n = n.children[selectedChildIdx];
236      }
237
238      Console.ForegroundColor = ConsoleColor.White;
239      Console.WriteLine("-------------------");
240    }
241
242    private void SetColorForValue(double v) {
243      Console.ForegroundColor = ConsoleEx.ColorForValue(v);
244    }
245    #endregion
246
247    private void RaiseSolutionEvaluated(string sentence, double quality) {
248      var handler = SolutionEvaluated;
249      if (handler != null) handler(sentence, quality);
250    }
251    private void RaiseFoundNewBestSolution(string sentence, double quality) {
252      var handler = FoundNewBestSolution;
253      if (handler != null) handler(sentence, quality);
254    }
255  }
256}
Note: See TracBrowser for help on using the repository browser.