Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs @ 12814

Last change on this file since 12814 was 12290, checked in by gkronber, 10 years ago

#2283 created a new branch to separate development from aballeit

File size: 8.9 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Resources;
6using System.Runtime.InteropServices;
7using System.Text;
8using HeuristicLab.Algorithms.Bandits;
9using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11using HeuristicLab.Common;
12using HeuristicLab.Problems.GrammaticalOptimization;
13
14namespace HeuristicLab.Algorithms.GrammaticalOptimization {
15  // a search procedure that uses a policy to generate sentences and updates the policy (online RL)
16  // 1) Start with phrase = sentence symbol of grammar
17  // 2) Repeat
18  //    a) generate derived phrases using left-canonical derivation and grammar rules
19  //    b) keep only the phrases which are allowed (sentence length limit)
20  //    c) if the set of phrases is empty restart with 1)
21  //    d) otherwise use policy to select one of the possible derived phrases as active phrase
22  //       the policy has the option to fail (for instance if all derived phrases are terminal and should not be visited again), in this case we restart at 1
23  //    ... until phrase is terminal
24  // 3) Collect reward and update policy (feedback: state of visited rewards from step 2)
25  public class SequentialSearch : SolverBase {
26    // only for storing states so that it is not necessary to allocate new state strings whenever we select a follow state using the policy
27    private class TreeNode {
28      public int randomTries;
29      public string phrase;
30      public Sequence alternative;
31      public TreeNode[] children;
32
33      public TreeNode(string phrase, Sequence alternative) {
34        this.alternative = alternative;
35        this.phrase = phrase;
36      }
37    }
38
39
40    private readonly int maxLen;
41    private readonly IProblem problem;
42    private readonly Random random;
43    private readonly int randomTries;
44    private readonly IGrammarPolicy behaviourPolicy;
45    private TreeNode rootNode;
46
47    private int tries;
48    private int maxSearchDepth;
49
50    private string bestPhrase;
51    private readonly List<string> stateChain;
52
53    public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
54      this.maxLen = maxLen;
55      this.problem = problem;
56      this.random = random;
57      this.randomTries = randomTries;
58      this.behaviourPolicy = behaviourPolicy;
59      this.stateChain = new List<string>();
60    }
61
62    public bool StopRequested {
63      get;
64      set;
65    }
66
67    public override void Run(int maxIterations) {
68      Reset();
69
70      for (int i = 0; !StopRequested && !Done() && i < maxIterations; i++) {
71        var phrase = SampleSentence(problem.Grammar);
72        // can fail on the last sentence
73        if (phrase.IsTerminal) {
74          var sentence = phrase.ToString();
75          tries++;
76          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
77          if (double.IsNaN(quality)) quality = 0.0;
78          Debug.Assert(quality >= 0 && quality <= 1.0);
79
80          if (quality > bestQuality) {
81            bestPhrase = sentence;
82          }
83
84          OnSolutionEvaluated(sentence, quality);
85          DistributeReward(quality);
86
87        }
88      }
89    }
90
91
92    private Sequence SampleSentence(IGrammar grammar) {
93      Sequence phrase;
94      do {
95        stateChain.Clear();
96        phrase = new Sequence(rootNode.phrase);
97      } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
98      return phrase;
99    }
100
101    private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) {
102      if (phrase.Length > maxLen) throw new ArgumentException();
103      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
104      var curDepth = 0;
105      var n = rootNode;
106      stateChain.Add(n.phrase);
107
108      while (!phrase.IsTerminal) {
109        if (n.randomTries < randomTries) {
110          n.randomTries++;
111          maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
112          g.CompleteSentenceRandomly(random, phrase, maxLen);
113          return true;
114        } else {
115          // => select using bandit policy
116          // failure means we simply restart
117          GenerateFollowStates(n); // creates child nodes for node n
118
119          int selectedChildIdx;
120          if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
121            return false;
122          }
123          phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
124
125          // prepare for next iteration
126          n = n.children[selectedChildIdx];
127          stateChain.Add(n.phrase);
128          curDepth++;
129        }
130      } // while
131
132      maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
133      return true;
134    }
135
136
137    private IEnumerable<string> GenerateFollowStates(TreeNode n) {
138      // create children on the first visit
139      if (n.children == null) {
140        var g = problem.Grammar;
141        // tree is only used for easily retrieving the follow-states of a state
142        var phrase = new Sequence(n.phrase);
143        char nt = phrase.FirstNonTerminal;
144
145        int maxLenOfReplacement = maxLen - (phrase.Length - 1);
146        // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
147        Debug.Assert(maxLenOfReplacement > 0);
148
149        var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
150
151        var children = new TreeNode[alts.Count()];
152        int idx = 0;
153        foreach (var alt in alts) {
154          // var newPhrase = new Sequence(phrase); // clone
155          // newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
156          // children[idx++] = new TreeNode(newPhrase.ToString(), alt);
157
158          // since we are not using a sequence later on we might directly transform the current sequence to a string and replace there
159          var phraseStr = phrase.ToString();
160          var sb = new StringBuilder(phraseStr);
161          sb.Remove(phrase.FirstNonTerminalIndex, 1).Insert(phrase.FirstNonTerminalIndex, alt.ToString());
162          children[idx++] = new TreeNode(sb.ToString(), alt);
163        }
164        n.children = children;
165      }
166      return n.children.Select(ch => ch.phrase);
167    }
168
169    private void DistributeReward(double reward) {
170      behaviourPolicy.UpdateReward(stateChain, reward);
171    }
172
173
174    private void Reset() {
175      StopRequested = false;
176      behaviourPolicy.Reset();
177      maxSearchDepth = 0;
178      bestQuality = 0.0;
179      tries = 0;
180      rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
181    }
182
183    public bool Done() {
184      int selectedStateIdx;
185      return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx);
186    }
187
188    #region introspection
189    public void PrintStats() {
190      Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
191
192      // use behaviour strategy to generate the currently prefered sentence
193      var policy = behaviourPolicy;
194
195      var n = rootNode;
196
197      while (n != null) {
198        var phrase = n.phrase;
199        Console.ForegroundColor = ConsoleColor.White;
200        Console.WriteLine("{0,-30}", phrase);
201        var children = n.children;
202        if (children == null || !children.Any()) break;
203        var values = children.Select(ch => policy.GetValue(ch.phrase));
204        var maxValue = values.Max();
205        if (maxValue == 0) maxValue = 1.0;
206        if (double.IsPositiveInfinity(maxValue)) maxValue = double.MaxValue;
207
208        // write phrases
209        foreach (var ch in children) {
210          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
211          Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
212        }
213        Console.WriteLine();
214
215        // write values
216        foreach (var ch in children) {
217          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
218          Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
219        }
220        Console.WriteLine();
221
222        // write tries
223        foreach (var ch in children) {
224          SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
225          Console.Write(" {0,4}", policy.GetTries(ch.phrase));
226        }
227        Console.WriteLine();
228        int selectedChildIdx;
229        if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
230          break;
231        }
232        n = n.children[selectedChildIdx];
233      }
234
235      Console.ForegroundColor = ConsoleColor.White;
236      Console.WriteLine("-------------------");
237    }
238
239    private void SetColorForValue(double v) {
240      Console.ForegroundColor = ConsoleEx.ColorForValue(v);
241    }
242    #endregion
243
244  }
245}
Note: See TracBrowser for help on using the repository browser.