Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs @ 11770

Last change on this file since 11770 was 11770, checked in by gkronber, 9 years ago

#2283: worked on generic sequential search alg with bandit policy as parameter

File size: 8.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Resources;
6using System.Runtime.InteropServices;
7using System.Text;
8using HeuristicLab.Algorithms.Bandits;
9using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11using HeuristicLab.Common;
12using HeuristicLab.Problems.GrammaticalOptimization;
13
14namespace HeuristicLab.Algorithms.GrammaticalOptimization {
15  // a search procedure that uses a policy to generate sentences and updates the policy (online RL)
16  // 1) Start with phrase = sentence symbol of grammar
17  // 2) Repeat
18  //    a) generate derived phrases using left-canonical derivation and grammar rules
19  //    b) keep only the phrases which are allowed (sentence length limit)
20  //    c) if the set of phrases is empty restart with 1)
21  //    d) otherwise use policy to select one of the possible derived phrases as active phrase
22  //       the policy has the option to fail (for instance if all derived phrases are terminal and should not be visited again), in this case we restart at 1
23  //    ... until phrase is terminal
24  // 3) Collect reward and update policy (feedback: state of visited rewards from step 2)
25  public class SequentialSearch {
26
27    public event Action<string, double> FoundNewBestSolution;
28    public event Action<string, double> SolutionEvaluated;
29
30    private readonly int maxLen;
31    private readonly IProblem problem;
32    private readonly Random random;
33    private readonly int randomTries;
34    private readonly IGrammarPolicy behaviourPolicy;
35    private readonly IGrammarPolicy greedyPolicy;
36    private int maxSearchDepth;
37
38    private double bestQuality;
39    private string bestPhrase;
40    private int tries;
41    private readonly List<ReadonlySequence> stateChain;
42
43    public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
44      this.maxLen = maxLen;
45      this.problem = problem;
46      this.random = random;
47      this.randomTries = randomTries;
48      this.behaviourPolicy = behaviourPolicy;
49      this.greedyPolicy = new GreedyPolicy(problem, false);
50      this.stateChain = new List<ReadonlySequence>();
51      this.cache = new Dictionary<ReadonlySequence, ReadonlySequence[]>();
52    }
53
54    public void Run(int maxIterations) {
55      bestQuality = double.MinValue;
56      //InitPolicies(problem.Grammar);
57      Reset();
58
59      for (int i = 0; !Done() && i < maxIterations; i++) {
60        var phrase = SampleSentence(problem.Grammar);
61        // can fail on the last sentence
62        if (phrase.IsTerminal) {
63          var sentence = phrase.ToString();
64          tries++;
65          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
66          Debug.Assert(quality >= 0 && quality <= 1.0);
67          DistributeReward(quality);
68
69          RaiseSolutionEvaluated(sentence, quality);
70
71          if (quality > bestQuality) {
72            bestQuality = quality;
73            bestPhrase = sentence;
74            RaiseFoundNewBestSolution(sentence, quality);
75          }
76        }
77      }
78
79      // clean up
80      // Reset(); GC.Collect();
81    }
82
83
84    private ReadonlySequence SampleSentence(IGrammar grammar) {
85      ReadonlySequence phrase;
86      do {
87        stateChain.Clear();
88        phrase = new ReadonlySequence(grammar.SentenceSymbol);
89        //var startPhrase = new Sequence("a*b+c*d+e*f+E");
90      } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
91      return phrase;
92    }
93
94    private bool TryCompleteSentence(IGrammar g, ref ReadonlySequence phrase) {
95      if (phrase.Length > maxLen) throw new ArgumentException();
96      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
97      var curDepth = 0;
98
99      stateChain.Add(phrase);
100      while (!phrase.IsTerminal) {
101
102        //if (n.randomTries < randomTries) {
103        //  n.randomTries++;
104        //  treeDepth = Math.Max(treeDepth, curDepth);
105        //  lastNode = n;
106        //  return g.CompleteSentenceRandomly(random, phrase, maxLen);
107        //} else {
108
109        var newPhrases = GenerateFollowStates(g, phrase);
110
111        // => select using bandit policy
112        // failure means we simply restart
113        if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
114          return false;
115        }
116        stateChain.Add(phrase);
117        curDepth++;
118      } // while
119
120      maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
121      return true;
122    }
123
124
125    private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;
126    private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {
127      ReadonlySequence[] follow;
128      if (!cache.TryGetValue(phrase, out follow)) {
129        char nt = phrase.FirstNonTerminal;
130
131        int maxLenOfReplacement = maxLen - (phrase.Length - 1);
132        // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
133        Debug.Assert(maxLenOfReplacement > 0);
134
135        var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
136
137        follow = new ReadonlySequence[alts.Count()];
138        int idx = 0;
139        foreach (var alt in alts) {
140          var newPhrase = new Sequence(phrase); // clone
141          newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
142          follow[idx++] = new ReadonlySequence(newPhrase);
143        }
144        cache[phrase] = follow;
145      }
146      return follow;
147    }
148
149    private void DistributeReward(double reward) {
150      behaviourPolicy.UpdateReward(stateChain, reward);
151      greedyPolicy.UpdateReward(stateChain, reward);
152    }
153
154
155    private void Reset() {
156      behaviourPolicy.Reset();
157      greedyPolicy.Reset();
158      maxSearchDepth = 0;
159      bestQuality = 0.0;
160      tries = 0;
161      cache.Clear();
162    }
163
164    public bool Done() {
165      var g = problem.Grammar;
166      var startState = new ReadonlySequence(g.SentenceSymbol);
167      var follow = GenerateFollowStates(g, startState);
168      ReadonlySequence selectedState;
169      return !behaviourPolicy.TrySelect(random, startState, follow, out selectedState);
170    }
171
172    #region introspection
173    public void PrintStats() {
174      Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
175
176      // use greedy strategy to generate the currently prefered sentence
177      var phrase = new ReadonlySequence(problem.Grammar.SentenceSymbol);
178      var policy = behaviourPolicy;
179      while (!phrase.IsTerminal) {
180        Console.ForegroundColor = ConsoleColor.White;
181        Console.WriteLine("{0,-30}", phrase);
182        var newPhrases = GenerateFollowStates(problem.Grammar, phrase);
183        if (!newPhrases.Any()) break;
184        var values = newPhrases.Select(p => policy.GetValue(p));
185        var maxValue = values.Max();
186        if (maxValue == 0) maxValue = 1.0;
187
188        // write phrases
189        foreach (var p in newPhrases) {
190          SetColorForValue(policy.GetValue(p) / maxValue);
191          Console.Write(" {0,-4}", p.Subsequence(Math.Max(0, p.Length - 3), Math.Min(3, p.Length)));
192        }
193        Console.WriteLine();
194
195        // write values
196        foreach (var p in newPhrases) {
197          SetColorForValue(policy.GetValue(p) / maxValue);
198          Console.Write(" {0:F2}", policy.GetValue(p) * 10.0);
199        }
200        Console.WriteLine();
201
202        // write tries
203        foreach (var p in newPhrases) {
204          SetColorForValue(policy.GetValue(p) / maxValue);
205          Console.Write(" {0,4}", policy.GetTries(p));
206        }
207        Console.WriteLine();
208        if (!policy.TrySelect(random, phrase, newPhrases, out phrase)) {
209          break;
210        }
211      }
212
213      Console.ForegroundColor = ConsoleColor.White;
214      Console.WriteLine("-------------------");
215    }
216
217    private void SetColorForValue(double v) {
218      Console.ForegroundColor = ConsoleEx.ColorForValue(v);
219    }
220    #endregion
221
222    private void RaiseSolutionEvaluated(string sentence, double quality) {
223      var handler = SolutionEvaluated;
224      if (handler != null) handler(sentence, quality);
225    }
226    private void RaiseFoundNewBestSolution(string sentence, double quality) {
227      var handler = FoundNewBestSolution;
228      if (handler != null) handler(sentence, quality);
229    }
230  }
231}
Note: See TracBrowser for help on using the repository browser.