Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs @ 11792

Last change on this file since 11792 was 11792, checked in by gkronber, 9 years ago

#2283 work-in-progress commit (does not compile)

File size: 8.7 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Resources;
6using System.Runtime.InteropServices;
7using System.Text;
8using HeuristicLab.Algorithms.Bandits;
9using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11using HeuristicLab.Common;
12using HeuristicLab.Problems.GrammaticalOptimization;
13
14namespace HeuristicLab.Algorithms.GrammaticalOptimization {
15  // a search procedure that uses a policy to generate sentences and updates the policy (online RL)
16  // 1) Start with phrase = sentence symbol of grammar
17  // 2) Repeat
18  //    a) generate derived phrases using left-canonical derivation and grammar rules
19  //    b) keep only the phrases which are allowed (sentence length limit)
20  //    c) if the set of phrases is empty restart with 1)
21  //    d) otherwise use policy to select one of the possible derived phrases as active phrase
22  //       the policy has the option to fail (for instance if all derived phrases are terminal and should not be visited again), in this case we restart at 1
23  //    ... until phrase is terminal
24  // 3) Collect reward and update policy (feedback: state of visited rewards from step 2)
25  public class SequentialSearch {
26
27    public event Action<string, double> FoundNewBestSolution;
28    public event Action<string, double> SolutionEvaluated;
29
30    private readonly int maxLen;
31    private readonly IProblem problem;
32    private readonly Random random;
33    private readonly int randomTries;
34    private readonly IGrammarPolicy behaviourPolicy;
35    private readonly IGrammarPolicy greedyPolicy;
36    private int maxSearchDepth;
37
38    private double bestQuality;
39    private string bestPhrase;
40    private int tries;
41    private readonly List<ReadonlySequence> stateChain;
42
43    public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
44      this.maxLen = maxLen;
45      this.problem = problem;
46      this.random = random;
47      this.randomTries = randomTries;
48      this.behaviourPolicy = behaviourPolicy;
49      this.greedyPolicy = new GreedyPolicy(problem, false);
50      this.stateChain = new List<ReadonlySequence>();
51      this.cache = new Dictionary<ReadonlySequence, ReadonlySequence[]>();
52    }
53
54    public void Run(int maxIterations) {
55      bestQuality = double.MinValue;
56      //InitPolicies(problem.Grammar);
57      Reset();
58
59      for (int i = 0; bestQuality < 1.0 && !Done() && i < maxIterations; i++) {
60        var phrase = SampleSentence(problem.Grammar);
61        // can fail on the last sentence
62        if (phrase.IsTerminal) {
63          var sentence = phrase.ToString();
64          tries++;
65          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
66          Debug.Assert(quality >= 0 && quality <= 1.0);
67          DistributeReward(quality);
68
69          RaiseSolutionEvaluated(sentence, quality);
70
71          if (quality > bestQuality) {
72            bestQuality = quality;
73            bestPhrase = sentence;
74            RaiseFoundNewBestSolution(sentence, quality);
75          }
76        }
77      }
78    }
79
80
81    private ReadonlySequence SampleSentence(IGrammar grammar) {
82      ReadonlySequence phrase;
83      do {
84        stateChain.Clear();
85        phrase = new ReadonlySequence(grammar.SentenceSymbol);
86        //var startPhrase = new Sequence("a*b+c*d+e*f+E");
87      } while (!Done() && !TryCompleteSentence(grammar, ref phrase));
88      return phrase;
89    }
90
91    private bool TryCompleteSentence(IGrammar g, ref ReadonlySequence phrase) {
92      if (phrase.Length > maxLen) throw new ArgumentException();
93      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
94      var curDepth = 0;
95
96      stateChain.Add(phrase);
97      while (!phrase.IsTerminal) {
98
99        var newPhrases = GenerateFollowStates(g, phrase);
100       
101        throw new NotImplementedException(); // TODO: reintroduce random-trie checking once the tree of all states has been reintroduced
102        //if (n.randomTries < randomTries) {
103        //  n.randomTries++;
104        //  treeDepth = Math.Max(treeDepth, curDepth);
105        //  lastNode = n;
106        //  return g.CompleteSentenceRandomly(random, phrase, maxLen);
107        //} else {
108
109
110          // => select using bandit policy
111          // failure means we simply restart
112          if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
113            return false;
114          }
115        // }
116        stateChain.Add(phrase);
117        curDepth++;
118      } // while
119
120      maxSearchDepth = Math.Max(maxSearchDepth, curDepth);
121      return true;
122    }
123
124
125    private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;
126    private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {
127      throw new NotImplementedException();
128      // TODO: Replace caching by a tree of all states. tree is only used for easily retrieving the follow-states of a state
129      ReadonlySequence[] follow;
130      //if (!cache.TryGetValue(phrase, out follow)) {
131        char nt = phrase.FirstNonTerminal;
132
133        int maxLenOfReplacement = maxLen - (phrase.Length - 1);
134        // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
135        Debug.Assert(maxLenOfReplacement > 0);
136
137        var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
138
139        follow = new ReadonlySequence[alts.Count()];
140        int idx = 0;
141        foreach (var alt in alts) {
142          var newPhrase = new Sequence(phrase); // clone
143          newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
144          follow[idx++] = new ReadonlySequence(newPhrase);
145        }
146      //  cache[phrase] = follow;
147      //}
148      return follow;
149    }
150
151    private void DistributeReward(double reward) {
152      behaviourPolicy.UpdateReward(stateChain, reward);
153      greedyPolicy.UpdateReward(stateChain, reward);
154    }
155
156
157    private void Reset() {
158      behaviourPolicy.Reset();
159      greedyPolicy.Reset();
160      maxSearchDepth = 0;
161      bestQuality = 0.0;
162      tries = 0;
163      cache.Clear();
164    }
165
166    public bool Done() {
167      var g = problem.Grammar;
168      var startState = new ReadonlySequence(g.SentenceSymbol);
169      var follow = GenerateFollowStates(g, startState);
170      ReadonlySequence selectedState;
171      return !behaviourPolicy.TrySelect(random, startState, follow, out selectedState);
172    }
173
174    #region introspection
175    public void PrintStats() {
176      Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
177
178      // use greedy strategy to generate the currently prefered sentence
179      var phrase = new ReadonlySequence(problem.Grammar.SentenceSymbol);
180      var policy = behaviourPolicy;
181      while (!phrase.IsTerminal) {
182        Console.ForegroundColor = ConsoleColor.White;
183        Console.WriteLine("{0,-30}", phrase);
184        var newPhrases = GenerateFollowStates(problem.Grammar, phrase);
185        if (!newPhrases.Any()) break;
186        var values = newPhrases.Select(p => policy.GetValue(p));
187        var maxValue = values.Max();
188        if (maxValue == 0) maxValue = 1.0;
189
190        // write phrases
191        foreach (var p in newPhrases) {
192          SetColorForValue(policy.GetValue(p) / maxValue);
193          Console.Write(" {0,-4}", p.Subsequence(Math.Max(0, p.Length - 3), Math.Min(3, p.Length)));
194        }
195        Console.WriteLine();
196
197        // write values
198        foreach (var p in newPhrases) {
199          SetColorForValue(policy.GetValue(p) / maxValue);
200          Console.Write(" {0:F2}", policy.GetValue(p) * 10.0);
201        }
202        Console.WriteLine();
203
204        // write tries
205        foreach (var p in newPhrases) {
206          SetColorForValue(policy.GetValue(p) / maxValue);
207          Console.Write(" {0,4}", policy.GetTries(p));
208        }
209        Console.WriteLine();
210        if (!policy.TrySelect(random, phrase, newPhrases, out phrase)) {
211          break;
212        }
213      }
214
215      Console.ForegroundColor = ConsoleColor.White;
216      Console.WriteLine("-------------------");
217    }
218
219    private void SetColorForValue(double v) {
220      Console.ForegroundColor = ConsoleEx.ColorForValue(v);
221    }
222    #endregion
223
224    private void RaiseSolutionEvaluated(string sentence, double quality) {
225      var handler = SolutionEvaluated;
226      if (handler != null) handler(sentence, quality);
227    }
228    private void RaiseFoundNewBestSolution(string sentence, double quality) {
229      var handler = FoundNewBestSolution;
230      if (handler != null) handler(sentence, quality);
231    }
232  }
233}
Note: See TracBrowser for help on using the repository browser.