Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs @ 11727

Last change on this file since 11727 was 11727, checked in by gkronber, 8 years ago

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File size: 4.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using HeuristicLab.Algorithms.Bandits;
7using HeuristicLab.Common;
8using HeuristicLab.Problems.GrammaticalOptimization;
9
10namespace HeuristicLab.Algorithms.GrammaticalOptimization {
11  public class AlternativesContextSampler {
12    public event Action<string, double> FoundNewBestSolution;
13    public event Action<string, double> SolutionEvaluated;
14
15    private readonly int maxLen;
16    private readonly IProblem problem;
17    private readonly Random random;
18    private readonly int contextLen;
19
20    public AlternativesContextSampler(IProblem problem, int maxLen) {
21      this.maxLen = maxLen;
22      this.problem = problem;
23      this.random = new Random(31415);
24      this.contextLen = 25;
25    }
26
27    public void Run(int maxIterations) {
28      double bestQuality = double.MinValue;
29      InitPolicies(problem.Grammar);
30      for (int i = 0; i < maxIterations; i++) {
31        var sentence = SampleSentence(problem.Grammar);
32        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
33        DistributeReward(quality);
34
35        RaiseSolutionEvaluated(sentence, quality);
36
37        if (quality > bestQuality) {
38          bestQuality = quality;
39          RaiseFoundNewBestSolution(sentence, quality);
40        }
41      }
42    }
43
44
45    private Dictionary<string, IPolicy> ntPolicy;
46    private List<Tuple<string, int>> updateChain;
47    private void InitPolicies(IGrammar grammar) {
48      this.ntPolicy = new Dictionary<string, IPolicy>();
49      this.updateChain = new List<Tuple<string, int>>();
50    }
51
52    private string SampleSentence(IGrammar grammar) {
53      updateChain.Clear();
54      return CompleteSentence(grammar, grammar.SentenceSymbol.ToString());
55    }
56
57    public string CompleteSentence(IGrammar g, string phrase) {
58      if (phrase.Length > maxLen) throw new ArgumentException();
59      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
60      bool done = phrase.All(g.IsTerminal); // terminal phrase means we are done
61      while (!done) {
62        int ntIdx; char nt;
63        Grammar.FindFirstNonTerminal(g, phrase, out nt, out ntIdx);
64
65        int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
66        Debug.Assert(maxLenOfReplacement > 0);
67
68        var alts = g.GetAlternatives(nt);
69        string selectedAlt;
70        // if the choice is restricted then one of the allowed alternatives is selected randomly
71        if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
72          var allowedAlts = alts.Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
73          Debug.Assert(allowedAlts.Any());
74          // replace nt with random alternative
75          selectedAlt = allowedAlts.SelectRandom(random);
76        } else {
77          // all alts are allowed => select using bandit policy
78          var startIdx = Math.Max(0, ntIdx - contextLen);
79          var endIdx = Math.Min(startIdx + contextLen, ntIdx);
80          var lft = phrase.Substring(startIdx, endIdx - startIdx + 1);
81          lft = problem.Hash(lft);
82          if (!ntPolicy.ContainsKey(lft)) {
83            ntPolicy.Add(lft, new UCB1TunedPolicy(g.GetAlternatives(nt).Count()));
84          }
85          var selectedAltIdx = ntPolicy[lft].SelectAction();
86          selectedAlt = alts.ElementAt(selectedAltIdx);
87          updateChain.Add(Tuple.Create(lft, selectedAltIdx));
88        }
89
90        // replace nt with alt
91        phrase = phrase.Remove(ntIdx, 1);
92        phrase = phrase.Insert(ntIdx, selectedAlt);
93
94        done = phrase.All(g.IsTerminal); // terminal phrase means we are done
95      }
96      return phrase;
97    }
98
99    private void DistributeReward(double reward) {
100      foreach (var e in updateChain) {
101        var lft = e.Item1;
102        var action = e.Item2;
103        ntPolicy[lft].UpdateReward(action, reward);
104      }
105    }
106
107    private void RaiseSolutionEvaluated(string sentence, double quality) {
108      var handler = SolutionEvaluated;
109      if (handler != null) handler(sentence, quality);
110    }
111    private void RaiseFoundNewBestSolution(string sentence, double quality) {
112      var handler = FoundNewBestSolution;
113      if (handler != null) handler(sentence, quality);
114    }
115  }
116}
Note: See TracBrowser for help on using the repository browser.