Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs @ 11730

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago

#2283: several major extensions for grammatical optimization

File size: 4.0 KB
RevLine 
[11727]1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using HeuristicLab.Algorithms.Bandits;
7using HeuristicLab.Common;
8using HeuristicLab.Problems.GrammaticalOptimization;
9
10namespace HeuristicLab.Algorithms.GrammaticalOptimization {
11  public class AlternativesSampler {
12    public event Action<string, double> FoundNewBestSolution;
13    public event Action<string, double> SolutionEvaluated;
14
15    private readonly int maxLen;
16    private readonly Random random;
17    private readonly IProblem problem;
18
19    public AlternativesSampler(IProblem problem, int maxLen) {
20      this.problem = problem;
21      this.maxLen = maxLen;
22      this.random = new Random(31415);
23    }
24
25    public void Run(int maxIterations) {
26      double bestQuality = double.MinValue;
27      InitPolicies(problem.Grammar);
28      for (int i = 0; i < maxIterations; i++) {
[11730]29        var sentence = SampleSentence(problem.Grammar).ToString();
30        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
[11727]31        DistributeReward(quality);
32
33        RaiseSolutionEvaluated(sentence, quality);
34
35        if (quality > bestQuality) {
36          bestQuality = quality;
37          RaiseFoundNewBestSolution(sentence, quality);
38        }
39      }
40    }
41
42
43    private Dictionary<char, IPolicy> ntPolicy;
44    private List<Tuple<char, int>> updateChain;
45
46    private void InitPolicies(IGrammar grammar) {
47      this.ntPolicy = new Dictionary<char, IPolicy>();
48      this.updateChain = new List<Tuple<char, int>>();
49      foreach (var nt in grammar.NonTerminalSymbols) {
50        ntPolicy.Add(nt, new EpsGreedyPolicy(random, grammar.GetAlternatives(nt).Count(), 0.1));
51      }
52    }
53
[11730]54    private Sequence SampleSentence(IGrammar grammar) {
[11727]55      updateChain.Clear();
[11730]56      return CompleteSentence(grammar, new Sequence(grammar.SentenceSymbol));
[11727]57    }
58
[11730]59    public Sequence CompleteSentence(IGrammar g, Sequence phrase) {
[11727]60      if (phrase.Length > maxLen) throw new ArgumentException();
61      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
[11730]62      bool done = phrase.IsTerminal; // terminal phrase means we are done
[11727]63      while (!done) {
[11730]64        char nt = phrase.FirstNonTerminal;
[11727]65
66        int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
67        Debug.Assert(maxLenOfReplacement > 0);
68
69        var alts = g.GetAlternatives(nt);
[11730]70        Sequence selectedAlt;
[11727]71        // if the choice is restricted then one of the allowed alternatives is selected randomly
72        if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
73          var allowedAlts = alts.Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
74          Debug.Assert(allowedAlts.Any());
75          // replace nt with random alternative
76          selectedAlt = allowedAlts.SelectRandom(random);
77        } else {
78          // all alts are allowed => select using bandit policy
79          var selectedAltIdx = ntPolicy[nt].SelectAction();
80          selectedAlt = alts.ElementAt(selectedAltIdx);
81          updateChain.Add(Tuple.Create(nt, selectedAltIdx));
82        }
83
84        // replace nt with alt
[11730]85        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
[11727]86
[11730]87        done = phrase.IsTerminal; // terminal phrase means we are done
[11727]88      }
89      return phrase;
90    }
91
92
93    private void DistributeReward(double reward) {
94      foreach (var e in updateChain) {
95        var nt = e.Item1;
96        var action = e.Item2;
97        ntPolicy[nt].UpdateReward(action, reward);
98      }
99    }
100
101    private void RaiseSolutionEvaluated(string sentence, double quality) {
102      var handler = SolutionEvaluated;
103      if (handler != null) handler(sentence, quality);
104    }
105    private void RaiseFoundNewBestSolution(string sentence, double quality) {
106      var handler = FoundNewBestSolution;
107      if (handler != null) handler(sentence, quality);
108    }
109  }
110}
Note: See TracBrowser for help on using the repository browser.