Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs @ 11732

Last change on this file since 11732 was 11732, checked in by gkronber, 9 years ago

#2283: refactoring and bug fixes

File size: 4.1 KB
RevLine 
[11727]1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using HeuristicLab.Algorithms.Bandits;
7using HeuristicLab.Common;
8using HeuristicLab.Problems.GrammaticalOptimization;
9
10namespace HeuristicLab.Algorithms.GrammaticalOptimization {
11  public class AlternativesSampler {
12    public event Action<string, double> FoundNewBestSolution;
13    public event Action<string, double> SolutionEvaluated;
14
15    private readonly int maxLen;
16    private readonly Random random;
17    private readonly IProblem problem;
[11732]18    private readonly IPolicy policy;
[11727]19
[11732]20    public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
[11727]21      this.problem = problem;
22      this.maxLen = maxLen;
[11732]23      this.random = new Random();
24      this.policy = policy;
[11727]25    }
26
27    public void Run(int maxIterations) {
28      double bestQuality = double.MinValue;
29      InitPolicies(problem.Grammar);
30      for (int i = 0; i < maxIterations; i++) {
[11730]31        var sentence = SampleSentence(problem.Grammar).ToString();
[11732]32        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
[11727]33        DistributeReward(quality);
34
35        RaiseSolutionEvaluated(sentence, quality);
36
37        if (quality > bestQuality) {
38          bestQuality = quality;
39          RaiseFoundNewBestSolution(sentence, quality);
40        }
41      }
42    }
43
44
[11732]45    private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
[11727]46    private List<Tuple<char, int>> updateChain;
47
48    private void InitPolicies(IGrammar grammar) {
[11732]49      this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
[11727]50      this.updateChain = new List<Tuple<char, int>>();
51      foreach (var nt in grammar.NonTerminalSymbols) {
[11732]52        ntActionInfos.Add(nt, grammar.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
[11727]53      }
54    }
55
[11730]56    private Sequence SampleSentence(IGrammar grammar) {
[11727]57      updateChain.Clear();
[11730]58      return CompleteSentence(grammar, new Sequence(grammar.SentenceSymbol));
[11727]59    }
60
[11730]61    public Sequence CompleteSentence(IGrammar g, Sequence phrase) {
[11727]62      if (phrase.Length > maxLen) throw new ArgumentException();
63      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
[11730]64      bool done = phrase.IsTerminal; // terminal phrase means we are done
[11727]65      while (!done) {
[11730]66        char nt = phrase.FirstNonTerminal;
[11727]67
68        int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
69        Debug.Assert(maxLenOfReplacement > 0);
70
71        var alts = g.GetAlternatives(nt);
[11730]72        Sequence selectedAlt;
[11727]73        // if the choice is restricted then one of the allowed alternatives is selected randomly
74        if (alts.Any(alt => g.MinPhraseLength(alt) > maxLenOfReplacement)) {
75          var allowedAlts = alts.Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement);
76          Debug.Assert(allowedAlts.Any());
77          // replace nt with random alternative
78          selectedAlt = allowedAlts.SelectRandom(random);
79        } else {
80          // all alts are allowed => select using bandit policy
[11732]81          var selectedAltIdx = policy.SelectAction(random, ntActionInfos[nt]);
[11727]82          selectedAlt = alts.ElementAt(selectedAltIdx);
83          updateChain.Add(Tuple.Create(nt, selectedAltIdx));
84        }
85
86        // replace nt with alt
[11730]87        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
[11727]88
[11730]89        done = phrase.IsTerminal; // terminal phrase means we are done
[11727]90      }
91      return phrase;
92    }
93
94
95    private void DistributeReward(double reward) {
96      foreach (var e in updateChain) {
97        var nt = e.Item1;
98        var action = e.Item2;
[11732]99        ntActionInfos[nt][action].UpdateReward(reward);
[11727]100      }
101    }
102
103    private void RaiseSolutionEvaluated(string sentence, double quality) {
104      var handler = SolutionEvaluated;
105      if (handler != null) handler(sentence, quality);
106    }
107    private void RaiseFoundNewBestSolution(string sentence, double quality) {
108      var handler = FoundNewBestSolution;
109      if (handler != null) handler(sentence, quality);
110    }
111  }
112}
Note: See TracBrowser for help on using the repository browser.