Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/RoyalPhraseSequenceProblem.cs @ 12099

Last change on this file since 12099 was 12099, checked in by gkronber, 9 years ago

#2283: name for all problems (for output), new unit test, and added testsettings file

File size: 7.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Text;
6using System.Text.RegularExpressions;
7using HeuristicLab.Common;
8using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
9
10namespace HeuristicLab.Problems.GrammaticalOptimization {
11  // must find one of numCorrectPhrases*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position
12  // compared to the RoyalSequence problem this problem is harder because the number of different phrases starting at a position is much larger than the number of symbols (grows exponentially with the phrase-length)
13  // if phraseLen = 1 this is the same as the RoyalSequence problem
14  // parameters
15  // - alphabetSize: number of different symbols (max=26)
16  // - phraseLen: the length of a phrase in number of symbols
17  // - sequenceLen: the number of phrases in the correct subsequence (total sequence length is n * phraseLen
18  // - numCorrectPhrases: the number of correct phrases starting at each position
19  // - phrasesAsSets: switch to determine if the ordering of symbols within a phrase is relevant
20  //
21  // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
22  // for phraseLen > 1 this should be harder than RoyalSymbolProblem
23  // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD)
24  public class RoyalPhraseSequenceProblem : ISymbolicExpressionTreeProblem {
25
26    private readonly IGrammar grammar;
27    private readonly double correctReward;
28    private readonly double incorrectReward;
29    private readonly int sequenceLen;
30    private readonly int phraseLen;
31    private readonly bool phrasesAsSets;
32    private readonly SortedSet<string>[] optimalPhrasesForPos;
33    public string Name { get { return "RoyalPhraseSequence"; } }
34    public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int numCorrectPhrases = 1, double correctReward = 1.0, double incorrectReward = 0.0, bool phrasesAsSets = false) {
35      if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
36      if (sequenceLen <= 0) throw new ArgumentException();
37      if (numCorrectPhrases < 1 || numCorrectPhrases > alphabetSize) throw new ArgumentException();
38      if (phraseLen < 1) throw new ArgumentException();
39      if (correctReward <= incorrectReward) throw new ArgumentException();
40
41      this.sequenceLen = sequenceLen;
42      this.phraseLen = phraseLen;
43      this.correctReward = correctReward;
44      this.incorrectReward = incorrectReward;
45      this.phrasesAsSets = phrasesAsSets;
46
47      var sentenceSymbol = 'S';
48      var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
49      var nonTerminalSymbols = new char[] { sentenceSymbol };
50
51      {
52        // create grammar
53        // S -> a..z | aS .. zS
54        var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
55          .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
56
57        this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
58      }
59      {
60        // create grammar for tree-based GP
61        // S -> a..z | SS
62        var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
63          .Concat(new Tuple<char, string>[] { Tuple.Create(sentenceSymbol, sentenceSymbol.ToString() + sentenceSymbol) });
64
65        this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
66      }
67
68      this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen];
69      for (int i = 0; i < sequenceLen; i++) {
70        optimalPhrasesForPos[i] = new SortedSet<string>();
71        for (int j = 0; j < numCorrectPhrases; j++) {
72          string phrase = "";
73          do {
74            for (int l = 0; l < phraseLen; l++) {
75              phrase += terminalSymbols.SelectRandom(rand);
76            }
77            phrase = CanonicalPhrase(phrase);
78          } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
79          optimalPhrasesForPos[i].Add(phrase);
80        }
81      }
82
83      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
84    }
85
86    public double BestKnownQuality(int maxLen) {
87      return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division
88    }
89
90    public string BestKnownSolution {
91      get {
92        string solution = "";
93        for (int i = 0; i < sequenceLen; i++) {
94          solution += optimalPhrasesForPos[i].First();
95        }
96        return solution;
97      }
98    }
99
100    public IGrammar Grammar {
101      get { return grammar; }
102    }
103
104    public double Evaluate(string sentence) {
105      // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
106      Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
107      // as long as only correct symbols are found we increase the reward by +1
108      // on the first incorrect symbol we return
109      var reward = 0.0;
110      for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
111        var canonicalPhrase = CanonicalPhrase(sentence.Substring(i * phraseLen, phraseLen));
112        if (optimalPhrasesForPos[i].Contains(canonicalPhrase)) {
113          reward += correctReward;
114        } else {
115          // alternatively reduce reward by number of remaining phrases
116          return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));
117          // stop on first incorrect symbol and return reward
118          //return reward;
119        }
120      }
121      return reward;
122    }
123
124    // TODO: cache canonical phrases in most-recently used dictionary for increased performance (see symbolicregressionpoly10problem)
125    private string CanonicalPhrase(string phrase) {
126      if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));
127      else return phrase;
128    }
129
130    public string CanonicalRepresentation(string phrase) {
131      if (phrasesAsSets) {
132        var sb = new StringBuilder();
133        var numPhrases = phrase.Length / phraseLen;
134        for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
135          var sentenceIdx = phraseIdx * phraseLen;
136          var subphrase = phrase.Substring(sentenceIdx, phraseLen);
137          subphrase = CanonicalPhrase(subphrase);
138          sb.Append(subphrase);
139        }
140
141        var remainder = phrase.Substring(numPhrases * phraseLen, phrase.Length - (numPhrases * phraseLen));
142        remainder = CanonicalPhrase(remainder);
143        sb.Append(remainder);
144
145        return sb.ToString();
146      } else
147        return phrase;
148    }
149
150    public IEnumerable<Feature> GetFeatures(string phrase) {
151      throw new NotImplementedException();
152    }
153
154    public IGrammar TreeBasedGPGrammar { get; private set; }
155    public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
156      var sb = new StringBuilder();
157      foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
158        if (s.Symbol.Name == "S") continue;
159        sb.Append(s.Symbol.Name);
160      }
161      return sb.ToString();
162    }
163  }
164}
Note: See TracBrowser for help on using the repository browser.