Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.GrammaticalOptimization/Problems/RoyalPhraseSequenceProblem.cs @ 12394

Last change on this file since 12394 was 12391, checked in by gkronber, 10 years ago

#2283: added shuffling of terminal symbols to the royal pair problem to make sure that there is no bias from order of terminal symbols.

File size: 7.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Runtime.InteropServices;
6using System.Text;
7using System.Text.RegularExpressions;
8using HeuristicLab.Common;
9using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
10
11namespace HeuristicLab.Problems.GrammaticalOptimization {
12  // must find one of numCorrectPhrases*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position
13  // compared to the RoyalSequence problem this problem is harder because the number of different phrases starting at a position is much larger than the number of symbols (grows exponentially with the phrase-length)
14  // if phraseLen = 1 this is the same as the RoyalSequence problem
15  // parameters
16  // - alphabetSize: number of different symbols (max=26)
17  // - phraseLen: the length of a phrase in number of symbols
18  // - sequenceLen: the number of phrases in the correct subsequence (total sequence length is n * phraseLen
19  // - numCorrectPhrases: the number of correct phrases starting at each position
20  // - phrasesAsSets: switch to determine if the ordering of symbols within a phrase is relevant
21  //
22  // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS)
23  // for phraseLen > 1 this should be harder than RoyalSymbolProblem
24  // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD)
25  public class RoyalPhraseSequenceProblem : ISymbolicExpressionTreeProblem {
26
27    private readonly IGrammar grammar;
28    private readonly double correctReward;
29    private readonly double incorrectReward;
30    private readonly int sequenceLen;
31    private readonly int phraseLen;
32    private readonly bool phrasesAsSets;
33    private readonly SortedSet<string>[] optimalPhrasesForPos;
34    public string Name { get { return "RoyalPhraseSequence"; } }
35    public RoyalPhraseSequenceProblem(System.Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int numCorrectPhrases = 1, double correctReward = 1.0, double incorrectReward = 0.0, bool phrasesAsSets = false) {
36      if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException();
37      if (sequenceLen <= 0) throw new ArgumentException();
38      if (numCorrectPhrases < 1 || numCorrectPhrases > alphabetSize) throw new ArgumentException();
39      if (phraseLen < 1) throw new ArgumentException();
40      if (correctReward <= incorrectReward) throw new ArgumentException();
41
42      this.sequenceLen = sequenceLen;
43      this.phraseLen = phraseLen;
44      this.correctReward = correctReward;
45      this.incorrectReward = incorrectReward;
46      this.phrasesAsSets = phrasesAsSets;
47
48      var sentenceSymbol = 'S';
49      var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray();
50      var nonTerminalSymbols = new char[] { sentenceSymbol };
51
52      {
53        // create grammar
54        // S -> a..z | aS .. zS
55        var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
56          .Concat(terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t + sentenceSymbol.ToString())));
57
58        this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
59      }
60      {
61        // create grammar for tree-based GP
62        // S -> a..z | SS
63        var rules = terminalSymbols.Select(t => Tuple.Create(sentenceSymbol, t.ToString()))
64          .Concat(new Tuple<char, string>[] { Tuple.Create(sentenceSymbol, sentenceSymbol.ToString() + sentenceSymbol) });
65
66        this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules);
67      }
68
69      this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen];
70      for (int i = 0; i < sequenceLen; i++) {
71        optimalPhrasesForPos[i] = new SortedSet<string>();
72        for (int j = 0; j < numCorrectPhrases; j++) {
73          string phrase = "";
74          do {
75            for (int l = 0; l < phraseLen; l++) {
76              phrase += terminalSymbols.SelectRandom(rand);
77            }
78            phrase = CanonicalPhrase(phrase);
79          } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases
80          optimalPhrasesForPos[i].Add(phrase);
81        }
82      }
83
84      Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0);
85    }
86
87    public double BestKnownQuality(int maxLen) {
88      return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division
89    }
90
91    public string BestKnownSolution {
92      get {
93        string solution = "";
94        for (int i = 0; i < sequenceLen; i++) {
95          solution += optimalPhrasesForPos[i].First();
96        }
97        return solution;
98      }
99    }
100
101    public IGrammar Grammar {
102      get { return grammar; }
103    }
104
105    public double Evaluate(string sentence) {
106      // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow!
107      Debug.Assert(sentence.Any(c => grammar.IsTerminal(c)));
108      // as long as only correct symbols are found we increase the reward by +1
109      // on the first incorrect symbol we return
110      var reward = 0.0;
111      for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {
112        var canonicalPhrase = CanonicalPhrase(sentence.Substring(i * phraseLen, phraseLen));
113        if (optimalPhrasesForPos[i].Contains(canonicalPhrase)) {
114          reward += correctReward;
115        } else {
116          // alternatively reduce reward by number of remaining phrases
117          return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));
118          // stop on first incorrect symbol and return reward
119          //return reward;
120        }
121      }
122      return reward;
123    }
124
125    // TODO: cache canonical phrases in most-recently used dictionary for increased performance (see symbolicregressionpoly10problem)
126    private string CanonicalPhrase(string phrase) {
127      if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch));
128      else return phrase;
129    }
130
131    public string CanonicalRepresentation(string phrase) {
132      if (phrasesAsSets) {
133        var sb = new StringBuilder();
134        var numPhrases = phrase.Length / phraseLen;
135        for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) {
136          var sentenceIdx = phraseIdx * phraseLen;
137          var subphrase = phrase.Substring(sentenceIdx, phraseLen);
138          subphrase = CanonicalPhrase(subphrase);
139          sb.Append(subphrase);
140        }
141
142        var remainder = phrase.Substring(numPhrases * phraseLen, phrase.Length - (numPhrases * phraseLen));
143        remainder = CanonicalPhrase(remainder);
144        sb.Append(remainder);
145
146        return sb.ToString();
147      } else
148        return phrase;
149    }
150
151    public IEnumerable<Feature> GetFeatures(string phrase)
152    {
153      return new Feature[] {new Feature(phrase, 1.0)};
154    }
155
156    public IGrammar TreeBasedGPGrammar { get; private set; }
157    public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
158      var sb = new StringBuilder();
159      foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
160        if (s.Symbol.Name == "S") continue;
161        sb.Append(s.Symbol.Name);
162      }
163      return sb.ToString();
164    }
165  }
166}
Note: See TracBrowser for help on using the repository browser.