Changeset 11755 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs
- Timestamp: 01/13/15 20:02:29 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs
r11747 r11755 8 8 9 9 namespace HeuristicLab.Problems.GrammaticalOptimization { 10 // must find one of k*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position10 // must find one of numCorrectPhrases*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position 11 11 // compared to the RoyalSequence problem this problem is harder because the number of different phrases starting at a position is much larger than the number of symbols (grows exponentially with the phrase-length) 12 12 // if phraseLen = 1 this is the same as the RoyalSequence problem … … 15 15 // - phraseLen: the length of a phrase in number of symbols 16 16 // - sequenceLen: the number of phrases in the correct subsequence (total sequence length is n * phraseLen 17 // - k: the number of correct phrases starting at each position 17 // - numCorrectPhrases: the number of correct phrases starting at each position 18 // - phrasesAsSets: switch to determine if the ordering of symbols within a phrase is relevant 18 19 // 19 20 // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS) 20 21 // for phraseLen > 1 this should be harder than RoyalSymbolProblem 22 // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD) 21 23 public class RoyalPhraseSequenceProblem : IProblem { 22 24 … … 24 26 private readonly double correctReward; 25 27 private readonly double incorrectReward; 26 private readonly int k;28 private readonly int _numCorrectPhrases; 27 29 private readonly int sequenceLen; 28 30 private readonly int alphabetSize; 29 31 private readonly int phraseLen; 32 private readonly bool phrasesAsSets; 30 33 private readonly SortedSet<string>[] optimalPhrasesForPos; 31 34 32 public 
RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {35 public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int numCorrectPhrases = 1, double correctReward = 1.0, double incorrectReward = 0.0, bool phrasesAsSets = false) { 33 36 if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException(); 34 37 if (sequenceLen <= 0) throw new ArgumentException(); 35 if ( k < 1 || k> alphabetSize) throw new ArgumentException();38 if (numCorrectPhrases < 1 || numCorrectPhrases > alphabetSize) throw new ArgumentException(); 36 39 if (phraseLen < 1) throw new ArgumentException(); 37 40 if (correctReward <= incorrectReward) throw new ArgumentException(); … … 40 43 this.sequenceLen = sequenceLen; 41 44 this.phraseLen = phraseLen; 42 this. k = k;45 this._numCorrectPhrases = numCorrectPhrases; 43 46 this.correctReward = correctReward; 44 47 this.incorrectReward = incorrectReward; 48 this.phrasesAsSets = phrasesAsSets; 45 49 var sentenceSymbol = 'S'; 46 50 var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray(); … … 54 58 for (int i = 0; i < sequenceLen; i++) { 55 59 optimalPhrasesForPos[i] = new SortedSet<string>(); 56 for (int j = 0; j < k; j++) {60 for (int j = 0; j < numCorrectPhrases; j++) { 57 61 string phrase = ""; 58 62 do { … … 60 64 phrase += terminalSymbols.SelectRandom(rand); 61 65 } 66 phrase = CanonicalPhrase(phrase); 62 67 } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases 63 68 optimalPhrasesForPos[i].Add(phrase); … … 65 70 } 66 71 67 Debug.Assert(Evaluate(BestKnownSolution) /BestKnownQuality(phraseLen * sequenceLen) == 1.0);72 Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0); 68 73 } 69 74 … … 93 98 var reward = 0.0; 94 99 for (int i = 0; i < Math.Min(sentence.Length / 
phraseLen, sequenceLen); i++) { 95 if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) { 100 var canonicalPhrase = CanonicalPhrase(sentence.Substring(i * phraseLen, phraseLen)); 101 if (optimalPhrasesForPos[i].Contains(canonicalPhrase)) { 96 102 reward += correctReward; 97 103 } else { … … 105 111 } 106 112 113 private string CanonicalPhrase(string phrase) { 114 if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch)); 115 else return phrase; 116 } 117 107 118 public string CanonicalRepresentation(string terminalPhrase) { 108 return terminalPhrase; 119 if (phrasesAsSets) { 120 var phrases = new List<string>(); 121 var numPhrases = terminalPhrase.Length / phraseLen; 122 for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) { 123 var sentenceIdx = phraseIdx * phraseLen; 124 var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen); 125 phrase = CanonicalPhrase(phrase); 126 phrases.Add(phrase); 127 } 128 129 return string.Join("", phrases); 130 } else 131 return terminalPhrase; 109 132 } 110 133 }
Note: See TracChangeset for help on using the changeset viewer.