Changeset 11755
- Timestamp:
- 01/13/15 20:02:29 (10 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj
r11747 r11755 45 45 <Compile Include="AlternativesSampler.cs" /> 46 46 <Compile Include="AlternativesContextSampler.cs" /> 47 <Compile Include="MctsQLearningSampler.cs" />48 47 <Compile Include="TemporalDifferenceTreeSearchSampler.cs" /> 49 48 <Compile Include="ExhaustiveRandomFirstSearch.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs
r11754 r11755 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Data.Odbc; 3 4 using System.Diagnostics; 4 5 using System.Linq; … … 17 18 // - number of decoy (sub-optimal) phrases 18 19 // - reward for decoy phrases (must be smaller than reward for optimal phrases) 20 // - phrasesAsSets: a switch to determine wether symbols in a phrase can be shuffled (sets) or if the ordering is relevant (non-sets) 21 22 // this problem should be similar to symbolic regression and should be easier for approaches using a state esimation value and the canoncial state 23 // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD) 19 24 public class FindPhrasesProblem : IProblem { 20 25 21 26 private readonly IGrammar grammar; 22 private readonly int alphabetSize;23 27 private readonly int numPhrases; 24 28 private readonly int phraseLen; … … 27 31 private readonly double correctReward; 28 32 private readonly double decoyReward; 33 private readonly bool phrasesAsSets; 29 34 private readonly SortedSet<string> optimalPhrases; 30 35 private readonly SortedSet<string> decoyPhrases; 31 36 32 37 public FindPhrasesProblem(Random rand, int alphabetSize, int numPhrases, int phraseLen, int numOptimalPhrases, int numDecoyPhrases = 1, 33 double correctReward = 1.0, double decoyReward = 0.0 ) {38 double correctReward = 1.0, double decoyReward = 0.0, bool phrasesAsSets = false) { 34 39 if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException(); 35 40 if (numPhrases <= 0) throw new ArgumentException(); … … 39 44 if (correctReward <= decoyReward) throw new ArgumentException(); 40 45 41 this.alphabetSize = alphabetSize;42 46 this.numPhrases = numPhrases; 43 47 this.phraseLen = phraseLen; 44 48 this.correctReward = correctReward; 45 49 this.decoyReward = decoyReward; 50 this.phrasesAsSets = phrasesAsSets; 46 51 47 52 // create grammar … … 54 59 this.grammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols, rules); 55 60 56 this.optimalPhrasesForPos = new SortedSet<string>[sequenceLen]; 57 for (int i = 0; i < sequenceLen; i++) { 58 optimalPhrasesForPos[i] = new SortedSet<string>(); 59 for (int j = 0; j < k; j++) { 60 string phrase = ""; 61 do { 62 for (int l = 0; l < phraseLen; l++) { 63 phrase += terminalSymbols.SelectRandom(rand); 64 } 65 } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases 66 optimalPhrasesForPos[i].Add(phrase); 61 // generate optimal phrases 62 optimalPhrases = new SortedSet<string>(); 63 while (optimalPhrases.Count < numOptimalPhrases) { 64 string phrase = ""; 65 for (int l = 0; l < phraseLen; l++) { 66 phrase += terminalSymbols.SelectRandom(rand); 67 67 } 68 phrase = CanonicalPhrase(phrase); 69 70 // don't allow dups 71 if (!optimalPhrases.Contains(phrase)) optimalPhrases.Add(phrase); 68 72 } 69 73 70 Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0); 74 // generate decoy phrases 75 decoyPhrases = new SortedSet<string>(); 76 while (decoyPhrases.Count < numDecoyPhrases) { 77 string phrase = ""; 78 for (int l = 0; l < phraseLen; l++) { 79 phrase += terminalSymbols.SelectRandom(rand); 80 } 81 phrase = CanonicalPhrase(phrase); 82 83 // don't allow dups 84 if (!optimalPhrases.Contains(phrase) && !decoyPhrases.Contains(phrase)) decoyPhrases.Add(phrase); 85 } 86 87 Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * numPhrases) == 1.0); 71 88 } 72 89 73 90 public double BestKnownQuality(int maxLen) { 74 return Math.Min(maxLen / phraseLen, sequenceLen) * correctReward; // integer division91 return Math.Min(maxLen / phraseLen, numPhrases) * correctReward; // integer division 75 92 } 76 93 77 94 public string BestKnownSolution { 78 get { 79 string solution = ""; 80 for (int i = 0; i < sequenceLen; i++) { 81 solution += optimalPhrasesForPos[i].First(); 82 } 83 return solution; 84 } 95 get { return string.Join("", optimalPhrases.Take(numPhrases)); } 85 96 } 86 97 … … 92 103 // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow! 93 104 Debug.Assert(sentence.Any(c => grammar.IsTerminal(c))); 94 // as long as only correct symbols are found we increase the reward by +1 95 // on the first incorrect symbol we return 96 var reward = 0.0;97 for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) {98 if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) {99 reward += correctReward;100 } else {101 // alternatively reduce reward by number of remaining phrases102 return Math.Max(0.0, reward + incorrectReward * (sentence.Length / phraseLen - i));103 // stop on first incorrect symbol and return reward104 //return reward;105 }105 106 107 // split the sentence in phrases 108 // phrases must not overlap in the sentence, multiple occurences of a phrase are not counted 109 // the order of phrases is not relevant 110 var numPhrases = sentence.Length / phraseLen; 111 var phrases = new SortedSet<string>(); 112 for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) { 113 var sentenceIdx = phraseIdx * phraseLen; 114 var phrase = sentence.Substring(sentenceIdx, phraseLen); 115 phrase = CanonicalPhrase(phrase); 116 if (!phrases.Contains(phrase)) phrases.Add(phrase); 106 117 } 118 119 // add reward for each correct phrase that occurs in the sentence 120 // add reward for each decoy phrase that occurs in the sentence 121 var reward = phrases.Intersect(optimalPhrases).Count() * correctReward 122 + phrases.Intersect(decoyPhrases).Count() * decoyReward; 123 124 125 107 126 return reward; 108 127 } 109 128 129 private string CanonicalPhrase(string phrase) { 130 if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch)); 131 else return phrase; 132 } 133 110 134 public string CanonicalRepresentation(string terminalPhrase) { 111 return terminalPhrase; 135 // as the ordering of phrases does not matter we can reorder the phrases 136 // and remove duplicates 137 var numPhrases = terminalPhrase.Length / phraseLen; 138 var phrases = new SortedSet<string>(); 139 for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) { 140 var sentenceIdx = phraseIdx * phraseLen; 141 var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen); 142 phrase = CanonicalPhrase(phrase); 143 if (!phrases.Contains(phrase)) phrases.Add(phrase); 144 } 145 return string.Join("", phrases); 112 146 } 113 147 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs
r11747 r11755 8 8 9 9 namespace HeuristicLab.Problems.GrammaticalOptimization { 10 // must find one of k*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position10 // must find one of numCorrectPhrases*sequenceLen sequences where the quality of a sequence is the length of the subsequence containing only correct _phrases_ (of length phraseLen) and starting at the first position 11 11 // compared to the RoyalSequence problem this problem is harder because the number of different phrases starting at a position is much larger than the number of symbols (grows exponentially with the phrase-length) 12 12 // if phraseLen = 1 this is the same as the RoyalSequence problem … … 15 15 // - phraseLen: the length of a phrase in number of symbols 16 16 // - sequenceLen: the number of phrases in the correct subsequence (total sequence length is n * phraseLen 17 // - k: the number of correct phrases starting at each position 17 // - numCorrectPhrases: the number of correct phrases starting at each position 18 // - phrasesAsSets: switch to determine if the ordering of symbols within a phrase is relevant 18 19 // 19 20 // this problem should be hard for GP and easy for MCTS (TD should not have an advantage compared to MCTS) 20 21 // for phraseLen > 1 this should be harder than RoyalSymbolProblem 22 // when phrases are symbol sets instead of sequences then value-estimation routines should be better (TD) 21 23 public class RoyalPhraseSequenceProblem : IProblem { 22 24 … … 24 26 private readonly double correctReward; 25 27 private readonly double incorrectReward; 26 private readonly int k;28 private readonly int _numCorrectPhrases; 27 29 private readonly int sequenceLen; 28 30 private readonly int alphabetSize; 29 31 private readonly int phraseLen; 32 private readonly bool phrasesAsSets; 30 33 private readonly SortedSet<string>[] optimalPhrasesForPos; 31 34 32 public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int k = 1, double correctReward = 1.0, double incorrectReward = 0.0) {35 public RoyalPhraseSequenceProblem(Random rand, int alphabetSize, int sequenceLen, int phraseLen = 1, int numCorrectPhrases = 1, double correctReward = 1.0, double incorrectReward = 0.0, bool phrasesAsSets = false) { 33 36 if (alphabetSize <= 0 || alphabetSize > 26) throw new ArgumentException(); 34 37 if (sequenceLen <= 0) throw new ArgumentException(); 35 if ( k < 1 || k> alphabetSize) throw new ArgumentException();38 if (numCorrectPhrases < 1 || numCorrectPhrases > alphabetSize) throw new ArgumentException(); 36 39 if (phraseLen < 1) throw new ArgumentException(); 37 40 if (correctReward <= incorrectReward) throw new ArgumentException(); … … 40 43 this.sequenceLen = sequenceLen; 41 44 this.phraseLen = phraseLen; 42 this. k = k;45 this._numCorrectPhrases = numCorrectPhrases; 43 46 this.correctReward = correctReward; 44 47 this.incorrectReward = incorrectReward; 48 this.phrasesAsSets = phrasesAsSets; 45 49 var sentenceSymbol = 'S'; 46 50 var terminalSymbols = Enumerable.Range(0, alphabetSize).Select(off => (char)((byte)'a' + off)).ToArray(); … … 54 58 for (int i = 0; i < sequenceLen; i++) { 55 59 optimalPhrasesForPos[i] = new SortedSet<string>(); 56 for (int j = 0; j < k; j++) {60 for (int j = 0; j < numCorrectPhrases; j++) { 57 61 string phrase = ""; 58 62 do { … … 60 64 phrase += terminalSymbols.SelectRandom(rand); 61 65 } 66 phrase = CanonicalPhrase(phrase); 62 67 } while (optimalPhrasesForPos[i].Contains(phrase)); // don't allow duplicate phrases 63 68 optimalPhrasesForPos[i].Add(phrase); … … 65 70 } 66 71 67 Debug.Assert(Evaluate(BestKnownSolution) /BestKnownQuality(phraseLen * sequenceLen) == 1.0);72 Debug.Assert(Evaluate(BestKnownSolution) / BestKnownQuality(phraseLen * sequenceLen) == 1.0); 68 73 } 69 74 … … 93 98 var reward = 0.0; 94 99 for (int i = 0; i < Math.Min(sentence.Length / phraseLen, sequenceLen); i++) { 95 if (optimalPhrasesForPos[i].Contains(sentence.Substring(i * phraseLen, phraseLen))) { 100 var canonicalPhrase = CanonicalPhrase(sentence.Substring(i * phraseLen, phraseLen)); 101 if (optimalPhrasesForPos[i].Contains(canonicalPhrase)) { 96 102 reward += correctReward; 97 103 } else { … … 105 111 } 106 112 113 private string CanonicalPhrase(string phrase) { 114 if (phrasesAsSets) return string.Join("", phrase.OrderBy(ch => (byte)ch)); 115 else return phrase; 116 } 117 107 118 public string CanonicalRepresentation(string terminalPhrase) { 108 return terminalPhrase; 119 if (phrasesAsSets) { 120 var phrases = new List<string>(); 121 var numPhrases = terminalPhrase.Length / phraseLen; 122 for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) { 123 var sentenceIdx = phraseIdx * phraseLen; 124 var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen); 125 phrase = CanonicalPhrase(phrase); 126 phrases.Add(phrase); 127 } 128 129 return string.Join("", phrases); 130 } else 131 return terminalPhrase; 109 132 } 110 133 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11747 r11755 146 146 // TODO: wie kann ich sampler noch vergleichen bzw. was kann man messen um die qualität des samplers abzuschätzen (bis auf qualität und iterationen bis zur besten lösung) => ziel schnellere iterationen zu gutem ergebnis 147 147 // TODO: research thompson sampling for max bandit? 148 // TODO: ausführlicher test von strategien für k-armed max bandit148 // TODO: ausführlicher test von strategien für numCorrectPhrases-armed max bandit 149 149 // TODO: verify TA implementation using example from the original paper 150 150 // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence) … … 166 166 var random = new Random(); 167 167 168 var phraseLen = 1; 169 var sentenceLen = 25; 170 var numPhrases = sentenceLen / phraseLen; 171 var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: 1, k: 1, correctReward: 1, incorrectReward: 0); 172 173 //var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward) 168 //var phraseLen = 3; 169 //var numPhrases = 5; 170 //var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true); 171 172 //var phraseLen = 4; 173 //var numPhrases = 5; 174 //var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 500, correctReward: 1.0, decoyReward: 0.2, phrasesAsSets: true); 175 176 var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward) 174 177 // Ant 175 178 // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01)); … … 182 185 //var problem = new EvenParityProblem(); 183 186 // symbreg length = 11 q = 0.824522210419616 184 var alg = new MctsSampler(problem, sentenceLen, random, 0, new BoltzmannExplorationPolicy(200)); 187 //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100)); 188 var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1)); 185 189 //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null); 186 190 //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
Note: See TracChangeset
for help on using the changeset viewer.