Changeset 15734 for branches/2886_SymRegGrammarEnumeration
- Timestamp:
- 02/07/18 17:30:02 (7 years ago)
- Location:
- branches/2886_SymRegGrammarEnumeration
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration
-
Property
svn:ignore
set to
TestResults
-
Property
svn:ignore
set to
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs
r15725 r15734 158 158 public int CalcHashCode(SymbolString sentence) { 159 159 Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); 160 Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");161 162 Stack< TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());163 164 TerminalSymbol peek = parseStack.Peek();160 // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!"); 161 162 Stack<Symbol> parseStack = new Stack<Symbol>(sentence); 163 164 Symbol peek = parseStack.Peek(); 165 165 int[] subtreeHashes = GetSubtreeHashes(parseStack); 166 166 return AggregateHashes(peek, subtreeHashes); 167 167 } 168 168 169 private int[] GetSubtreeHashes(Stack< TerminalSymbol> parseStack) {170 TerminalSymbol currentSymbol = parseStack.Pop();169 private int[] GetSubtreeHashes(Stack<Symbol> parseStack) { 170 Symbol currentSymbol = parseStack.Pop(); 171 171 172 172 // VARIABLE 173 if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {174 return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();175 }173 // if (Var.VariableTerminalSymbols.Contains(currentSymbol)) { 174 // return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray(); 175 // } 176 176 177 177 // MULTIPLICATION … … 220 220 return result.ToArray(); 221 221 } 222 throw new ArgumentException("Trying to hash malformed sentence!"); 223 } 224 225 private int AggregateHashes(TerminalSymbol rule, IEnumerable<int> hashes) { 222 223 // var 224 return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray(); 225 226 227 // throw new ArgumentException("Trying to hash malformed sentence!"); 228 } 229 230 private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) { 226 231 // If multiple subtrees are "merged" (e.g. added, multiplied, etc.), consider the executed operation 227 232 var hashesArray = hashes.ToArray(); 228 int start = hashesArray.Length > 1 ? rule.StringRepresentation.GetHashCode() : 0;233 int start = hashesArray.Length > 1 ? operatorSym.StringRepresentation.GetHashCode() : 0; 229 234 return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode()); 230 235 } -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15726 r15734 57 57 public SymbolString BestTestSentence; 58 58 59 public List<Tuple<SymbolString, int>> DistinctGenerated;60 public List<Tuple<SymbolString, int>> AllGenerated;59 public List<Tuple<SymbolString, int>> distinctSentences; 60 public List<Tuple<SymbolString, int>> sentences; 61 61 #endregion 62 62 … … 88 88 89 89 protected override void Run(CancellationToken cancellationToken) { 90 Results.Add(new Result("Best R²", new DoubleValue(0.0))); 91 var rand = new System.Random(1234); 90 92 BestTrainingSentence = null; 91 93 BestTrainingSentence = null; 92 AllGenerated = new List<Tuple<SymbolString, int>>(); 93 DistinctGenerated = new List<Tuple<SymbolString, int>>(); 94 94 this.sentences = new List<Tuple<SymbolString, int>>(); 95 this.distinctSentences = new List<Tuple<SymbolString, int>>(); 96 var archivedPhrases = new Dictionary<int, SymbolString>(); 97 int expansions = 0; 95 98 Dictionary<int, SymbolString> evaluatedHashes = new Dictionary<int, SymbolString>(); 96 99 97 100 Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray()); 98 101 99 Stack<SymbolString> remainingTrees = new Stack<SymbolString>(); 100 remainingTrees.Push(new SymbolString(new[] { Grammar.StartSymbol })); 101 102 while (remainingTrees.Any()) { 102 var phrases = new Dictionary<int, SymbolString>(); 103 var phrase0 = new SymbolString(new[] { Grammar.StartSymbol }); 104 phrases.Add(Grammar.CalcHashCode(phrase0), phrase0); 105 106 while (phrases.Any()) { 103 107 if (cancellationToken.IsCancellationRequested) break; 104 108 105 SymbolString currSymbolString = remainingTrees.Pop(); 106 107 if (currSymbolString.IsSentence()) { 108 AllGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString))); 109 110 int currSymbolStringHash = Grammar.CalcHashCode(currSymbolString); 111 if (!evaluatedHashes.ContainsKey(currSymbolStringHash) 112 || evaluatedHashes[currSymbolStringHash].Count > currSymbolString.Count) { 113 evaluatedHashes[currSymbolStringHash] = currSymbolString; 114 115 DistinctGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString))); 116 EvaluateSentence(currSymbolString); 109 // FIFO 110 // SymbolString currSymbolString = phrases.First(); 111 // phrases.RemoveAt(0); 112 113 114 // LIFO 115 // SymbolString currSymbolString = phrases.Last(); 116 // phrases.RemoveAt(phrases.Count - 1); 117 118 119 // RANDOM 120 int idx = rand.Next(phrases.Count); 121 var selectedEntry = phrases.ElementAt(idx); // TODO: Perf von ElementAt ist schlecht. 122 phrases.Remove(selectedEntry.Key); 123 var currPhrase = selectedEntry.Value; 124 125 archivedPhrases.Add(selectedEntry.Key, selectedEntry.Value); 126 127 if (currPhrase.IsSentence()) { 128 int currSymbolStringHash = Grammar.CalcHashCode(currPhrase); 129 this.sentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash)); 130 131 if (!evaluatedHashes.ContainsKey(currSymbolStringHash)) { 132 evaluatedHashes[currSymbolStringHash] = currPhrase; 133 134 this.distinctSentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash)); 135 EvaluateSentence(currPhrase); 117 136 } 118 UpdateView( AllGenerated, DistinctGenerated);137 UpdateView(this.sentences, this.distinctSentences); 119 138 120 139 } else { 121 140 // expand next nonterminal symbols 122 int nonterminalSymbolIndex = curr SymbolString.FindIndex(s => s is NonterminalSymbol);123 NonterminalSymbol expandedSymbol = curr SymbolString[nonterminalSymbolIndex] as NonterminalSymbol;141 int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol); 142 NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol; 124 143 125 144 foreach (Production productionAlternative in expandedSymbol.Alternatives) { 126 SymbolString newSentence = new SymbolString(curr SymbolString);145 SymbolString newSentence = new SymbolString(currPhrase); 127 146 newSentence.RemoveAt(nonterminalSymbolIndex); 128 147 newSentence.InsertRange(nonterminalSymbolIndex, productionAlternative); 129 148 149 expansions++; 130 150 if (newSentence.Count <= MaxTreeSize) { 131 remainingTrees.Push(newSentence); 151 var phraseHash = Grammar.CalcHashCode(newSentence); 152 if(!phrases.ContainsKey(phraseHash) && 153 !archivedPhrases.ContainsKey(phraseHash)) 154 phrases.Add(phraseHash, newSentence); 132 155 } 133 156 } … … 135 158 } 136 159 137 UpdateView( AllGenerated, DistinctGenerated, force: true);138 139 string[,] sentences = new string[ AllGenerated.Count, 3];140 for (int i = 0; i < AllGenerated.Count; i++) {141 sentences[i, 0] = AllGenerated[i].Item1.ToString();142 sentences[i, 1] = Grammar.PostfixToInfixParser( AllGenerated[i].Item1).ToString();143 sentences[i, 2] = AllGenerated[i].Item2.ToString();160 UpdateView(this.sentences, this.distinctSentences, force: true); 161 162 string[,] sentences = new string[this.sentences.Count, 3]; 163 for (int i = 0; i < this.sentences.Count; i++) { 164 sentences[i, 0] = this.sentences[i].Item1.ToString(); 165 sentences[i, 1] = Grammar.PostfixToInfixParser(this.sentences[i].Item1).ToString(); 166 sentences[i, 2] = this.sentences[i].Item2.ToString(); 144 167 } 145 168 Results.Add(new Result("All generated sentences", new StringMatrix(sentences))); 146 169 147 string[,] distinctSentences = new string[ DistinctGenerated.Count, 3];148 for (int i = 0; i < DistinctGenerated.Count; i++) {149 distinctSentences[i, 0] = DistinctGenerated[i].Item1.ToString();150 distinctSentences[i, 1] = Grammar.PostfixToInfixParser( DistinctGenerated[i].Item1).ToString();151 distinctSentences[i, 2] = DistinctGenerated[i].Item2.ToString();170 string[,] distinctSentences = new string[this.distinctSentences.Count, 3]; 171 for (int i = 0; i < this.distinctSentences.Count; i++) { 172 distinctSentences[i, 0] = this.distinctSentences[i].Item1.ToString(); 173 distinctSentences[i, 1] = Grammar.PostfixToInfixParser(this.distinctSentences[i].Item1).ToString(); 174 distinctSentences[i, 2] = this.distinctSentences[i].Item2.ToString(); 152 175 } 153 176 Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentences))); … … 175 198 tree, 176 199 new SymbolicDataAnalysisExpressionTreeLinearInterpreter()); 177 178 IRegressionSolution newSolution = model.CreateRegressionSolution(Problem.ProblemData); 179 180 IResult currBestTrainingSolutionResult; 181 IResult currBestTestSolutionResult; 182 if (!Results.TryGetValue(BestTrainingSolution, out currBestTrainingSolutionResult) 183 || !Results.TryGetValue(BestTestSolution, out currBestTestSolutionResult)) { 184 185 BestTrainingSentence = symbolString; 186 Results.Add(new Result(BestTrainingSolution, newSolution)); 187 Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly())); 188 189 BestTestSentence = symbolString; 190 Results.Add(new Result(BestTestSolution, newSolution)); 191 Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly())); 192 193 } else { 194 IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value; 195 if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) { 196 BestTrainingSentence = symbolString; 197 currBestTrainingSolutionResult.Value = newSolution; 198 Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()); 199 } 200 201 IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value; 202 if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) { 203 BestTestSentence = symbolString; 204 currBestTestSolutionResult.Value = newSolution; 205 Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()); 206 } 207 } 200 var probData = Problem.ProblemData; 201 var target = probData.TargetVariableTrainingValues; 202 var estVals = model.GetEstimatedValues(probData.Dataset, probData.TrainingIndices); 203 OnlineCalculatorError error; 204 var r2 = OnlinePearsonsRSquaredCalculator.Calculate(target, estVals, out error); 205 if (error != OnlineCalculatorError.None) r2 = 0.0; 206 207 var bestR2 = ((DoubleValue)(Results["Best R²"]).Value).Value; 208 ((DoubleValue)(Results["Best R²"].Value)).Value = Math.Max(r2, bestR2); 209 210 // IRegressionSolution newSolution = model.CreateRegressionSolution(Problem.ProblemData); 211 // 212 // IResult currBestTrainingSolutionResult; 213 // IResult currBestTestSolutionResult; 214 // if (!Results.TryGetValue(BestTrainingSolution, out currBestTrainingSolutionResult) 215 // || !Results.TryGetValue(BestTestSolution, out currBestTestSolutionResult)) { 216 // 217 // BestTrainingSentence = symbolString; 218 // Results.Add(new Result(BestTrainingSolution, newSolution)); 219 // Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly())); 220 // 221 // BestTestSentence = symbolString; 222 // Results.Add(new Result(BestTestSolution, newSolution)); 223 // Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly())); 224 // 225 // } else { 226 // IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value; 227 // if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) { 228 // BestTrainingSentence = symbolString; 229 // currBestTrainingSolutionResult.Value = newSolution; 230 // Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()); 231 // } 232 // 233 // IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value; 234 // if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) { 235 // BestTestSentence = symbolString; 236 // currBestTestSolutionResult.Value = newSolution; 237 // Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()); 238 // } 239 // } 208 240 } 209 241 } -
branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs
r15726 r15734 68 68 public void MctsSymbReg_NoConstants_Nguyen1() { 69 69 // x³ + x² + x 70 alg.MaxTreeSize = 3 0;70 alg.MaxTreeSize = 3; 71 71 Console.WriteLine(alg.MaxTreeSize); 72 72 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); … … 92 92 int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTestSentence); 93 93 94 Assert.IsTrue(alg. DistinctGenerated.Select(tuple => tuple.Item2).Contains(actualSolutionHash));94 Assert.IsTrue(alg.distinctSentences.Select(tuple => tuple.Item2).Contains(actualSolutionHash)); 95 95 96 96 // last because long sentences are overwritten by short ones. 97 Console.WriteLine(alg.Grammar.PostfixToInfixParser(alg. DistinctGenerated.Last(tuple => tuple.Item2 == targetSolutionHash).Item1));97 Console.WriteLine(alg.Grammar.PostfixToInfixParser(alg.distinctSentences.Last(tuple => tuple.Item2 == targetSolutionHash).Item1)); 98 98 99 99 … … 114 114 // Evaluate 115 115 EvaluateGrammarEnumeration(); 116 } 117 118 [TestMethod] 119 [TestProperty("Goal", "structure search")] 120 public void MctsSymbReg_NoConstants_Poly10() { 121 alg.MaxTreeSize = 10; 122 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); 123 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly"))); 124 alg.Problem.ProblemData = regProblem; 125 126 alg.Start(); 127 128 Assert.AreEqual(alg.distinctSentences.Count(), 110); 129 130 // Evaluate 131 // EvaluateGrammarEnumeration(); 132 } 133 134 [TestMethod] 135 [TestProperty("Goal", "structure search")] 136 public void MctsSymbReg_NoConstants_15() { 137 alg.MaxTreeSize = 5; 138 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed); 139 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("15"))); 140 alg.Problem.ProblemData = regProblem; 141 142 alg.Start(); 143 144 Assert.AreEqual(alg.distinctSentences.Count(), 16); 145 146 // Evaluate 147 // EvaluateGrammarEnumeration(); 116 148 } 117 149
Note: See TracChangeset
for help on using the changeset viewer.