Changeset 15746 for branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration
- Timestamp:
- 02/09/18 15:32:44 (7 years ago)
- Location:
- branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration
- Files:
-
- 1 added
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs
r15734 r15746 90 90 } 91 91 92 /*93 #region Memoize subtrees94 95 public void MemoizeSubtrees(SymbolString sentence) {96 Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());97 98 // Parse root symbol "+"99 MemoizeSubtreeExpression(parseStack);100 }101 102 private SymbolString MemoizeSubtreeExpression(Stack<TerminalSymbol> parseStack) {103 SymbolString subtree = new SymbolString();104 105 if (ReferenceEquals(parseStack.Peek(), Addition)) {106 subtree.Add(parseStack.Pop());107 subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack));108 subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));109 110 Expr.Alternatives[0].GeneratedSentences.Add(subtree);111 } else {112 subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));113 114 Expr.Alternatives[1].GeneratedSentences.Add(subtree);115 }116 117 return subtree;118 }119 120 private SymbolString MemoizeSubtreeTerm(Stack<TerminalSymbol> parseStack) {121 SymbolString subtree = new SymbolString();122 123 if (ReferenceEquals(parseStack.Peek(), Multiplication)) {124 subtree.Add(parseStack.Pop());125 subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));126 subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));127 128 Term.Alternatives[0].GeneratedSentences.Add(subtree);129 } else {130 subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));131 132 Term.Alternatives[1].GeneratedSentences.Add(subtree);133 }134 135 return subtree;136 }137 138 private SymbolString MemoizeSubtreeFactor(Stack<TerminalSymbol> parseStack) {139 SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack));140 141 Factor.Alternatives[0].GeneratedSentences.Add(subtree);142 return subtree;143 }144 145 private SymbolString MemoizeSubtreeVar(Stack<TerminalSymbol> parseStack) {146 SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable());147 148 // ... not really149 //Var.Alternatives[0].GeneratedSentences.Add(subtree);150 return subtree;151 }152 153 154 #endregion155 */156 157 92 #region Hashing 158 93 public int CalcHashCode(SymbolString sentence) { … … 221 156 } 222 157 223 // var 158 // var or nonterminal symbol 224 159 return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray(); 225 226 227 // throw new ArgumentException("Trying to hash malformed sentence!");228 160 } 229 161 -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15734 r15746 1 using System; 2 using System.Collections.Generic; 1 using System.Collections.Generic; 3 2 using System.Linq; 4 3 using System.Threading; … … 20 19 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)] 21 20 public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> { 22 private readonly string BestTrainingSolution = "Best solution (training)"; 23 private readonly string BestTrainingSolutionQuality = "Best solution quality (training)"; 24 private readonly string BestTestSolution = "Best solution (test)"; 25 private readonly string BestTestSolutionQuality = "Best solution quality (test)"; 26 21 #region properties and result names 22 private readonly string BestTrainingQualityName = "Best R² (Training)"; 23 private readonly string BestTrainingSolutionName = "Best solution (Training)"; 24 private readonly string GeneratedSentencesName = "Generated Sentences"; 25 private readonly string DistinctSentencesName = "Distinct Sentences"; 26 private readonly string PhraseExpansionsName = "Phrase Expansions"; 27 private readonly string AverageTreeLengthName = "Avg. Sentence Length among Distinct"; 28 private readonly string GeneratedEqualSentencesName = "Generated equal sentences"; 29 30 31 private readonly string SearchDataStructureParameterName = "Search Data Structure"; 27 32 private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes"; 28 33 private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval"; 29 private readonly string UseMemoizationParameterName = "Use Memoization?";30 31 #region properties 34 35 public override bool SupportsPause { get { return false; } } 36 32 37 protected IValueParameter<IntValue> MaxTreeSizeParameter { 33 38 get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; } … … 46 51 } 47 52 48 protected IValueParameter< BoolValue> UseMemoizationParameter {49 get { return (IValueParameter< BoolValue>)Parameters[UseMemoizationParameterName]; }50 } 51 public bool UseMemoization{52 get { return UseMemoizationParameter.Value.Value; }53 set { UseMemoizationParameter.Value.Value = value; }53 protected IValueParameter<EnumValue<StorageType>> SearchDataStructureParameter { 54 get { return (IValueParameter<EnumValue<StorageType>>)Parameters[SearchDataStructureParameterName]; } 55 } 56 public StorageType SearchDataStructure { 57 get { return SearchDataStructureParameter.Value.Value; } 58 set { SearchDataStructureParameter.Value.Value = value; } 54 59 } 55 60 56 61 public SymbolString BestTrainingSentence; 57 public SymbolString BestTestSentence; 58 59 public List<Tuple<SymbolString, int>> distinctSentences; 60 public List<Tuple<SymbolString, int>> sentences; 61 #endregion 62 62 63 #endregion 64 65 public Dictionary<int, SymbolString> DistinctSentences; // Semantically distinct sentences in a run. 66 public Dictionary<int, List<SymbolString>> AllSentences; // All sentences ever generated in a run. 67 public Dictionary<int, SymbolString> ArchivedPhrases; // Nodes in the search tree 68 SearchDataStore OpenPhrases; // Stack/Queue/etc. for fetching the next node in the search tree. 69 70 public int EqualGeneratedSentences; // It is not guaranteed that shorter solutions are found first. 71 // When longer solutions are overwritten with shorter ones, 72 // this counter is increased. 73 public int Expansions; // Number, how many times a nonterminal symbol is replaced with a production rule. 63 74 public Grammar Grammar; 64 65 75 66 76 #region ctors … … 70 80 71 81 public GrammarEnumerationAlgorithm() { 72 73 82 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234); 74 83 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); … … 80 89 Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6))); 81 90 Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(4000))); 82 Parameters.Add(new ValueParameter<BoolValue>(UseMemoizationParameterName, "Should already subtrees be reused within a run.", new BoolValue(true))); 83 }84 85 private GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { } 86 #endregion87 91 92 Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.RandomList))); 93 } 94 95 public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { } 96 #endregion 88 97 89 98 protected override void Run(CancellationToken cancellationToken) { 90 Results.Add(new Result("Best R²", new DoubleValue(0.0))); 91 var rand = new System.Random(1234); 92 BestTrainingSentence = null; 93 BestTrainingSentence = null; 94 this.sentences = new List<Tuple<SymbolString, int>>(); 95 this.distinctSentences = new List<Tuple<SymbolString, int>>(); 96 var archivedPhrases = new Dictionary<int, SymbolString>(); 97 int expansions = 0; 98 Dictionary<int, SymbolString> evaluatedHashes = new Dictionary<int, SymbolString>(); 99 #region init 100 InitResults(); 101 102 AllSentences = new Dictionary<int, List<SymbolString>>(); 103 ArchivedPhrases = new Dictionary<int, SymbolString>(); // Nodes in the search tree 104 DistinctSentences = new Dictionary<int, SymbolString>(); 105 Expansions = 0; 106 EqualGeneratedSentences = 0; 99 107 100 108 Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray()); 101 109 102 var phrases = new Dictionary<int, SymbolString>();110 OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy 103 111 var phrase0 = new SymbolString(new[] { Grammar.StartSymbol }); 104 phrases.Add(Grammar.CalcHashCode(phrase0), phrase0); 105 106 while (phrases.Any()) { 112 #endregion 113 114 OpenPhrases.Store(Grammar.CalcHashCode(phrase0), phrase0); 115 116 while (OpenPhrases.Count > 0) { 107 117 if (cancellationToken.IsCancellationRequested) break; 108 118 109 // FIFO 110 // SymbolString currSymbolString = phrases.First(); 111 // phrases.RemoveAt(0); 112 113 114 // LIFO 115 // SymbolString currSymbolString = phrases.Last(); 116 // phrases.RemoveAt(phrases.Count - 1); 117 118 119 // RANDOM 120 int idx = rand.Next(phrases.Count); 121 var selectedEntry = phrases.ElementAt(idx); // TODO: Perf von ElementAt ist schlecht. 122 phrases.Remove(selectedEntry.Key); 123 var currPhrase = selectedEntry.Value; 124 125 archivedPhrases.Add(selectedEntry.Key, selectedEntry.Value); 126 127 if (currPhrase.IsSentence()) { 128 int currSymbolStringHash = Grammar.CalcHashCode(currPhrase); 129 this.sentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash)); 130 131 if (!evaluatedHashes.ContainsKey(currSymbolStringHash)) { 132 evaluatedHashes[currSymbolStringHash] = currPhrase; 133 134 this.distinctSentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash)); 135 EvaluateSentence(currPhrase); 136 } 137 UpdateView(this.sentences, this.distinctSentences); 138 139 } else { 140 // expand next nonterminal symbols 141 int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol); 142 NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol; 143 144 foreach (Production productionAlternative in expandedSymbol.Alternatives) { 145 SymbolString newSentence = new SymbolString(currPhrase); 146 newSentence.RemoveAt(nonterminalSymbolIndex); 147 newSentence.InsertRange(nonterminalSymbolIndex, productionAlternative); 148 149 expansions++; 150 if (newSentence.Count <= MaxTreeSize) { 151 var phraseHash = Grammar.CalcHashCode(newSentence); 152 if(!phrases.ContainsKey(phraseHash) && 153 !archivedPhrases.ContainsKey(phraseHash)) 154 phrases.Add(phraseHash, newSentence); 119 StoredSymbolString fetchedPhrase = OpenPhrases.GetNext(); 120 SymbolString currPhrase = fetchedPhrase.SymbolString; 121 122 ArchivedPhrases.Add(fetchedPhrase.Hash, currPhrase); 123 124 // expand next nonterminal symbols 125 int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol); 126 NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol; 127 128 foreach (Production productionAlternative in expandedSymbol.Alternatives) { 129 SymbolString newPhrase = new SymbolString(currPhrase); 130 newPhrase.RemoveAt(nonterminalSymbolIndex); 131 newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative); 132 133 Expansions++; 134 if (newPhrase.Count <= MaxTreeSize) { 135 var phraseHash = Grammar.CalcHashCode(newPhrase); 136 137 if (newPhrase.IsSentence()) { // Sentence was generated. 138 SaveToAllSentences(phraseHash, newPhrase); 139 140 if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) { 141 if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only 142 143 DistinctSentences[phraseHash] = newPhrase; 144 EvaluateSentence(newPhrase); 145 } 146 UpdateView(AllSentences, DistinctSentences, Expansions); 147 148 } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.ContainsKey(phraseHash)) { 149 OpenPhrases.Store(phraseHash, newPhrase); 155 150 } 156 151 } … … 158 153 } 159 154 160 UpdateView(this.sentences, this.distinctSentences, force: true); 161 162 string[,] sentences = new string[this.sentences.Count, 3]; 163 for (int i = 0; i < this.sentences.Count; i++) { 164 sentences[i, 0] = this.sentences[i].Item1.ToString(); 165 sentences[i, 1] = Grammar.PostfixToInfixParser(this.sentences[i].Item1).ToString(); 166 sentences[i, 2] = this.sentences[i].Item2.ToString(); 167 } 168 Results.Add(new Result("All generated sentences", new StringMatrix(sentences))); 169 170 string[,] distinctSentences = new string[this.distinctSentences.Count, 3]; 171 for (int i = 0; i < this.distinctSentences.Count; i++) { 172 distinctSentences[i, 0] = this.distinctSentences[i].Item1.ToString(); 173 distinctSentences[i, 1] = Grammar.PostfixToInfixParser(this.distinctSentences[i].Item1).ToString(); 174 distinctSentences[i, 2] = this.distinctSentences[i].Item2.ToString(); 175 } 176 Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentences))); 177 } 178 179 180 private void UpdateView(List<Tuple<SymbolString, int>> allGenerated, 181 List<Tuple<SymbolString, int>> distinctGenerated, bool force = false) { 182 int generatedSolutions = allGenerated.Count; 183 int distinctSolutions = distinctGenerated.Count; 184 185 if (force || generatedSolutions % GuiUpdateInterval == 0) { 186 Results.AddOrUpdateResult("Generated Solutions", new IntValue(generatedSolutions)); 187 Results.AddOrUpdateResult("Distinct Solutions", new IntValue(distinctSolutions)); 188 189 DoubleValue averageTreeLength = new DoubleValue(allGenerated.Select(r => r.Item1.Count).Average()); 190 Results.AddOrUpdateResult("Average Tree Length of Solutions", averageTreeLength); 191 } 192 } 193 155 UpdateView(AllSentences, DistinctSentences, Expansions, force: true); 156 UpdateFinalResults(); 157 } 158 159 // Store sentence to "MultiDictionary" 160 private void SaveToAllSentences(int hash, SymbolString sentence) { 161 if (AllSentences.ContainsKey(hash)) 162 AllSentences[hash].Add(sentence); 163 else 164 AllSentences[hash] = new List<SymbolString> { sentence }; 165 } 166 167 #region Evaluation of generated models. 168 169 // Evaluate sentence within an algorithm run. 194 170 private void EvaluateSentence(SymbolString symbolString) { 195 171 SymbolicExpressionTree tree = Grammar.ParseSymbolicExpressionTree(symbolString); … … 198 174 tree, 199 175 new SymbolicDataAnalysisExpressionTreeLinearInterpreter()); 176 200 177 var probData = Problem.ProblemData; 201 178 var target = probData.TargetVariableTrainingValues; … … 205 182 if (error != OnlineCalculatorError.None) r2 = 0.0; 206 183 207 var bestR2 = ((DoubleValue)(Results["Best R²"]).Value).Value; 208 ((DoubleValue)(Results["Best R²"].Value)).Value = Math.Max(r2, bestR2); 209 210 // IRegressionSolution newSolution = model.CreateRegressionSolution(Problem.ProblemData); 211 // 212 // IResult currBestTrainingSolutionResult; 213 // IResult currBestTestSolutionResult; 214 // if (!Results.TryGetValue(BestTrainingSolution, out currBestTrainingSolutionResult) 215 // || !Results.TryGetValue(BestTestSolution, out currBestTestSolutionResult)) { 216 // 217 // BestTrainingSentence = symbolString; 218 // Results.Add(new Result(BestTrainingSolution, newSolution)); 219 // Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly())); 220 // 221 // BestTestSentence = symbolString; 222 // Results.Add(new Result(BestTestSolution, newSolution)); 223 // Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly())); 224 // 225 // } else { 226 // IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value; 227 // if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) { 228 // BestTrainingSentence = symbolString; 229 // currBestTrainingSolutionResult.Value = newSolution; 230 // Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()); 231 // } 232 // 233 // IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value; 234 // if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) { 235 // BestTestSentence = symbolString; 236 // currBestTestSolutionResult.Value = newSolution; 237 // Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()); 238 // } 239 // } 240 } 184 var bestR2 = ((DoubleValue)Results[BestTrainingQualityName].Value).Value; 185 if (r2 > bestR2) { 186 ((DoubleValue)Results[BestTrainingQualityName].Value).Value = r2; 187 BestTrainingSentence = symbolString; 188 } 189 } 190 191 #endregion 192 193 #region Visualization in HL 194 // Initialize entries in result set. 195 private void InitResults() { 196 BestTrainingSentence = null; 197 198 Results.Add(new Result(BestTrainingQualityName, new DoubleValue(-1.0))); 199 200 Results.Add(new Result(GeneratedSentencesName, new IntValue(0))); 201 Results.Add(new Result(DistinctSentencesName, new IntValue(0))); 202 Results.Add(new Result(PhraseExpansionsName, new IntValue(0))); 203 Results.Add(new Result(GeneratedEqualSentencesName, new IntValue(0))); 204 Results.Add(new Result(AverageTreeLengthName, new DoubleValue(1.0))); 205 } 206 207 // Update the view for intermediate results in an algorithm run. 208 private int updates; 209 private void UpdateView(Dictionary<int, List<SymbolString>> allGenerated, 210 Dictionary<int, SymbolString> distinctGenerated, int expansions, bool force = false) { 211 updates++; 212 213 if (force || updates % GuiUpdateInterval == 0) { 214 var allGeneratedEnum = allGenerated.Values.SelectMany(x => x).ToArray(); 215 ((IntValue)Results[GeneratedSentencesName].Value).Value = allGeneratedEnum.Length; 216 ((IntValue)Results[DistinctSentencesName].Value).Value = distinctGenerated.Count; 217 ((IntValue)Results[PhraseExpansionsName].Value).Value = expansions; 218 ((IntValue)Results[GeneratedEqualSentencesName].Value).Value = EqualGeneratedSentences; 219 ((DoubleValue)Results[AverageTreeLengthName].Value).Value = allGeneratedEnum.Select(sentence => sentence.Count).Average(); 220 } 221 } 222 223 // Generate all Results after an algorithm run. 224 private void UpdateFinalResults() { 225 SymbolicExpressionTree tree = Grammar.ParseSymbolicExpressionTree(BestTrainingSentence); 226 SymbolicRegressionModel model = new SymbolicRegressionModel( 227 Problem.ProblemData.TargetVariable, 228 tree, 229 new SymbolicDataAnalysisExpressionTreeLinearInterpreter()); 230 231 IRegressionSolution bestTrainingSolution = new RegressionSolution(model, Problem.ProblemData); 232 Results.AddOrUpdateResult(BestTrainingSolutionName, bestTrainingSolution); 233 234 // Print generated sentences. 235 string[,] sentencesMatrix = new string[AllSentences.Values.SelectMany(x => x).Count(), 3]; 236 237 int i = 0; 238 foreach (var sentenceSet in AllSentences) { 239 foreach (var sentence in sentenceSet.Value) { 240 sentencesMatrix[i, 0] = sentence.ToString(); 241 sentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(sentence).ToString(); 242 sentencesMatrix[i, 2] = sentenceSet.Key.ToString(); 243 i++; 244 } 245 } 246 Results.Add(new Result("All generated sentences", new StringMatrix(sentencesMatrix))); 247 248 string[,] distinctSentencesMatrix = new string[DistinctSentences.Count, 3]; 249 i = 0; 250 foreach (KeyValuePair<int, SymbolString> distinctSentence in DistinctSentences) { 251 distinctSentencesMatrix[i, 0] = distinctSentence.Key.ToString(); 252 distinctSentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(distinctSentence.Value).ToString(); 253 distinctSentencesMatrix[i, 2] = distinctSentence.Key.ToString(); 254 i++; 255 } 256 Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentencesMatrix))); 257 } 258 #endregion 259 241 260 } 242 261 } -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.csproj
r15714 r15746 93 93 <Compile Include="GrammarEnumeration\Grammar.cs" /> 94 94 <Compile Include="GrammarEnumeration\GrammarEnumerationAlgorithm.cs" /> 95 <Compile Include="GrammarEnumeration\SearchDataStructure.cs" /> 95 96 <Compile Include="GrammarEnumeration\Sentence.cs" /> 96 97 <Compile Include="Plugin.cs" />
Note: See TracChangeset
for help on using the changeset viewer.