Changeset 15821 for branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
- Timestamp:
- 02/28/18 14:44:25 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15814 r15821 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics;4 using System.IO;5 3 using System.Linq; 6 4 using System.Threading; 7 5 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; 6 using HeuristicLab.Collections; 8 7 using HeuristicLab.Common; 9 8 using HeuristicLab.Core; … … 30 29 private readonly string DistinctSentencesName = "Distinct Sentences"; 31 30 private readonly string PhraseExpansionsName = "Phrase Expansions"; 32 private readonly string AverageTreeLengthName = "Avg. Sentence Length among Distinct"; 33 private readonly string GeneratedEqualSentencesName = "Generated equal sentences"; 31 private readonly string AverageSentenceLengthName = "Avg. Sentence Length among Distinct"; 32 private readonly string OverwrittenSentencesName = "Sentences overwritten"; 33 private readonly string AnalyzersParameterName = "Analyzers"; 34 34 35 35 … … 64 64 } 65 65 66 public IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>> AnalyzersParameter { 67 get { return (IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>)Parameters[AnalyzersParameterName]; } 68 } 69 70 public ICheckedItemCollection<IGrammarEnumerationAnalyzer> Analyzers { 71 get { return AnalyzersParameter.Value; } 72 } 73 66 74 public SymbolString BestTrainingSentence; 67 75 68 76 #endregion 69 77 70 public Dictionary<int, SymbolString> DistinctSentences { get; private set; } // Semantically distinct sentences in a run. 71 public Dictionary<int, List<SymbolString>> AllSentences { get; private set; } // All sentences ever generated in a run. 78 public Dictionary<int, int> DistinctSentencesLength { get; private set; } // Semantically distinct sentences and their length in a run. 72 79 public HashSet<int> ArchivedPhrases { get; private set; } 73 74 internal SearchDataStore OpenPhrases { get; private set; } // Stack/Queue/etc. for fetching the next node in the search tree. 75 76 public int EqualGeneratedSentences { get; private set; } // It is not guaranteed that shorter solutions are found first. 77 // When longer solutions are overwritten with shorter ones, 78 // this counter is increased. 79 public int Expansions { get; private set; } // Number, how many times a nonterminal symbol is replaced with a production rule. 80 internal SearchDataStore OpenPhrases { get; private set; } // Stack/Queue/etc. for fetching the next node in the search tree. 81 82 #region execution stats 83 public int AllGeneratedSentencesCount { get; private set; } 84 85 public int OverwrittenSentencesCount { get; private set; } // It is not guaranteed that shorter solutions are found first. 86 // When longer solutions are overwritten with shorter ones, 87 // this counter is increased. 88 public int PhraseExpansionCount { get; private set; } // Number, how many times a nonterminal symbol is replaced with a production rule. 89 #endregion 90 80 91 public Grammar Grammar { get; private set; } 81 92 82 private readonly string dotFileName = Environment.GetFolderPath(System.Environment.SpecialFolder.DesktopDirectory) + @"\searchgraph.dot";93 private readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter(); 83 94 84 95 #region ctors … … 95 106 Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(1000))); 96 107 Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack))); 108 109 var availableAnalyzers = new IGrammarEnumerationAnalyzer[] { 110 new SearchGraphVisualizer(), 111 new SentenceLogger() 112 }; 113 Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>( 114 AnalyzersParameterName, 115 new CheckedItemCollection<IGrammarEnumerationAnalyzer>(availableAnalyzers).AsReadOnly())); 116 117 foreach (var analyzer in Analyzers) { 118 Analyzers.SetItemCheckedState(analyzer, false); 119 } 120 Analyzers.CheckedItemsChanged += AnalyzersOnCheckedItemsChanged; 97 121 } 98 122 … … 104 128 InitResults(); 105 129 106 AllSentences = new Dictionary<int, List<SymbolString>>();107 130 ArchivedPhrases = new HashSet<int>(); 108 131 109 DistinctSentences = new Dictionary<int, SymbolString>(); 110 Expansions = 0; 111 EqualGeneratedSentences = 0; 132 DistinctSentencesLength = new Dictionary<int, int>(); 133 AllGeneratedSentencesCount = 0; 134 OverwrittenSentencesCount = 0; 135 PhraseExpansionCount = 0; 112 136 113 137 Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray()); … … 118 142 #endregion 119 143 120 using (TextWriterTraceListener dotFileTrace = new TextWriterTraceListener(new FileStream(dotFileName, FileMode.Create))) { 121 LogSearchGraph(dotFileTrace, "digraph searchgraph { pad=0.02; nodesep=0.3; ranksep=0.02;ratio=0.5625;"); 122 123 OpenPhrases.Store(phrase0Hash, phrase0); 124 while (OpenPhrases.Count > 0) { 125 if (cancellationToken.IsCancellationRequested) break; 126 127 StoredSymbolString fetchedPhrase = OpenPhrases.GetNext(); 128 SymbolString currPhrase = fetchedPhrase.SymbolString; 129 #if DEBUG 130 LogSearchGraph(dotFileTrace, $"{fetchedPhrase.Hash} [label=\"{Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString)}\"];"); 131 #endif 132 ArchivedPhrases.Add(fetchedPhrase.Hash); 133 134 // expand next nonterminal symbols 135 int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol); 136 NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol; 137 138 foreach (Production productionAlternative in expandedSymbol.Alternatives) { 139 SymbolString newPhrase = new SymbolString(currPhrase.Count + productionAlternative.Count); 140 newPhrase.AddRange(currPhrase); 141 newPhrase.RemoveAt(nonterminalSymbolIndex); // TODO: removeat and insertRange are both O(n) 142 newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative); 143 144 Expansions++; 145 if (newPhrase.Count <= MaxTreeSize) { 146 var phraseHash = Grammar.CalcHashCode(newPhrase); 147 #if DEBUG 148 LogSearchGraph(dotFileTrace, $"{fetchedPhrase.Hash} -> {phraseHash} [label=\"{expandedSymbol.StringRepresentation} + → {productionAlternative}\"];"); 149 #endif 150 if (newPhrase.IsSentence()) { 151 // Sentence was generated. 152 SaveToAllSentences(phraseHash, newPhrase); 153 154 if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) { 155 if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only 156 157 DistinctSentences[phraseHash] = newPhrase; 158 EvaluateSentence(newPhrase); 159 160 #if DEBUG 161 LogSearchGraph(dotFileTrace, $"{phraseHash} [label=\"{Grammar.PostfixToInfixParser(newPhrase)}\", style=\"filled\", fillcolor=\"#f79423\", color=\"#f79423\"];"); 162 #endif 163 } 164 UpdateView(); 165 166 } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) { 167 OpenPhrases.Store(phraseHash, newPhrase); 144 OpenPhrases.Store(phrase0Hash, phrase0); 145 while (OpenPhrases.Count > 0) { 146 if (cancellationToken.IsCancellationRequested) break; 147 148 StoredSymbolString fetchedPhrase = OpenPhrases.GetNext(); 149 SymbolString currPhrase = fetchedPhrase.SymbolString; 150 151 OnPhraseFetched(fetchedPhrase.Hash, currPhrase); 152 153 ArchivedPhrases.Add(fetchedPhrase.Hash); 154 155 // expand next nonterminal symbols 156 int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol); 157 NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol; 158 159 foreach (Production productionAlternative in expandedSymbol.Alternatives) { 160 PhraseExpansionCount++; 161 162 SymbolString newPhrase = new SymbolString(currPhrase.Count + productionAlternative.Count); 163 newPhrase.AddRange(currPhrase); 164 newPhrase.RemoveAt(nonterminalSymbolIndex); // TODO: removeat and insertRange are both O(n) 165 newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative); 166 167 if (newPhrase.Count <= MaxTreeSize) { 168 var phraseHash = Grammar.CalcHashCode(newPhrase); 169 170 OnPhraseDerived(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative); 171 172 if (newPhrase.IsSentence()) { 173 AllGeneratedSentencesCount++; 174 175 OnSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative); 176 177 if (!DistinctSentencesLength.ContainsKey(phraseHash) || DistinctSentencesLength[phraseHash] > newPhrase.Count) { 178 if (DistinctSentencesLength.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only 179 180 DistinctSentencesLength[phraseHash] = newPhrase.Count; 181 EvaluateSentence(newPhrase); 182 183 OnDistinctSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative); 168 184 } 185 UpdateView(); 186 187 } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) { 188 OpenPhrases.Store(phraseHash, newPhrase); 169 189 } 170 190 } 171 191 } 172 #if DEBUG173 // Overwrite formatting of start search node and best found solution.174 //LogSearchGraph(dotFileTrace, $"{Grammar.CalcHashCode(BestTrainingSentence)} [label=\"{Grammar.PostfixToInfixParser(BestTrainingSentence)}\", shape=Mcircle, style=\"filled,bold\"];");175 LogSearchGraph(dotFileTrace, $"{phrase0Hash} [label=\"{Grammar.PostfixToInfixParser(phrase0)}\", shape=doublecircle];}}");176 dotFileTrace.Flush();177 #endif178 192 } 179 193 … … 182 196 } 183 197 184 // Store sentence to "MultiDictionary" 185 private void SaveToAllSentences(int hash, SymbolString sentence) { 186 if (AllSentences.ContainsKey(hash)) 187 AllSentences[hash].Add(sentence); 188 else 189 AllSentences[hash] = new List<SymbolString> { sentence }; //TODO: here we store all sentences even if they have the same hash value, this is not strictly necessary 190 } 191 192 #region Evaluation of generated models. 198 #region Evaluation of generated models. 193 199 194 200 // Evaluate sentence within an algorithm run. … … 198 204 Problem.ProblemData.TargetVariable, 199 205 tree, 200 new SymbolicDataAnalysisExpressionTreeLinearInterpreter());206 expressionTreeLinearInterpreter); 201 207 202 208 var probData = Problem.ProblemData; … … 214 220 } 215 221 216 #endregion217 218 #region Visualization in HL222 #endregion 223 224 #region Visualization in HL 219 225 // Initialize entries in result set. 220 226 private void InitResults() { … … 228 234 Results.Add(new Result(DistinctSentencesName, new IntValue(0))); 229 235 Results.Add(new Result(PhraseExpansionsName, new IntValue(0))); 230 Results.Add(new Result( GeneratedEqualSentencesName, new IntValue(0)));231 Results.Add(new Result(Average TreeLengthName, new DoubleValue(1.0)));236 Results.Add(new Result(OverwrittenSentencesName, new IntValue(0))); 237 Results.Add(new Result(AverageSentenceLengthName, new DoubleValue(1.0))); 232 238 } 233 239 … … 238 244 239 245 if (force || updates % GuiUpdateInterval == 1) { 240 var allGeneratedEnum = AllSentences.Values.SelectMany(x => x).ToArray();241 246 ((IntValue)Results[GeneratedPhrasesName].Value).Value = ArchivedPhrases.Count; 242 247 ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count; 243 ((IntValue)Results[GeneratedSentencesName].Value).Value = allGeneratedEnum.Length;244 ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentences .Count;245 ((IntValue)Results[PhraseExpansionsName].Value).Value = Expansions;246 (( IntValue)Results[GeneratedEqualSentencesName].Value).Value = EqualGeneratedSentences;247 (( DoubleValue)Results[AverageTreeLengthName].Value).Value = allGeneratedEnum.Select(sentence => sentence.Count).Average();248 ((IntValue)Results[GeneratedSentencesName].Value).Value = AllGeneratedSentencesCount; 249 ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesLength.Count; 250 ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount; 251 ((DoubleValue)Results[AverageSentenceLengthName].Value).Value = DistinctSentencesLength.Select(pair => pair.Value).Average(); 252 ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount; 248 253 } 249 254 } … … 259 264 IRegressionSolution bestTrainingSolution = new RegressionSolution(model, Problem.ProblemData); 260 265 Results.AddOrUpdateResult(BestTrainingSolutionName, bestTrainingSolution); 261 262 // Print generated sentences. 263 string[,] sentencesMatrix = new string[AllSentences.Values.SelectMany(x => x).Count(), 3]; 264 265 int i = 0; 266 foreach (var sentenceSet in AllSentences) { 267 foreach (var sentence in sentenceSet.Value) { 268 sentencesMatrix[i, 0] = sentence.ToString(); 269 sentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(sentence).ToString(); 270 sentencesMatrix[i, 2] = sentenceSet.Key.ToString(); 271 i++; 266 } 267 #endregion 268 269 #region events 270 private void AnalyzersOnCheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IGrammarEnumerationAnalyzer> collectionItemsChangedEventArgs) { 271 foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in collectionItemsChangedEventArgs.Items) { 272 if (Analyzers.ItemChecked(grammarEnumerationAnalyzer)) { 273 grammarEnumerationAnalyzer.Register(this); 274 } else { 275 grammarEnumerationAnalyzer.Deregister(this); 272 276 } 273 277 } 274 Results.Add(new Result("All generated sentences", new StringMatrix(sentencesMatrix))); 275 276 string[,] distinctSentencesMatrix = new string[DistinctSentences.Count, 3]; 277 i = 0; 278 foreach (KeyValuePair<int, SymbolString> distinctSentence in DistinctSentences) { 279 distinctSentencesMatrix[i, 0] = distinctSentence.Key.ToString(); 280 distinctSentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(distinctSentence.Value).ToString(); 281 distinctSentencesMatrix[i, 2] = distinctSentence.Key.ToString(); 282 i++; 283 } 284 Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentencesMatrix))); 285 } 286 287 private void LogSearchGraph(TraceListener listener, string msg) { 288 listener.Write(msg); 289 } 290 #endregion 278 } 279 280 public event EventHandler<PhraseEventArgs> PhraseFetched; 281 private void OnPhraseFetched(int hash, SymbolString symbolString) { 282 if (PhraseFetched != null) { 283 PhraseFetched(this, new PhraseEventArgs(hash, symbolString)); 284 } 285 } 286 287 public event EventHandler<PhraseAddedEventArgs> PhraseDerived; 288 private void OnPhraseDerived(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) { 289 if (PhraseDerived != null) { 290 PhraseDerived(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction)); 291 } 292 } 293 294 public event EventHandler<PhraseAddedEventArgs> SentenceGenerated; 295 private void OnSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) { 296 if (SentenceGenerated != null) { 297 SentenceGenerated(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction)); 298 } 299 } 300 301 public event EventHandler<PhraseAddedEventArgs> DistinctSentenceGenerated; 302 private void OnDistinctSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) { 303 if (DistinctSentenceGenerated != null) { 304 DistinctSentenceGenerated(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction)); 305 } 306 } 307 308 #endregion 291 309 292 310 } 311 312 #region events for analysis 313 314 public class PhraseEventArgs : EventArgs { 315 public int Hash { get; } 316 317 public SymbolString Phrase { get; } 318 319 public PhraseEventArgs(int hash, SymbolString phrase) { 320 Hash = hash; 321 Phrase = phrase; 322 } 323 } 324 325 public class PhraseAddedEventArgs : EventArgs { 326 public int ParentHash { get; } 327 public int NewHash { get; } 328 329 public SymbolString ParentPhrase { get; } 330 public SymbolString NewPhrase { get; } 331 332 public Symbol ExpandedSymbol { get; } 333 334 public Production ExpandedProduction { get; } 335 336 public PhraseAddedEventArgs(int parentHash, SymbolString parentPhrase, int newHash, SymbolString newPhrase, Symbol expandedSymbol, Production expandedProduction) { 337 ParentHash = parentHash; 338 ParentPhrase = parentPhrase; 339 NewHash = newHash; 340 NewPhrase = newPhrase; 341 ExpandedSymbol = expandedSymbol; 342 ExpandedProduction = expandedProduction; 343 } 344 } 345 346 #endregion 293 347 }
Note: See TracChangeset
for help on using the changeset viewer.