Changeset 15800 for branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration
- Timestamp:
- 02/21/18 14:57:27 (7 years ago)
- Location:
- branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs
r15795 r15800 145 145 #region Hashing 146 146 public int CalcHashCode(SymbolString sentence) { 147 return CalcHashCode<int>(sentence, AggregateIntHashes); 148 } 149 150 private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, IEnumerable<THashType>, THashType> aggregateFunction) { 147 151 Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); 148 // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");149 152 150 153 Stack<Symbol> parseStack = new Stack<Symbol>(sentence); 151 154 152 155 Symbol peek = parseStack.Peek(); 153 string[] subtreeHashes = GetSubtreeHashes(parseStack); 154 return AggregateHashes(peek, subtreeHashes).GetHashCode(); 155 } 156 157 private string[] GetSubtreeHashes(Stack<Symbol> parseStack) { 156 return aggregateFunction(peek, GetSubtreeHashes(parseStack, aggregateFunction)).GetHashCode(); 157 } 158 159 private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, IEnumerable<THashType>, THashType> aggregateHashes) { 158 160 Symbol currentSymbol = parseStack.Pop(); 159 161 160 162 // ADDITION 161 163 if (ReferenceEquals(currentSymbol, Addition)) { 162 var uniqueChildHashes = new HashSet< string>();164 var uniqueChildHashes = new HashSet<THashType>(); 163 165 164 166 // First subtree 165 167 if (ReferenceEquals(parseStack.Peek(), Addition)) { 166 uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack ));168 uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes)); 167 169 } else { 168 170 var peek = parseStack.Peek(); 169 uniqueChildHashes.Add( AggregateHashes(peek, GetSubtreeHashes(parseStack)));171 uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes))); 170 172 } 171 173 // Second subtree 172 174 if (ReferenceEquals(parseStack.Peek(), Addition)) { 173 uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack ));175 uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes)); 174 176 } else { 175 177 var peek = parseStack.Peek(); 176 uniqueChildHashes.Add( AggregateHashes(peek, GetSubtreeHashes(parseStack)));178 uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes))); 177 179 } 178 180 … … 184 186 // MULTIPLICATION 185 187 if (ReferenceEquals(currentSymbol, Multiplication)) { 186 var childHashes = new List< string>();188 var childHashes = new List<THashType>(); 187 189 188 190 // First subtree 189 191 if (ReferenceEquals(parseStack.Peek(), Multiplication)) { 190 childHashes.AddRange(GetSubtreeHashes(parseStack ));192 childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes)); 191 193 } else { 192 childHashes.Add( AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));194 childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes))); 193 195 } 194 196 // Second subtree 195 197 if (ReferenceEquals(parseStack.Peek(), Multiplication)) { 196 childHashes.AddRange(GetSubtreeHashes(parseStack ));198 childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes)); 197 199 } else { 198 childHashes.Add( AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));200 childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes))); 199 201 } 200 202 … … 210 212 211 213 var currFactor = childHashes[i]; 212 var invFactor = AggregateHashes(Inv, currFactor.ToEnumerable());214 var invFactor = aggregateHashes(Inv, currFactor.ToEnumerable()); 213 215 214 216 int indexOfInv = childHashes.IndexOf(invFactor); … … 227 229 if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) || 228 230 ReferenceEquals(currentSymbol, Sin) || ReferenceEquals(currentSymbol, Inv)) { 229 return AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)).ToEnumerable().ToArray();231 return new[] { aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)) }; 230 232 } 231 233 232 234 // var or nonterminal symbol 233 return currentSymbol.StringRepresentation.ToEnumerable().ToArray();234 } 235 236 private string Aggregate Hashes(Symbol operatorSym, IEnumerable<string> hashes) {235 return new[] { aggregateHashes(currentSymbol, Enumerable.Empty<THashType>()) }; 236 } 237 238 private string AggregateStringHashes(Symbol operatorSym, IEnumerable<string> hashes) { 237 239 var hashesArray = hashes.ToArray(); 238 240 … … 240 242 return hashesArray[0]; 241 243 } 242 if ( Var.VariableTerminalSymbols.Contains(operatorSym)) {244 if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) { 243 245 return operatorSym.StringRepresentation; 244 246 } 245 247 246 return "[" + hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => result + " ° " + ti) + "]"; 248 return $"[{hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => string.Concat(result, " ° ", ti))}]"; 249 } 250 251 private int AggregateIntHashes(Symbol operatorSym, IEnumerable<int> hashes) { 252 var hashesArray = hashes.ToArray(); 253 254 int start; 255 if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && 256 hashesArray.Count() <= 1) { 257 start = 0; 258 259 } else if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) { 260 return operatorSym.StringRepresentation.GetHashCode(); 261 262 } else { 263 start = operatorSym.StringRepresentation.GetHashCode(); 264 } 265 266 return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode()); 247 267 } 248 268 -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15784 r15800 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 4 using System.IO; 3 5 using System.Linq; 4 using System.Text;5 6 using System.Threading; 6 7 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; … … 65 66 #endregion 66 67 67 public Dictionary<int, SymbolString> DistinctSentences ;// Semantically distinct sentences in a run.68 public Dictionary<int, List<SymbolString>> AllSentences ;// All sentences ever generated in a run.69 public Dictionary<int, SymbolString> ArchivedPhrases ;// Nodes in the search tree70 SearchDataStore OpenPhrases;// Stack/Queue/etc. for fetching the next node in the search tree.71 72 public int EqualGeneratedSentences ;// It is not guaranteed that shorter solutions are found first.73 // When longer solutions are overwritten with shorter ones,74 // this counter is increased.75 public int Expansions ;// Number, how many times a nonterminal symbol is replaced with a production rule.76 public Grammar Grammar ;77 78 p ublic StringBuilder dotFileBuilder;68 public Dictionary<int, SymbolString> DistinctSentences { get; private set; } // Semantically distinct sentences in a run. 69 public Dictionary<int, List<SymbolString>> AllSentences { get; private set; } // All sentences ever generated in a run. 70 public Dictionary<int, SymbolString> ArchivedPhrases { get; private set; } // Nodes in the search tree 71 internal SearchDataStore OpenPhrases { get; private set; } // Stack/Queue/etc. for fetching the next node in the search tree. 72 73 public int EqualGeneratedSentences { get; private set; } // It is not guaranteed that shorter solutions are found first. 74 // When longer solutions are overwritten with shorter ones, 75 // this counter is increased. 76 public int Expansions { get; private set; } // Number, how many times a nonterminal symbol is replaced with a production rule. 77 public Grammar Grammar { get; private set; } 78 79 private readonly string dotFileName = Environment.GetFolderPath(System.Environment.SpecialFolder.DesktopDirectory) + @"\searchgraph.dot"; 79 80 80 81 #region ctors … … 84 85 85 86 public GrammarEnumerationAlgorithm() { 86 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);87 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));88 89 87 Problem = new RegressionProblem() { 90 ProblemData = regProblem88 ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenFunctionNine(seed: 1234).GenerateRegressionData() 91 89 }; 92 90 93 91 Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6))); 94 Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(4000))); 95 92 Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(1000))); 96 93 Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack))); 97 94 } … … 103 100 #region init 104 101 InitResults(); 105 106 dotFileBuilder = new StringBuilder();107 102 108 103 AllSentences = new Dictionary<int, List<SymbolString>>(); … … 116 111 OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy 117 112 var phrase0 = new SymbolString(new[] { Grammar.StartSymbol }); 113 var phrase0Hash = Grammar.CalcHashCode(phrase0); 118 114 #endregion 119 115 120 OpenPhrases.Store(Grammar.CalcHashCode(phrase0), phrase0); 121 122 while (OpenPhrases.Count > 0) { 123 if (cancellationToken.IsCancellationRequested) break; 124 125 StoredSymbolString fetchedPhrase = OpenPhrases.GetNext(); 126 SymbolString currPhrase = fetchedPhrase.SymbolString; 127 128 if (dotFileBuilder.Length == 0) { 129 dotFileBuilder.AppendLine("digraph searchgraph {"); 130 dotFileBuilder.AppendLine(fetchedPhrase.Hash + " [label=\"" + Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString) + "\", shape=doublecircle];"); 131 } else { 132 dotFileBuilder.AppendLine(fetchedPhrase.Hash + " [label=\"" + Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString) + "\"];"); 133 } 134 135 ArchivedPhrases.Add(fetchedPhrase.Hash, currPhrase); 136 137 // expand next nonterminal symbols 138 int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol); 139 NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol; 140 141 foreach (Production productionAlternative in expandedSymbol.Alternatives) { 142 SymbolString newPhrase = new SymbolString(currPhrase); 143 newPhrase.RemoveAt(nonterminalSymbolIndex); 144 newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative); 145 146 Expansions++; 147 if (newPhrase.Count <= MaxTreeSize) { 148 var phraseHash = Grammar.CalcHashCode(newPhrase); 149 150 dotFileBuilder.AppendLine(fetchedPhrase.Hash + " -> " + phraseHash + " [label=\"" + expandedSymbol.StringRepresentation + "→" + productionAlternative + "\"];"); 151 152 if (newPhrase.IsSentence()) { // Sentence was generated. 153 SaveToAllSentences(phraseHash, newPhrase); 154 155 if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) { 156 if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only 157 158 DistinctSentences[phraseHash] = newPhrase; 159 EvaluateSentence(newPhrase); 160 161 dotFileBuilder.AppendLine(phraseHash + " [label=\"" + Grammar.PostfixToInfixParser(newPhrase) + "\", style=\"filled\"];"); 116 using (TextWriterTraceListener dotFileTrace = new TextWriterTraceListener(new FileStream(dotFileName, FileMode.Create))) { 117 dotFileTrace.Write("digraph searchgraph {"); 118 119 OpenPhrases.Store(phrase0Hash, phrase0); 120 while (OpenPhrases.Count > 0) { 121 if (cancellationToken.IsCancellationRequested) break; 122 123 StoredSymbolString fetchedPhrase = OpenPhrases.GetNext(); 124 SymbolString currPhrase = fetchedPhrase.SymbolString; 125 126 dotFileTrace.Write( 127 $"{fetchedPhrase.Hash} [label=\"{Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString)}\"];"); 128 129 ArchivedPhrases.Add(fetchedPhrase.Hash, currPhrase); 130 131 // expand next nonterminal symbols 132 int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol); 133 NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol; 134 135 foreach (Production productionAlternative in expandedSymbol.Alternatives) { 136 SymbolString newPhrase = new SymbolString(currPhrase); 137 newPhrase.RemoveAt(nonterminalSymbolIndex); 138 newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative); 139 140 Expansions++; 141 if (newPhrase.Count <= MaxTreeSize) { 142 var phraseHash = Grammar.CalcHashCode(newPhrase); 143 144 dotFileTrace.Write( 145 $"{fetchedPhrase.Hash} -> {phraseHash} [label=\"{expandedSymbol.StringRepresentation} + → {productionAlternative}\"];"); 146 147 if (newPhrase.IsSentence()) { 148 // Sentence was generated. 149 SaveToAllSentences(phraseHash, newPhrase); 150 151 if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) { 152 if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only 153 154 DistinctSentences[phraseHash] = newPhrase; 155 EvaluateSentence(newPhrase); 156 157 dotFileTrace.Write( 158 $"{phraseHash} [label=\"{Grammar.PostfixToInfixParser(newPhrase)}\", style=\"filled\"];"); 159 } 160 UpdateView(AllSentences, DistinctSentences); 161 162 } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.ContainsKey(phraseHash)) { 163 OpenPhrases.Store(phraseHash, newPhrase); 162 164 } 163 UpdateView(AllSentences, DistinctSentences, Expansions);164 165 } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.ContainsKey(phraseHash)) {166 OpenPhrases.Store(phraseHash, newPhrase);167 165 } 168 166 } 169 167 } 170 } 171 172 dotFileBuilder.AppendLine(Grammar.CalcHashCode(BestTrainingSentence) + " [label=\"" + Grammar.PostfixToInfixParser(BestTrainingSentence) + "\", shape=Mcircle, style=\"filled,bold\"];"); 173 dotFileBuilder.AppendLine("}"); 174 175 System.IO.File.WriteAllText(Environment.GetFolderPath(System.Environment.SpecialFolder.DesktopDirectory)+ @"\searchgraph.dot", dotFileBuilder.ToString()); 176 177 UpdateView(AllSentences, DistinctSentences, Expansions, force: true); 168 169 // Overwrite formatting of start search node and best found solution. 170 dotFileTrace.Write( 171 $"{Grammar.CalcHashCode(BestTrainingSentence)} [label=\"{Grammar.PostfixToInfixParser(BestTrainingSentence)}\", shape=Mcircle, style=\"filled,bold\"];"); 172 dotFileTrace.Write($"{phrase0Hash} [label=\"{Grammar.PostfixToInfixParser(phrase0)}\", shape=doublecircle];}}"); 173 dotFileTrace.Flush(); 174 } 175 176 UpdateView(AllSentences, DistinctSentences, force: true); 178 177 UpdateFinalResults(); 179 178 } … … 230 229 private int updates; 231 230 private void UpdateView(Dictionary<int, List<SymbolString>> allGenerated, 232 Dictionary<int, SymbolString> distinctGenerated, int expansions,bool force = false) {231 Dictionary<int, SymbolString> distinctGenerated, bool force = false) { 233 232 updates++; 234 233 … … 237 236 ((IntValue)Results[GeneratedSentencesName].Value).Value = allGeneratedEnum.Length; 238 237 ((IntValue)Results[DistinctSentencesName].Value).Value = distinctGenerated.Count; 239 ((IntValue)Results[PhraseExpansionsName].Value).Value = expansions;238 ((IntValue)Results[PhraseExpansionsName].Value).Value = Expansions; 240 239 ((IntValue)Results[GeneratedEqualSentencesName].Value).Value = EqualGeneratedSentences; 241 240 ((DoubleValue)Results[AverageTreeLengthName].Value).Value = allGeneratedEnum.Select(sentence => sentence.Count).Average(); -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/SearchDataStructure.cs
r15746 r15800 6 6 7 7 public struct StoredSymbolString { 8 public int Hash { get; }9 public SymbolString SymbolString { get; }8 public readonly int Hash; 9 public readonly SymbolString SymbolString; 10 10 11 11 public StoredSymbolString(int hash, SymbolString symbolString) { -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs
r15723 r15800 1 1 using System.Collections.Generic; 2 using System.Linq;3 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;4 2 5 3 namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration { 6 4 7 5 public abstract class Symbol { 8 public readonly string StringRepresentation;6 public string StringRepresentation { get; } 9 7 10 8 protected Symbol(string representation) { … … 22 20 23 21 public class NonterminalSymbol : Symbol { 24 public List<Production> Alternatives ;22 public List<Production> Alternatives { get; } 25 23 26 24 public NonterminalSymbol(string representation) : base(representation) { … … 34 32 35 33 public class VariableSymbol : NonterminalSymbol { // Convenience class 36 public IEnumerable<TerminalSymbol> VariableTerminalSymbols ;34 public IEnumerable<TerminalSymbol> VariableTerminalSymbols { get; } 37 35 38 36 public VariableSymbol(string representation, IEnumerable<string> variableNames) : base(representation) {
Note: See TracChangeset
for help on using the changeset viewer.