Changeset 15800


Ignore:
Timestamp:
02/21/18 14:57:27 (15 months ago)
Author:
lkammere
Message:

#2886: Refactor code and fix performance issues.

Location:
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15795 r15800  
    145145    #region Hashing
    146146    public int CalcHashCode(SymbolString sentence) {
     147      return CalcHashCode<int>(sentence, AggregateIntHashes);
     148    }
     149
     150    private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, IEnumerable<THashType>, THashType> aggregateFunction) {
    147151      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
    148       // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
    149152
    150153      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
    151154
    152155      Symbol peek = parseStack.Peek();
    153       string[] subtreeHashes = GetSubtreeHashes(parseStack);
    154       return AggregateHashes(peek, subtreeHashes).GetHashCode();
    155     }
    156 
    157     private string[] GetSubtreeHashes(Stack<Symbol> parseStack) {
     156      return aggregateFunction(peek, GetSubtreeHashes(parseStack, aggregateFunction)).GetHashCode();
     157    }
     158
     159    private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, IEnumerable<THashType>, THashType> aggregateHashes) {
    158160      Symbol currentSymbol = parseStack.Pop();
    159161
    160162      // ADDITION
    161163      if (ReferenceEquals(currentSymbol, Addition)) {
    162         var uniqueChildHashes = new HashSet<string>();
     164        var uniqueChildHashes = new HashSet<THashType>();
    163165
    164166        // First subtree
    165167        if (ReferenceEquals(parseStack.Peek(), Addition)) {
    166           uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
     168          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
    167169        } else {
    168170          var peek = parseStack.Peek();
    169           uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
     171          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
    170172        }
    171173        // Second subtree
    172174        if (ReferenceEquals(parseStack.Peek(), Addition)) {
    173           uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
     175          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
    174176        } else {
    175177          var peek = parseStack.Peek();
    176           uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
     178          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
    177179        }
    178180
     
    184186      // MULTIPLICATION
    185187      if (ReferenceEquals(currentSymbol, Multiplication)) {
    186         var childHashes = new List<string>();
     188        var childHashes = new List<THashType>();
    187189
    188190        // First subtree
    189191        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
    190           childHashes.AddRange(GetSubtreeHashes(parseStack));
     192          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
    191193        } else {
    192           childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
     194          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
    193195        }
    194196        // Second subtree
    195197        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
    196           childHashes.AddRange(GetSubtreeHashes(parseStack));
     198          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
    197199        } else {
    198           childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
     200          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
    199201        }
    200202
     
    210212
    211213          var currFactor = childHashes[i];
    212           var invFactor = AggregateHashes(Inv, currFactor.ToEnumerable());
     214          var invFactor = aggregateHashes(Inv, currFactor.ToEnumerable());
    213215
    214216          int indexOfInv = childHashes.IndexOf(invFactor);
     
    227229      if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
    228230          ReferenceEquals(currentSymbol, Sin) || ReferenceEquals(currentSymbol, Inv)) {
    229         return AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)).ToEnumerable().ToArray();
     231        return new[] { aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)) };
    230232      }
    231233
    232234      // var or nonterminal symbol
    233       return currentSymbol.StringRepresentation.ToEnumerable().ToArray();
    234     }
    235 
    236     private string AggregateHashes(Symbol operatorSym, IEnumerable<string> hashes) {
     235      return new[] { aggregateHashes(currentSymbol, Enumerable.Empty<THashType>()) };
     236    }
     237
     238    private string AggregateStringHashes(Symbol operatorSym, IEnumerable<string> hashes) {
    237239      var hashesArray = hashes.ToArray();
    238240
     
    240242        return hashesArray[0];
    241243      }
    242       if (Var.VariableTerminalSymbols.Contains(operatorSym)) {
     244      if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) {
    243245        return operatorSym.StringRepresentation;
    244246      }
    245247
    246       return "[" + hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => result + " ° " + ti) + "]";
     248      return $"[{hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => string.Concat(result, " ° ", ti))}]";
     249    }
     250
     251    private int AggregateIntHashes(Symbol operatorSym, IEnumerable<int> hashes) {
     252      var hashesArray = hashes.ToArray();
     253
     254      int start;
     255      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) &&
     256          hashesArray.Count() <= 1) {
     257        start = 0;
     258
     259      } else if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) {
     260        return operatorSym.StringRepresentation.GetHashCode();
     261
     262      } else {
     263        start = operatorSym.StringRepresentation.GetHashCode();
     264      }
     265
     266      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
    247267    }
    248268
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15784 r15800  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
     4using System.IO;
    35using System.Linq;
    4 using System.Text;
    56using System.Threading;
    67using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
     
    6566    #endregion
    6667
    67     public Dictionary<int, SymbolString> DistinctSentences;   // Semantically distinct sentences in a run.
    68     public Dictionary<int, List<SymbolString>> AllSentences; // All sentences ever generated in a run.
    69     public Dictionary<int, SymbolString> ArchivedPhrases;     // Nodes in the search tree
    70     SearchDataStore OpenPhrases;                              // Stack/Queue/etc. for fetching the next node in the search tree. 
    71 
    72     public int EqualGeneratedSentences; // It is not guaranteed that shorter solutions are found first.
    73                                         // When longer solutions are overwritten with shorter ones,
    74                                         // this counter is increased.
    75     public int Expansions;              // Number, how many times a nonterminal symbol is replaced with a production rule.
    76     public Grammar Grammar;
    77 
    78     public StringBuilder dotFileBuilder;
     68    public Dictionary<int, SymbolString> DistinctSentences { get; private set; }  // Semantically distinct sentences in a run.
     69    public Dictionary<int, List<SymbolString>> AllSentences { get; private set; } // All sentences ever generated in a run.
     70    public Dictionary<int, SymbolString> ArchivedPhrases { get; private set; }    // Nodes in the search tree
     71    internal SearchDataStore OpenPhrases { get; private set; }                    // Stack/Queue/etc. for fetching the next node in the search tree. 
     72
     73    public int EqualGeneratedSentences { get; private set; } // It is not guaranteed that shorter solutions are found first.
     74                                                             // When longer solutions are overwritten with shorter ones,
     75                                                             // this counter is increased.
     76    public int Expansions { get; private set; }              // Number, how many times a nonterminal symbol is replaced with a production rule.
     77    public Grammar Grammar { get; private set; }
     78
     79    private readonly string dotFileName = Environment.GetFolderPath(System.Environment.SpecialFolder.DesktopDirectory) + @"\searchgraph.dot";
    7980
    8081    #region ctors
     
    8485
    8586    public GrammarEnumerationAlgorithm() {
    86       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
    87       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));
    88 
    8987      Problem = new RegressionProblem() {
    90         ProblemData = regProblem
     88        ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenFunctionNine(seed: 1234).GenerateRegressionData()
    9189      };
    9290
    9391      Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
    94       Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(4000)));
    95 
     92      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(1000)));
    9693      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
    9794    }
     
    103100      #region init
    104101      InitResults();
    105 
    106       dotFileBuilder = new StringBuilder();
    107102
    108103      AllSentences = new Dictionary<int, List<SymbolString>>();
     
    116111      OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy
    117112      var phrase0 = new SymbolString(new[] { Grammar.StartSymbol });
     113      var phrase0Hash = Grammar.CalcHashCode(phrase0);
    118114      #endregion
    119115
    120       OpenPhrases.Store(Grammar.CalcHashCode(phrase0), phrase0);
    121 
    122       while (OpenPhrases.Count > 0) {
    123         if (cancellationToken.IsCancellationRequested) break;
    124 
    125         StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
    126         SymbolString currPhrase = fetchedPhrase.SymbolString;
    127 
    128         if (dotFileBuilder.Length == 0) {
    129           dotFileBuilder.AppendLine("digraph searchgraph {");
    130           dotFileBuilder.AppendLine(fetchedPhrase.Hash + " [label=\"" + Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString) + "\", shape=doublecircle];");
    131         } else {
    132           dotFileBuilder.AppendLine(fetchedPhrase.Hash + " [label=\"" + Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString) + "\"];");
    133         }
    134 
    135         ArchivedPhrases.Add(fetchedPhrase.Hash, currPhrase);
    136 
    137         // expand next nonterminal symbols
    138         int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
    139         NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
    140 
    141         foreach (Production productionAlternative in expandedSymbol.Alternatives) {
    142           SymbolString newPhrase = new SymbolString(currPhrase);
    143           newPhrase.RemoveAt(nonterminalSymbolIndex);
    144           newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative);
    145 
    146           Expansions++;
    147           if (newPhrase.Count <= MaxTreeSize) {
    148             var phraseHash = Grammar.CalcHashCode(newPhrase);
    149 
    150             dotFileBuilder.AppendLine(fetchedPhrase.Hash + " -> " + phraseHash + " [label=\"" + expandedSymbol.StringRepresentation + "&rarr;" + productionAlternative + "\"];");
    151 
    152             if (newPhrase.IsSentence()) { // Sentence was generated.
    153               SaveToAllSentences(phraseHash, newPhrase);
    154 
    155               if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) {
    156                 if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only
    157 
    158                 DistinctSentences[phraseHash] = newPhrase;
    159                 EvaluateSentence(newPhrase);
    160 
    161                 dotFileBuilder.AppendLine(phraseHash + " [label=\"" + Grammar.PostfixToInfixParser(newPhrase) + "\", style=\"filled\"];");
     116      using (TextWriterTraceListener dotFileTrace = new TextWriterTraceListener(new FileStream(dotFileName, FileMode.Create))) {
     117        dotFileTrace.Write("digraph searchgraph {");
     118
     119        OpenPhrases.Store(phrase0Hash, phrase0);
     120        while (OpenPhrases.Count > 0) {
     121          if (cancellationToken.IsCancellationRequested) break;
     122
     123          StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
     124          SymbolString currPhrase = fetchedPhrase.SymbolString;
     125
     126          dotFileTrace.Write(
     127            $"{fetchedPhrase.Hash} [label=\"{Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString)}\"];");
     128
     129          ArchivedPhrases.Add(fetchedPhrase.Hash, currPhrase);
     130
     131          // expand next nonterminal symbols
     132          int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
     133          NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
     134
     135          foreach (Production productionAlternative in expandedSymbol.Alternatives) {
     136            SymbolString newPhrase = new SymbolString(currPhrase);
     137            newPhrase.RemoveAt(nonterminalSymbolIndex);
     138            newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative);
     139
     140            Expansions++;
     141            if (newPhrase.Count <= MaxTreeSize) {
     142              var phraseHash = Grammar.CalcHashCode(newPhrase);
     143
     144              dotFileTrace.Write(
     145                $"{fetchedPhrase.Hash} -> {phraseHash} [label=\"{expandedSymbol.StringRepresentation} + &rarr; {productionAlternative}\"];");
     146
     147              if (newPhrase.IsSentence()) {
     148                // Sentence was generated.
     149                SaveToAllSentences(phraseHash, newPhrase);
     150
     151                if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) {
     152                  if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only
     153
     154                  DistinctSentences[phraseHash] = newPhrase;
     155                  EvaluateSentence(newPhrase);
     156
     157                  dotFileTrace.Write(
     158                    $"{phraseHash} [label=\"{Grammar.PostfixToInfixParser(newPhrase)}\", style=\"filled\"];");
     159                }
     160                UpdateView(AllSentences, DistinctSentences);
     161
     162              } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.ContainsKey(phraseHash)) {
     163                OpenPhrases.Store(phraseHash, newPhrase);
    162164              }
    163               UpdateView(AllSentences, DistinctSentences, Expansions);
    164 
    165             } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.ContainsKey(phraseHash)) {
    166               OpenPhrases.Store(phraseHash, newPhrase);
    167165            }
    168166          }
    169167        }
    170       }
    171 
    172       dotFileBuilder.AppendLine(Grammar.CalcHashCode(BestTrainingSentence) + " [label=\"" + Grammar.PostfixToInfixParser(BestTrainingSentence) + "\", shape=Mcircle, style=\"filled,bold\"];");
    173       dotFileBuilder.AppendLine("}");
    174 
    175       System.IO.File.WriteAllText(Environment.GetFolderPath(System.Environment.SpecialFolder.DesktopDirectory)+ @"\searchgraph.dot", dotFileBuilder.ToString());
    176 
    177       UpdateView(AllSentences, DistinctSentences, Expansions, force: true);
     168
     169        // Overwrite formatting of start search node and best found solution.
     170        dotFileTrace.Write(
     171          $"{Grammar.CalcHashCode(BestTrainingSentence)} [label=\"{Grammar.PostfixToInfixParser(BestTrainingSentence)}\", shape=Mcircle, style=\"filled,bold\"];");
     172        dotFileTrace.Write($"{phrase0Hash} [label=\"{Grammar.PostfixToInfixParser(phrase0)}\", shape=doublecircle];}}");
     173        dotFileTrace.Flush();
     174      }
     175
     176      UpdateView(AllSentences, DistinctSentences, force: true);
    178177      UpdateFinalResults();
    179178    }
     
    230229    private int updates;
    231230    private void UpdateView(Dictionary<int, List<SymbolString>> allGenerated,
    232         Dictionary<int, SymbolString> distinctGenerated, int expansions, bool force = false) {
     231        Dictionary<int, SymbolString> distinctGenerated, bool force = false) {
    233232      updates++;
    234233
     
    237236        ((IntValue)Results[GeneratedSentencesName].Value).Value = allGeneratedEnum.Length;
    238237        ((IntValue)Results[DistinctSentencesName].Value).Value = distinctGenerated.Count;
    239         ((IntValue)Results[PhraseExpansionsName].Value).Value = expansions;
     238        ((IntValue)Results[PhraseExpansionsName].Value).Value = Expansions;
    240239        ((IntValue)Results[GeneratedEqualSentencesName].Value).Value = EqualGeneratedSentences;
    241240        ((DoubleValue)Results[AverageTreeLengthName].Value).Value = allGeneratedEnum.Select(sentence => sentence.Count).Average();
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/SearchDataStructure.cs

    r15746 r15800  
    66
    77  public struct StoredSymbolString {
    8     public int Hash { get; }
    9     public SymbolString SymbolString { get; }
     8    public readonly int Hash;
     9    public readonly SymbolString SymbolString;
    1010
    1111    public StoredSymbolString(int hash, SymbolString symbolString) {
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs

    r15723 r15800  
    11using System.Collections.Generic;
    2 using System.Linq;
    3 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
    42
    53namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
    64
    75  public abstract class Symbol {
    8     public readonly string StringRepresentation;
     6    public string StringRepresentation { get; }
    97
    108    protected Symbol(string representation) {
     
    2220
    2321  public class NonterminalSymbol : Symbol {
    24     public List<Production> Alternatives;
     22    public List<Production> Alternatives { get; }
    2523
    2624    public NonterminalSymbol(string representation) : base(representation) {
     
    3432
    3533  public class VariableSymbol : NonterminalSymbol { // Convenience class
    36     public IEnumerable<TerminalSymbol> VariableTerminalSymbols;
     34    public IEnumerable<TerminalSymbol> VariableTerminalSymbols { get; }
    3735
    3836    public VariableSymbol(string representation, IEnumerable<string> variableNames) : base(representation) {
Note: See TracChangeset for help on using the changeset viewer.