Changeset 15812


Ignore:
Timestamp:
02/23/18 18:10:30 (19 months ago)
Author:
lkammere
Message:

#2886: Performance Improvements - Only store hash of archived phrases and reduce number of enumerators.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration

    • Property svn:ignore
      •  

        old new  
        11TestResults
         2*.gitignore
    • Property svn:global-ignores set to
      .git
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15806 r15812  
    143143    }
    144144
    145     private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, IEnumerable<THashType>, THashType> aggregateFunction) {
     145    private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, THashType[], THashType> aggregateFunction) {
    146146      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
    147147
     
    152152    }
    153153
    154     private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, IEnumerable<THashType>, THashType> aggregateHashes) {
     154    private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, THashType[], THashType> aggregateHashes) {
    155155      Symbol currentSymbol = parseStack.Pop();
    156156
     
    207207
    208208          var currFactor = childHashes[i];
    209           var invFactor = aggregateHashes(Inv, currFactor.ToEnumerable());
     209          var invFactor = aggregateHashes(Inv, new[] { currFactor });
    210210
    211211          int indexOfInv = childHashes.IndexOf(invFactor);
     
    228228
    229229      // var or nonterminal symbol
    230       return new[] { aggregateHashes(currentSymbol, Enumerable.Empty<THashType>()) };
    231     }
    232 
    233     private string AggregateStringHashes(Symbol operatorSym, IEnumerable<string> hashes) {
     230      return new[] { aggregateHashes(currentSymbol, new THashType[0]) };
     231    }
     232
     233    private string AggregateStringHashes(Symbol operatorSym, string[] hashes) {
    234234      var hashesArray = hashes.ToArray();
    235235
     
    237237        return hashesArray[0];
    238238      }
    239       if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) {
     239      if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
    240240        return operatorSym.StringRepresentation;
    241241      }
     
    244244    }
    245245
    246     private int AggregateIntHashes(Symbol operatorSym, IEnumerable<int> hashes) {
    247       var hashesArray = hashes.ToArray();
    248 
     246    private int AggregateIntHashes(Symbol operatorSym, int[] hashes) {
    249247      int start;
    250248      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) &&
    251           hashesArray.Count() <= 1) {
     249          hashes.Length <= 1) {
    252250        start = 0;
    253251
    254       } else if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) {
     252      } else if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
    255253        return operatorSym.StringRepresentation.GetHashCode();
    256254
     
    259257      }
    260258
    261       return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
     259      for (int i = 0; i < hashes.Length; i++) {
     260        start = ((start << 5) + start) ^ hashes[i];
     261      }
     262      return start;
    262263    }
    263264    #endregion
     
    314315        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
    315316
    316       } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
     317      } else if (currentSymbol.IsVariable) {
    317318        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
    318319        varNode.Weight = 1.0;
     
    337338      Symbol head = parseStack.Pop();
    338339
    339       SymbolString result = new SymbolString();
     340      SymbolString result = new SymbolString(parseStack.Count);
    340341
    341342      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15806 r15812  
    7070    public Dictionary<int, SymbolString> DistinctSentences { get; private set; }  // Semantically distinct sentences in a run.
    7171    public Dictionary<int, List<SymbolString>> AllSentences { get; private set; } // All sentences ever generated in a run.
    72     public Dictionary<int, SymbolString> ArchivedPhrases { get; private set; }    // Nodes in the search tree
     72    public HashSet<int> ArchivedPhrases { get; private set; }
     73
    7374    internal SearchDataStore OpenPhrases { get; private set; }                    // Stack/Queue/etc. for fetching the next node in the search tree. 
    7475
     
    104105
    105106      AllSentences = new Dictionary<int, List<SymbolString>>();
    106       ArchivedPhrases = new Dictionary<int, SymbolString>(); // Nodes in the search tree
     107      ArchivedPhrases = new HashSet<int>();
     108
    107109      DistinctSentences = new Dictionary<int, SymbolString>();
    108110      Expansions = 0;
     
    125127          StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
    126128          SymbolString currPhrase = fetchedPhrase.SymbolString;
    127 
     129#if DEBUG
    128130          LogSearchGraph(dotFileTrace, $"{fetchedPhrase.Hash} [label=\"{Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString)}\"];");
    129 
    130           ArchivedPhrases.Add(fetchedPhrase.Hash, currPhrase);
     131#endif
     132          ArchivedPhrases.Add(fetchedPhrase.Hash);
    131133
    132134          // expand next nonterminal symbols
     
    135137
    136138          foreach (Production productionAlternative in expandedSymbol.Alternatives) {
    137             SymbolString newPhrase = new SymbolString(currPhrase);
     139            SymbolString newPhrase = new SymbolString(currPhrase.Count + productionAlternative.Count);
     140            newPhrase.AddRange(currPhrase);
    138141            newPhrase.RemoveAt(nonterminalSymbolIndex);     // TODO: removeat and insertRange are both O(n)
    139142            newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative);
     
    142145            if (newPhrase.Count <= MaxTreeSize) {
    143146              var phraseHash = Grammar.CalcHashCode(newPhrase);
    144 
     147#if DEBUG
    145148              LogSearchGraph(dotFileTrace, $"{fetchedPhrase.Hash} -> {phraseHash} [label=\"{expandedSymbol.StringRepresentation} + &rarr; {productionAlternative}\"];");
    146 
     149#endif
    147150              if (newPhrase.IsSentence()) {
    148151                // Sentence was generated.
     
    155158                  EvaluateSentence(newPhrase);
    156159
     160#if DEBUG
    157161                  LogSearchGraph(dotFileTrace, $"{phraseHash} [label=\"{Grammar.PostfixToInfixParser(newPhrase)}\", style=\"filled\"];");
     162#endif
    158163                }
    159                 UpdateView(AllSentences, DistinctSentences);
    160 
    161               } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.ContainsKey(phraseHash)) {
     164                UpdateView();
     165
     166              } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) {
    162167                OpenPhrases.Store(phraseHash, newPhrase);
    163168              }
     
    165170          }
    166171        }
    167 
     172#if DEBUG
    168173        // Overwrite formatting of start search node and best found solution.
    169174        LogSearchGraph(dotFileTrace, $"{Grammar.CalcHashCode(BestTrainingSentence)} [label=\"{Grammar.PostfixToInfixParser(BestTrainingSentence)}\", shape=Mcircle, style=\"filled,bold\"];");
    170175        LogSearchGraph(dotFileTrace, $"{phrase0Hash} [label=\"{Grammar.PostfixToInfixParser(phrase0)}\", shape=doublecircle];}}");
    171176        dotFileTrace.Flush();
    172       }
    173 
    174       UpdateView(AllSentences, DistinctSentences, force: true);
     177#endif
     178      }
     179
     180      UpdateView(force: true);
    175181      UpdateFinalResults();
    176182    }
     
    184190    }
    185191
    186     #region Evaluation of generated models.
     192#region Evaluation of generated models.
    187193
    188194    // Evaluate sentence within an algorithm run.
     
    208214    }
    209215
    210     #endregion
    211 
    212     #region Visualization in HL
     216#endregion
     217
     218#region Visualization in HL
    213219    // Initialize entries in result set.
    214220    private void InitResults() {
     
    228234    // Update the view for intermediate results in an algorithm run.
    229235    private int updates;
    230     private void UpdateView(Dictionary<int, List<SymbolString>> allGenerated,
    231         Dictionary<int, SymbolString> distinctGenerated, bool force = false) {
     236    private void UpdateView(bool force = false) {
    232237      updates++;
    233238
    234       if (force || updates % GuiUpdateInterval == 0) {
    235         var allGeneratedEnum = allGenerated.Values.SelectMany(x => x).ToArray();
     239      if (force || updates % GuiUpdateInterval == 1) {
     240        var allGeneratedEnum = AllSentences.Values.SelectMany(x => x).ToArray();
    236241        ((IntValue)Results[GeneratedPhrasesName].Value).Value = ArchivedPhrases.Count;
    237242        ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count;
    238243        ((IntValue)Results[GeneratedSentencesName].Value).Value = allGeneratedEnum.Length;
    239         ((IntValue)Results[DistinctSentencesName].Value).Value = distinctGenerated.Count;
     244        ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentences.Count;
    240245        ((IntValue)Results[PhraseExpansionsName].Value).Value = Expansions;
    241246        ((IntValue)Results[GeneratedEqualSentencesName].Value).Value = EqualGeneratedSentences;
     
    279284      Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentencesMatrix)));
    280285    }
    281    
    282     private void LogSearchGraph(TraceListener listener, String msg) {
    283 #if false
     286
     287    private void LogSearchGraph(TraceListener listener, string msg) {
    284288      listener.Write(msg);
    285 #endif
    286     }
    287     #endregion
     289    }
     290#endregion
    288291
    289292  }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Sentence.cs

    r15723 r15812  
    88  public class SymbolString : List<Symbol> {
    99
    10     public SymbolString() { }
     10    public SymbolString(int capacity) : base(capacity) { }
    1111
    1212    public SymbolString(IEnumerable<Symbol> symbols) : base(symbols) { }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs

    r15800 r15812  
    1616
    1717  public class TerminalSymbol : Symbol {
    18     public TerminalSymbol(string representation) : base(representation) { }
     18    public readonly bool IsVariable;
     19
     20    public TerminalSymbol(string representation, bool isVariable = false) : base(representation) {
     21      IsVariable = isVariable;
     22    }
    1923  }
    2024
     
    3943
    4044      foreach (string variableName in variableNames) {
    41         TerminalSymbol s = new TerminalSymbol(variableName);
     45        TerminalSymbol s = new TerminalSymbol(variableName, isVariable: true);
    4246        createdSymbols.Add(s);
    4347        AddProduction(s);
Note: See TracChangeset for help on using the changeset viewer.