Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
06/19/18 10:57:47 (7 years ago)
Author:
bburlacu
Message:

#2886: Improve hashing performance (about 10% measured improvement)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Hashing/Hasher.cs

    r15960 r15965  
    1919    protected Grammar Grammar { get; }
    2020
    21     protected abstract THashType AggregateHashes(Symbol operatorSym, THashType[] hashes);
     21    protected abstract THashType AggregateHashes(Symbol operatorSym, IList<THashType> hashes);
    2222
    2323    public int CalcHashCode(SymbolString sentence) {
     
    3333    protected Hasher(bool deserializing) { }
    3434
    35     private THashType[] GetSubtreeHashes(Stack<Symbol> parseStack) {
     35    private IList<THashType> GetSubtreeHashes(Stack<Symbol> parseStack) {
    3636      Symbol currentSymbol = parseStack.Pop();
    3737
     
    5757    }
    5858
    59     private THashType[] GetAdditionSubtreeHashes(Stack<Symbol> parseStack) {
     59    private void AddAdditionChildHashes(HashSet<THashType> hashSet, Stack<Symbol> symbolStack) {
     60      var peek = symbolStack.Peek();
     61      var hashes = GetSubtreeHashes(symbolStack); // will pop the stack
     62
     63      if (peek == Grammar.Addition)
     64        hashSet.UnionWith(hashes);
     65      else
     66        hashSet.Add(AggregateHashes(peek, hashes));
     67    }
     68
     69    private IList<THashType> GetAdditionSubtreeHashes(Stack<Symbol> parseStack) {
    6070      var uniqueChildHashes = new HashSet<THashType>();
    6171
    62       // First subtree
    63       if (parseStack.Peek() == Grammar.Addition) {
    64         uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
    65       } else {
    66         var peek = parseStack.Peek();
    67         uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
    68       }
    69       // Second subtree
    70       if (parseStack.Peek() == Grammar.Addition) {
    71         uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
    72       } else {
    73         var peek = parseStack.Peek();
    74         uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
    75       }
     72      AddAdditionChildHashes(uniqueChildHashes, parseStack); // first child
     73      AddAdditionChildHashes(uniqueChildHashes, parseStack); // second child
    7674
    7775      var result = uniqueChildHashes.ToArray();
     
    8078    }
    8179
    82     public THashType[] GetMultiplicationSubtreeHashes(Stack<Symbol> parseStack) {
     80    private void AddMultiplicationChildHashes(List<THashType> hashList, Stack<Symbol> symbolStack) {
     81      var peek = symbolStack.Peek();
     82      var hashes = GetSubtreeHashes(symbolStack);
     83      if (peek == Grammar.Multiplication)
     84        hashList.AddRange(hashes);
     85      else
     86        hashList.Add(AggregateHashes(peek, hashes));
     87    }
     88
     89    public IList<THashType> GetMultiplicationSubtreeHashes(Stack<Symbol> parseStack) {
    8390      var childHashes = new List<THashType>();
    8491
    85       // First subtree
    86       if (parseStack.Peek() == Grammar.Multiplication) {
    87         childHashes.AddRange(GetSubtreeHashes(parseStack));
    88       } else {
    89         childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
    90       }
    91       // Second subtree
    92       if (parseStack.Peek() == Grammar.Multiplication) {
    93         childHashes.AddRange(GetSubtreeHashes(parseStack));
    94       } else {
    95         childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
    96       }
     92      AddMultiplicationChildHashes(childHashes, parseStack); // first child
     93      AddMultiplicationChildHashes(childHashes, parseStack); // second child
    9794
    9895      // Sort due to commutativity
    9996      childHashes.Sort();
    10097
     98      if (childHashes.Count <= 2)
     99        return childHashes;
     100
    101101      // Cancel out inverse factors.
    102       bool[] isFactorRemaining = Enumerable.Repeat(true, childHashes.Count).ToArray();
    103 
    104       for (int i = 0; i < isFactorRemaining.Length; i++) {
    105         if (!isFactorRemaining[i]) continue;
    106         if (isFactorRemaining.Length <= 2) break; // Until we have constants, we can't cancel out all terms.
     102      var isInvFactor = new bool[childHashes.Count];
     103      bool foundInvFactor = false;
     104      for (int i = 0; i < isInvFactor.Length; i++) {
     105        if (isInvFactor[i]) continue;
    107106
    108107        var currFactor = childHashes[i];
     
    110109
    111110        int indexOfInv = childHashes.IndexOf(invFactor);
    112         if (indexOfInv >= 0 && isFactorRemaining[indexOfInv]) {
    113           isFactorRemaining[i] = isFactorRemaining[indexOfInv] = false;
     111        if (indexOfInv >= 0 && !isInvFactor[indexOfInv]) {
     112          isInvFactor[i] = isInvFactor[indexOfInv] = true;
     113          foundInvFactor = true;
    114114        }
    115115      }
    116116
     117      if (!foundInvFactor)
     118        return childHashes;
     119
    117120      return childHashes
    118         .Where((c, i) => isFactorRemaining[i])
     121        .Where((c, i) => !isInvFactor[i])
    119122        .ToArray();
    120123    }
     
    134137    protected IntHasher(bool deserializing) : base(deserializing) { }
    135138
    136     protected override int AggregateHashes(Symbol operatorSym, int[] hashes) {
     139    protected override int AggregateHashes(Symbol operatorSym, IList<int> hashes) {
    137140      int start;
    138       if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashes.Length <= 1) {
     141      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashes.Count <= 1) {
    139142        start = 0;
    140143
     
    146149      }
    147150
    148       for (int i = 0; i < hashes.Length; i++) {
     151      for (int i = 0; i < hashes.Count; i++) {
    149152        start = ((start << 5) + start) ^ hashes[i];
    150153      }
     
    166169    protected StringHasher(bool deserializing) : base(deserializing) { }
    167170
    168     protected override string AggregateHashes(Symbol operatorSym, string[] hashes) {
     171    protected override string AggregateHashes(Symbol operatorSym, IList<string> hashes) {
    169172      var hashesArray = hashes.ToArray();
    170173
Note: See TracChangeset for help on using the changeset viewer.