Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Hashing/Hasher.cs @ 15965

Last change on this file since 15965 was 15965, checked in by bburlacu, 6 years ago

#2886: Improve hashing performance (about 10% measured improvement)

File size: 6.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Common;
6using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
7
8namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration {
9  [StorableClass]
10  public abstract class Hasher<THashType> : DeepCloneable {
11    protected Hasher(Grammar grammar) {
12      Grammar = grammar;
13    }
14
15    protected Hasher(Hasher<THashType> original, Cloner cloner) : base(original, cloner) {
16      Grammar = cloner.Clone(original.Grammar);
17    }
18
19    protected Grammar Grammar { get; }
20
21    protected abstract THashType AggregateHashes(Symbol operatorSym, IList<THashType> hashes);
22
23    public int CalcHashCode(SymbolString sentence) {
24      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
25
26      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
27
28      Symbol peek = parseStack.Peek();
29      return AggregateHashes(peek, GetSubtreeHashes(parseStack)).GetHashCode();
30    }
31
32    [StorableConstructor]
33    protected Hasher(bool deserializing) { }
34
35    private IList<THashType> GetSubtreeHashes(Stack<Symbol> parseStack) {
36      Symbol currentSymbol = parseStack.Pop();
37
38      // ADDITION
39      if (currentSymbol == Grammar.Addition) {
40        return GetAdditionSubtreeHashes(parseStack);
41      }
42
43      // MULTIPLICATION
44      if (currentSymbol == Grammar.Multiplication) {
45        return GetMultiplicationSubtreeHashes(parseStack);
46      }
47
48      // LOG, EXP, SIN, INV
49      if (currentSymbol == Grammar.Log || currentSymbol == Grammar.Exp ||
50          currentSymbol == Grammar.Sin || currentSymbol == Grammar.Inv) {
51        // Directly aggregate the single subtree
52        return new[] { AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)) };
53      }
54
55      // var, const or nonterminal symbol
56      return new[] { AggregateHashes(currentSymbol, new THashType[0]) };
57    }
58
59    private void AddAdditionChildHashes(HashSet<THashType> hashSet, Stack<Symbol> symbolStack) {
60      var peek = symbolStack.Peek();
61      var hashes = GetSubtreeHashes(symbolStack); // will pop the stack
62
63      if (peek == Grammar.Addition)
64        hashSet.UnionWith(hashes);
65      else
66        hashSet.Add(AggregateHashes(peek, hashes));
67    }
68
69    private IList<THashType> GetAdditionSubtreeHashes(Stack<Symbol> parseStack) {
70      var uniqueChildHashes = new HashSet<THashType>();
71
72      AddAdditionChildHashes(uniqueChildHashes, parseStack); // first child
73      AddAdditionChildHashes(uniqueChildHashes, parseStack); // second child
74
75      var result = uniqueChildHashes.ToArray();
76      Array.Sort(result);
77      return result;
78    }
79
80    private void AddMultiplicationChildHashes(List<THashType> hashList, Stack<Symbol> symbolStack) {
81      var peek = symbolStack.Peek();
82      var hashes = GetSubtreeHashes(symbolStack);
83      if (peek == Grammar.Multiplication)
84        hashList.AddRange(hashes);
85      else
86        hashList.Add(AggregateHashes(peek, hashes));
87    }
88
89    public IList<THashType> GetMultiplicationSubtreeHashes(Stack<Symbol> parseStack) {
90      var childHashes = new List<THashType>();
91
92      AddMultiplicationChildHashes(childHashes, parseStack); // first child
93      AddMultiplicationChildHashes(childHashes, parseStack); // second child
94
95      // Sort due to commutativity
96      childHashes.Sort();
97
98      if (childHashes.Count <= 2)
99        return childHashes;
100
101      // Cancel out inverse factors.
102      var isInvFactor = new bool[childHashes.Count];
103      bool foundInvFactor = false;
104      for (int i = 0; i < isInvFactor.Length; i++) {
105        if (isInvFactor[i]) continue;
106
107        var currFactor = childHashes[i];
108        var invFactor = AggregateHashes(Grammar.Inv, new[] { currFactor });
109
110        int indexOfInv = childHashes.IndexOf(invFactor);
111        if (indexOfInv >= 0 && !isInvFactor[indexOfInv]) {
112          isInvFactor[i] = isInvFactor[indexOfInv] = true;
113          foundInvFactor = true;
114        }
115      }
116
117      if (!foundInvFactor)
118        return childHashes;
119
120      return childHashes
121        .Where((c, i) => !isInvFactor[i])
122        .ToArray();
123    }
124  }
125
126  [StorableClass]
127  public class IntHasher : Hasher<int> {
128    public IntHasher(Grammar grammar) : base(grammar) { }
129
130    public IntHasher(IntHasher original, Cloner cloner) : base(original, cloner) { }
131
132    public override IDeepCloneable Clone(Cloner cloner) {
133      return new IntHasher(this, cloner);
134    }
135
136    [StorableConstructor]
137    protected IntHasher(bool deserializing) : base(deserializing) { }
138
139    protected override int AggregateHashes(Symbol operatorSym, IList<int> hashes) {
140      int start;
141      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashes.Count <= 1) {
142        start = 0;
143
144      } else if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
145        return operatorSym.StringRepresentation.GetHashCode();
146
147      } else {
148        start = operatorSym.StringRepresentation.GetHashCode();
149      }
150
151      for (int i = 0; i < hashes.Count; i++) {
152        start = ((start << 5) + start) ^ hashes[i];
153      }
154      return start;
155    }
156  }
157
158  [StorableClass]
159  public class StringHasher : Hasher<string> {
160    public StringHasher(Grammar grammar) : base(grammar) { }
161
162    public StringHasher(StringHasher original, Cloner cloner) : base(original, cloner) { }
163
164    public override IDeepCloneable Clone(Cloner cloner) {
165      return new StringHasher(this, cloner);
166    }
167
168    [StorableConstructor]
169    protected StringHasher(bool deserializing) : base(deserializing) { }
170
171    protected override string AggregateHashes(Symbol operatorSym, IList<string> hashes) {
172      var hashesArray = hashes.ToArray();
173
174      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashesArray.Length <= 1) {
175        return hashesArray[0];
176      }
177      if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
178        return operatorSym.StringRepresentation;
179      }
180
181      return $"[{operatorSym.StringRepresentation} ° {string.Join(" ° ", hashesArray)}]";
182    }
183  }
184}
Note: See TracBrowser for help on using the repository browser.