source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Hashing/Hasher.cs @ 15975

Last change on this file since 15975 was 15975, checked in by bburlacu, 3 years ago

#2886: address additional serialization issues, make Production implement IList<T> instead of deriving from List<T>

File size: 6.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Common;
6using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
7
8namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration {
9  [StorableClass]
10  public abstract class Hasher<THashType> : DeepCloneable {
11    protected Hasher(Grammar grammar) {
12      Grammar = grammar;
13    }
14
15    protected Hasher(Hasher<THashType> original, Cloner cloner) : base(original, cloner) {
16      Grammar = cloner.Clone(original.Grammar);
17    }
18
19    [Storable]
20    protected Grammar Grammar { get; private set; }
21
22    protected abstract THashType AggregateHashes(Symbol operatorSym, IList<THashType> hashes);
23
24    public int CalcHashCode(SymbolString sentence) {
25      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
26
27      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
28
29      Symbol peek = parseStack.Peek();
30      return AggregateHashes(peek, GetSubtreeHashes(parseStack)).GetHashCode();
31    }
32
33    [StorableConstructor]
34    protected Hasher(bool deserializing) { }
35
36    private IList<THashType> GetSubtreeHashes(Stack<Symbol> parseStack) {
37      Symbol currentSymbol = parseStack.Pop();
38
39      // ADDITION
40      if (currentSymbol == Grammar.Addition) {
41        return GetAdditionSubtreeHashes(parseStack);
42      }
43
44      // MULTIPLICATION
45      if (currentSymbol == Grammar.Multiplication) {
46        return GetMultiplicationSubtreeHashes(parseStack);
47      }
48
49      // LOG, EXP, SIN, INV
50      if (currentSymbol == Grammar.Log || currentSymbol == Grammar.Exp ||
51          currentSymbol == Grammar.Sin || currentSymbol == Grammar.Inv) {
52        // Directly aggregate the single subtree
53        return new[] { AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)) };
54      }
55
56      // var, const or nonterminal symbol
57      return new[] { AggregateHashes(currentSymbol, new THashType[0]) };
58    }
59
60    private void AddAdditionChildHashes(HashSet<THashType> hashSet, Stack<Symbol> symbolStack) {
61      var peek = symbolStack.Peek();
62      var hashes = GetSubtreeHashes(symbolStack); // will pop the stack
63
64      if (peek == Grammar.Addition)
65        hashSet.UnionWith(hashes);
66      else
67        hashSet.Add(AggregateHashes(peek, hashes));
68    }
69
70    private IList<THashType> GetAdditionSubtreeHashes(Stack<Symbol> parseStack) {
71      var uniqueChildHashes = new HashSet<THashType>();
72
73      AddAdditionChildHashes(uniqueChildHashes, parseStack); // first child
74      AddAdditionChildHashes(uniqueChildHashes, parseStack); // second child
75
76      var result = uniqueChildHashes.ToArray();
77      Array.Sort(result);
78      return result;
79    }
80
81    private void AddMultiplicationChildHashes(List<THashType> hashList, Stack<Symbol> symbolStack) {
82      var peek = symbolStack.Peek();
83      var hashes = GetSubtreeHashes(symbolStack);
84      if (peek == Grammar.Multiplication)
85        hashList.AddRange(hashes);
86      else
87        hashList.Add(AggregateHashes(peek, hashes));
88    }
89
90    public IList<THashType> GetMultiplicationSubtreeHashes(Stack<Symbol> parseStack) {
91      var childHashes = new List<THashType>();
92
93      AddMultiplicationChildHashes(childHashes, parseStack); // first child
94      AddMultiplicationChildHashes(childHashes, parseStack); // second child
95
96      // Sort due to commutativity
97      childHashes.Sort();
98
99      if (childHashes.Count <= 2)
100        return childHashes;
101
102      // Cancel out inverse factors.
103      var isInvFactor = new bool[childHashes.Count];
104      bool foundInvFactor = false;
105      for (int i = 0; i < isInvFactor.Length; i++) {
106        if (isInvFactor[i]) continue;
107
108        var currFactor = childHashes[i];
109        var invFactor = AggregateHashes(Grammar.Inv, new[] { currFactor });
110
111        int indexOfInv = childHashes.IndexOf(invFactor);
112        if (indexOfInv >= 0 && !isInvFactor[indexOfInv]) {
113          isInvFactor[i] = isInvFactor[indexOfInv] = true;
114          foundInvFactor = true;
115        }
116      }
117
118      if (!foundInvFactor)
119        return childHashes;
120
121      return childHashes
122        .Where((c, i) => !isInvFactor[i])
123        .ToArray();
124    }
125  }
126
127  [StorableClass]
128  public class IntHasher : Hasher<int> {
129    public IntHasher(Grammar grammar) : base(grammar) { }
130
131    public IntHasher(IntHasher original, Cloner cloner) : base(original, cloner) { }
132
133    public override IDeepCloneable Clone(Cloner cloner) {
134      return new IntHasher(this, cloner);
135    }
136
137    [StorableConstructor]
138    protected IntHasher(bool deserializing) : base(deserializing) { }
139
140    protected override int AggregateHashes(Symbol operatorSym, IList<int> hashes) {
141      int start;
142      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashes.Count <= 1) {
143        start = 0;
144
145      } else if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
146        return operatorSym.StringRepresentation.GetHashCode();
147
148      } else {
149        start = operatorSym.StringRepresentation.GetHashCode();
150      }
151
152      for (int i = 0; i < hashes.Count; i++) {
153        start = ((start << 5) + start) ^ hashes[i];
154      }
155      return start;
156    }
157  }
158
159  [StorableClass]
160  public class StringHasher : Hasher<string> {
161    public StringHasher(Grammar grammar) : base(grammar) { }
162
163    public StringHasher(StringHasher original, Cloner cloner) : base(original, cloner) { }
164
165    public override IDeepCloneable Clone(Cloner cloner) {
166      return new StringHasher(this, cloner);
167    }
168
169    [StorableConstructor]
170    protected StringHasher(bool deserializing) : base(deserializing) { }
171
172    protected override string AggregateHashes(Symbol operatorSym, IList<string> hashes) {
173      var hashesArray = hashes.ToArray();
174
175      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashesArray.Length <= 1) {
176        return hashesArray[0];
177      }
178      if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
179        return operatorSym.StringRepresentation;
180      }
181
182      return $"[{operatorSym.StringRepresentation} ° {string.Join(" ° ", hashesArray)}]";
183    }
184  }
185}
Note: See TracBrowser for help on using the repository browser.