Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Hashing/Hasher.cs @ 16022

Last change on this file since 16022 was 15975, checked in by bburlacu, 7 years ago

#2886: address additional serialization issues, make Production implement IList<T> instead of deriving from List<T>

File size: 6.2 KB
RevLine 
[15832]1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
[15960]5using HeuristicLab.Common;
6using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
[15832]7
8namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration {
[15960]9  [StorableClass]
10  public abstract class Hasher<THashType> : DeepCloneable {
[15832]11    protected Hasher(Grammar grammar) {
12      Grammar = grammar;
13    }
14
[15960]15    protected Hasher(Hasher<THashType> original, Cloner cloner) : base(original, cloner) {
16      Grammar = cloner.Clone(original.Grammar);
17    }
18
[15975]19    [Storable]
20    protected Grammar Grammar { get; private set; }
[15832]21
[15965]22    protected abstract THashType AggregateHashes(Symbol operatorSym, IList<THashType> hashes);
[15832]23
24    public int CalcHashCode(SymbolString sentence) {
25      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
26
27      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
28
29      Symbol peek = parseStack.Peek();
30      return AggregateHashes(peek, GetSubtreeHashes(parseStack)).GetHashCode();
31    }
32
[15960]33    [StorableConstructor]
34    protected Hasher(bool deserializing) { }
35
[15965]36    private IList<THashType> GetSubtreeHashes(Stack<Symbol> parseStack) {
[15832]37      Symbol currentSymbol = parseStack.Pop();
38
39      // ADDITION
40      if (currentSymbol == Grammar.Addition) {
[15835]41        return GetAdditionSubtreeHashes(parseStack);
[15832]42      }
43
44      // MULTIPLICATION
45      if (currentSymbol == Grammar.Multiplication) {
[15835]46        return GetMultiplicationSubtreeHashes(parseStack);
[15832]47      }
48
49      // LOG, EXP, SIN, INV
50      if (currentSymbol == Grammar.Log || currentSymbol == Grammar.Exp ||
[15851]51          currentSymbol == Grammar.Sin || currentSymbol == Grammar.Inv) {
[15835]52        // Directly aggregate the single subtree
[15832]53        return new[] { AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)) };
54      }
55
[15849]56      // var, const or nonterminal symbol
[15832]57      return new[] { AggregateHashes(currentSymbol, new THashType[0]) };
58    }
[15835]59
[15965]60    private void AddAdditionChildHashes(HashSet<THashType> hashSet, Stack<Symbol> symbolStack) {
61      var peek = symbolStack.Peek();
62      var hashes = GetSubtreeHashes(symbolStack); // will pop the stack
63
64      if (peek == Grammar.Addition)
65        hashSet.UnionWith(hashes);
66      else
67        hashSet.Add(AggregateHashes(peek, hashes));
68    }
69
70    private IList<THashType> GetAdditionSubtreeHashes(Stack<Symbol> parseStack) {
[15835]71      var uniqueChildHashes = new HashSet<THashType>();
72
[15965]73      AddAdditionChildHashes(uniqueChildHashes, parseStack); // first child
74      AddAdditionChildHashes(uniqueChildHashes, parseStack); // second child
[15835]75
76      var result = uniqueChildHashes.ToArray();
77      Array.Sort(result);
78      return result;
79    }
80
[15965]81    private void AddMultiplicationChildHashes(List<THashType> hashList, Stack<Symbol> symbolStack) {
82      var peek = symbolStack.Peek();
83      var hashes = GetSubtreeHashes(symbolStack);
84      if (peek == Grammar.Multiplication)
85        hashList.AddRange(hashes);
86      else
87        hashList.Add(AggregateHashes(peek, hashes));
88    }
89
90    public IList<THashType> GetMultiplicationSubtreeHashes(Stack<Symbol> parseStack) {
[15835]91      var childHashes = new List<THashType>();
92
[15965]93      AddMultiplicationChildHashes(childHashes, parseStack); // first child
94      AddMultiplicationChildHashes(childHashes, parseStack); // second child
[15835]95
96      // Sort due to commutativity
97      childHashes.Sort();
98
[15965]99      if (childHashes.Count <= 2)
100        return childHashes;
101
[15835]102      // Cancel out inverse factors.
[15965]103      var isInvFactor = new bool[childHashes.Count];
104      bool foundInvFactor = false;
105      for (int i = 0; i < isInvFactor.Length; i++) {
106        if (isInvFactor[i]) continue;
[15835]107
108        var currFactor = childHashes[i];
109        var invFactor = AggregateHashes(Grammar.Inv, new[] { currFactor });
110
111        int indexOfInv = childHashes.IndexOf(invFactor);
[15965]112        if (indexOfInv >= 0 && !isInvFactor[indexOfInv]) {
113          isInvFactor[i] = isInvFactor[indexOfInv] = true;
114          foundInvFactor = true;
[15835]115        }
116      }
117
[15965]118      if (!foundInvFactor)
119        return childHashes;
120
[15835]121      return childHashes
[15965]122        .Where((c, i) => !isInvFactor[i])
[15835]123        .ToArray();
124    }
[15832]125  }
126
[15960]127  [StorableClass]
[15832]128  public class IntHasher : Hasher<int> {
129    public IntHasher(Grammar grammar) : base(grammar) { }
130
[15960]131    public IntHasher(IntHasher original, Cloner cloner) : base(original, cloner) { }
132
133    public override IDeepCloneable Clone(Cloner cloner) {
134      return new IntHasher(this, cloner);
135    }
136
137    [StorableConstructor]
138    protected IntHasher(bool deserializing) : base(deserializing) { }
139
[15965]140    protected override int AggregateHashes(Symbol operatorSym, IList<int> hashes) {
[15832]141      int start;
[15965]142      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashes.Count <= 1) {
[15832]143        start = 0;
144
145      } else if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
146        return operatorSym.StringRepresentation.GetHashCode();
147
148      } else {
149        start = operatorSym.StringRepresentation.GetHashCode();
150      }
151
[15965]152      for (int i = 0; i < hashes.Count; i++) {
[15832]153        start = ((start << 5) + start) ^ hashes[i];
154      }
155      return start;
156    }
157  }
158
[15960]159  [StorableClass]
[15832]160  public class StringHasher : Hasher<string> {
161    public StringHasher(Grammar grammar) : base(grammar) { }
162
[15960]163    public StringHasher(StringHasher original, Cloner cloner) : base(original, cloner) { }
164
165    public override IDeepCloneable Clone(Cloner cloner) {
166      return new StringHasher(this, cloner);
167    }
168
169    [StorableConstructor]
170    protected StringHasher(bool deserializing) : base(deserializing) { }
171
[15965]172    protected override string AggregateHashes(Symbol operatorSym, IList<string> hashes) {
[15832]173      var hashesArray = hashes.ToArray();
174
175      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashesArray.Length <= 1) {
176        return hashesArray[0];
177      }
178      if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
179        return operatorSym.StringRepresentation;
180      }
181
182      return $"[{operatorSym.StringRepresentation} ° {string.Join(" ° ", hashesArray)}]";
183    }
184  }
185}
Note: See TracBrowser for help on using the repository browser.