Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Hashing/Hasher.cs @ 16176

Last change on this file since 16176 was 16176, checked in by bburlacu, 6 years ago

#2886: Fix hashing

File size: 7.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28
29namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
30
31  [StorableClass]
32  public abstract class Hasher<THashType> : DeepCloneable {
33    protected struct HashItem : IComparable<HashItem>, IEquatable<HashItem> {
34      public THashType Value { get; private set; }
35      public Symbol Symbol { get; private set; }
36
37      public int CompareTo(HashItem other) {
38        var res = Symbol.CompareTo(other.Symbol);
39        return res == 0 ? Comparer<THashType>.Default.Compare(Value, other.Value) : res;
40      }
41
42      private HashItem(THashType h, Symbol s) {
43        Value = h;
44        Symbol = s;
45      }
46
47      public static HashItem Create(THashType h, Symbol s) {
48        return new HashItem(h, s);
49      }
50
51      public bool Equals(HashItem other) {
52        return Symbol.Equals(other.Symbol) && Value.Equals(other.Value);
53      }
54    }
55    protected Hasher(Grammar grammar) {
56      Grammar = grammar;
57    }
58
59    protected Hasher(Hasher<THashType> original, Cloner cloner) : base(original, cloner) {
60      Grammar = cloner.Clone(original.Grammar);
61    }
62
63    [Storable]
64    protected Grammar Grammar { get; private set; }
65
66    protected abstract HashItem AggregateHashes(Symbol operatorSym, IList<HashItem> hashes);
67
68    public int CalcHashCode(SymbolList sentence) {
69      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
70
71      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
72
73      Symbol peek = parseStack.Peek();
74      return AggregateHashes(peek, GetSubtreeHashes(parseStack)).Value.GetHashCode();
75    }
76
77    [StorableConstructor]
78    protected Hasher(bool deserializing) { }
79
80    private IList<HashItem> GetSubtreeHashes(Stack<Symbol> parseStack) {
81      Symbol currentSymbol = parseStack.Pop();
82
83      // ADDITION
84      if (currentSymbol == Grammar.Addition) {
85        return GetAdditionSubtreeHashes(parseStack);
86      }
87
88      // MULTIPLICATION
89      if (currentSymbol == Grammar.Multiplication) {
90        return GetMultiplicationSubtreeHashes(parseStack);
91      }
92
93      // LOG, EXP, SIN, INV
94      if (currentSymbol == Grammar.Log || currentSymbol == Grammar.Exp ||
95          currentSymbol == Grammar.Sin || currentSymbol == Grammar.Inv) {
96        // Directly aggregate the single subtree
97        return new[] { AggregateHashes(currentSymbol, GetSubtreeHashes(parseStack)) };
98      }
99
100      // var, const or nonterminal symbol
101      return new[] { AggregateHashes(currentSymbol, new HashItem[0]) };
102    }
103
104    private void AddAdditionChildHashes(HashSet<HashItem> hashSet, Stack<Symbol> symbolStack) {
105      var peek = symbolStack.Peek();
106      var hashes = GetSubtreeHashes(symbolStack); // will pop the stack
107
108      if (peek == Grammar.Addition)
109        hashSet.UnionWith(hashes);
110      else
111        hashSet.Add(AggregateHashes(peek, hashes));
112    }
113
114    private IList<HashItem> GetAdditionSubtreeHashes(Stack<Symbol> parseStack) {
115      var uniqueChildHashes = new HashSet<HashItem>();
116
117      AddAdditionChildHashes(uniqueChildHashes, parseStack); // first child
118      AddAdditionChildHashes(uniqueChildHashes, parseStack); // second child
119
120      var result = uniqueChildHashes.ToArray();
121      Array.Sort(result);
122      return result;
123    }
124
125    private void AddMultiplicationChildHashes(List<HashItem> hashList, Stack<Symbol> symbolStack) {
126      var peek = symbolStack.Peek();
127      var hashes = GetSubtreeHashes(symbolStack);
128      if (peek == Grammar.Multiplication)
129        hashList.AddRange(hashes);
130      else
131        hashList.Add(AggregateHashes(peek, hashes));
132    }
133
134    private IList<HashItem> GetMultiplicationSubtreeHashes(Stack<Symbol> parseStack) {
135      var childHashes = new List<HashItem>();
136
137      AddMultiplicationChildHashes(childHashes, parseStack); // first child
138      AddMultiplicationChildHashes(childHashes, parseStack); // second child
139
140      // Sort due to commutativity
141      childHashes.Sort();
142
143      if (childHashes.Count <= 2)
144        return childHashes;
145
146      // Cancel out inverse factors.
147      var isInvFactor = new bool[childHashes.Count];
148      bool foundInvFactor = false;
149      for (int i = 0; i < isInvFactor.Length; i++) {
150        if (isInvFactor[i]) continue;
151
152        var currFactor = childHashes[i];
153        var invFactor = AggregateHashes(Grammar.Inv, new[] { currFactor });
154
155        int indexOfInv = childHashes.IndexOf(invFactor);
156        if (indexOfInv >= 0 && !isInvFactor[indexOfInv]) {
157          isInvFactor[i] = isInvFactor[indexOfInv] = true;
158          foundInvFactor = true;
159        }
160      }
161
162      if (!foundInvFactor)
163        return childHashes;
164
165      return childHashes
166        .Where((c, i) => !isInvFactor[i])
167        .ToArray();
168    }
169  }
170
171  [StorableClass]
172  public class IntHasher : Hasher<int> {
173    public IntHasher(Grammar grammar) : base(grammar) { }
174
175    public IntHasher(IntHasher original, Cloner cloner) : base(original, cloner) { }
176
177    public override IDeepCloneable Clone(Cloner cloner) {
178      return new IntHasher(this, cloner);
179    }
180
181    [StorableConstructor]
182    protected IntHasher(bool deserializing) : base(deserializing) { }
183
184    protected override HashItem AggregateHashes(Symbol operatorSym, IList<HashItem> hashes) {
185      int start;
186      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashes.Count <= 1) {
187        start = 0;
188
189      } else if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
190        return HashItem.Create(operatorSym.StringRepresentation.GetHashCode(), operatorSym);
191
192      } else {
193        start = operatorSym.StringRepresentation.GetHashCode();
194      }
195
196      for (int i = 0; i < hashes.Count; i++) {
197        start = ((start << 5) + start) ^ hashes[i].Value;
198      }
199      return HashItem.Create(start, operatorSym);
200    }
201  }
202
203  [StorableClass]
204  public class StringHasher : Hasher<string> {
205    public StringHasher(Grammar grammar) : base(grammar) { }
206
207    public StringHasher(StringHasher original, Cloner cloner) : base(original, cloner) { }
208
209    public override IDeepCloneable Clone(Cloner cloner) {
210      return new StringHasher(this, cloner);
211    }
212
213    [StorableConstructor]
214    protected StringHasher(bool deserializing) : base(deserializing) { }
215
216    protected override HashItem AggregateHashes(Symbol operatorSym, IList<HashItem> hashes) {
217      var hashesArray = hashes.ToArray();
218
219      if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashesArray.Length <= 1) {
220        return hashesArray[0];
221      }
222      if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) {
223        return HashItem.Create(operatorSym.StringRepresentation, operatorSym);
224      }
225
226      return HashItem.Create($"[{operatorSym.StringRepresentation} ° {string.Join(" ° ", hashesArray.Select(x => x.Value))}]", operatorSym);
227    }
228  }
229}
Note: See TracBrowser for help on using the repository browser.