Ignore:
Timestamp:
02/06/18 16:21:16 (3 years ago)
Author:
lkammere
Message:

#2886: Refactor tree hash function.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15724 r15725  
    1 using System.Collections;
     1using System;
    22using System.Collections.Generic;
    3 using System.ComponentModel;
    43using System.Diagnostics;
    54using System.Linq;
     
    163162      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
    164163
    165       int[] subtreeHashes = GetSubtreeHashes(parseStack.Pop(), parseStack);
    166       return AggregateHashes(subtreeHashes);
    167     }
    168 
    169     private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) {
    170       List<int> childHashes = null;
    171 
     164      TerminalSymbol peek = parseStack.Peek();
     165      int[] subtreeHashes = GetSubtreeHashes(parseStack);
     166      return AggregateHashes(peek, subtreeHashes);
     167    }
     168
     169    private int[] GetSubtreeHashes(Stack<TerminalSymbol> parseStack) {
     170      TerminalSymbol currentSymbol = parseStack.Pop();
     171
     172      // VARIABLE
    172173      if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
    173         childHashes = currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToList();
    174 
    175       } else if (ReferenceEquals(currentSymbol, Multiplication)) {
    176         // MULTIPLICATION
    177         childHashes = new List<int>();
     174        return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
     175      }
     176
     177      // MULTIPLICATION
     178      if (ReferenceEquals(currentSymbol, Multiplication)) {
     179        List<int> childHashes = new List<int>();
    178180
    179181        // First subtree
    180         TerminalSymbol firstSubtreeRoot = parseStack.Pop();
    181         int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
    182 
    183         if (ReferenceEquals(firstSubtreeRoot, Multiplication)) {
    184           childHashes.AddRange(firstSubtreeHashes);
     182        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
     183          childHashes.AddRange(GetSubtreeHashes(parseStack));
    185184        } else {
    186           childHashes.Add(AggregateHashes(firstSubtreeHashes));
     185          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
    187186        }
    188 
    189187        // Second subtree
    190         TerminalSymbol secondSubtreeRoot = parseStack.Pop();
    191         int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
    192 
    193         if (ReferenceEquals(secondSubtreeRoot, Multiplication)) {
    194           childHashes.AddRange(secondSubtreeHashes);
     188        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
     189          childHashes.AddRange(GetSubtreeHashes(parseStack));
    195190        } else {
    196           childHashes.Add(AggregateHashes(secondSubtreeHashes));
     191          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
    197192        }
    198193
    199194        // Sort due to commutativity
    200195        childHashes.Sort();
    201 
    202 
    203 
    204       } else if (ReferenceEquals(currentSymbol, Addition)) {
    205         // ADDITION
     196        return childHashes.ToArray();
     197      }
     198
     199      // ADDITION
     200      if (ReferenceEquals(currentSymbol, Addition)) {
    206201        HashSet<int> uniqueChildHashes = new HashSet<int>();
    207202
    208         TerminalSymbol firstSubtreeRoot = parseStack.Pop();
    209         int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
    210 
    211203        // First subtree
    212         if (ReferenceEquals(firstSubtreeRoot, Addition)) {
    213           uniqueChildHashes.UnionWith(firstSubtreeHashes);
     204        if (ReferenceEquals(parseStack.Peek(), Addition)) {
     205          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
    214206        } else {
    215           uniqueChildHashes.Add(AggregateHashes(firstSubtreeHashes));
     207          var peek = parseStack.Peek();
     208          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
    216209        }
    217 
    218210        // Second subtree
    219         TerminalSymbol secondSubtreeRoot = parseStack.Pop();
    220         int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
    221 
    222         if (ReferenceEquals(secondSubtreeRoot, Addition)) {
    223           uniqueChildHashes.UnionWith(secondSubtreeHashes);
     211        if (ReferenceEquals(parseStack.Peek(), Addition)) {
     212          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
    224213        } else {
    225           uniqueChildHashes.Add(AggregateHashes(secondSubtreeHashes));
     214          var peek = parseStack.Peek();
     215          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
    226216        }
    227217
    228         childHashes = uniqueChildHashes.ToList();
    229         childHashes.Sort();
    230       }
    231 
    232       Debug.Assert(childHashes != null, "Sentence not hashable!");
    233       return childHashes.ToArray();
    234     }
    235 
    236     private int AggregateHashes(IEnumerable<int> hashes) {
    237       return hashes.Aggregate(0, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
     218        var result = uniqueChildHashes.ToList();
     219        result.Sort();
     220        return result.ToArray();
     221      }
     222      throw new ArgumentException("Trying to hash malformed sentence!");
     223    }
     224
     225    private int AggregateHashes(TerminalSymbol rule, IEnumerable<int> hashes) {
     226      // If multiple subtrees are "merged" (e.g. added, multiplied, etc.), consider the executed operation
     227      var hashesArray = hashes.ToArray();
     228      int start = hashesArray.Length > 1 ? rule.StringRepresentation.GetHashCode() : 0;
     229      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
    238230    }
    239231    #endregion
     
    297289
    298290      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
    299         result.AddRange(PostfixToInfixSubtreeParser(parseStack));
     291        // right part
     292        SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
     293        SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
     294
     295        result.AddRange(leftPart);
    300296        result.Add(head);
    301         result.AddRange(PostfixToInfixSubtreeParser(parseStack));
     297        result.AddRange(rightPart);
    302298      } else {
    303299        result.Add(head);
Note: See TracChangeset for help on using the changeset viewer.