Changeset 15725


Ignore:
Timestamp:
02/06/18 16:21:16 (21 months ago)
Author:
lkammere
Message:

#2886: Refactor tree hash function.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15724 r15725  
    1 using System.Collections;
     1using System;
    22using System.Collections.Generic;
    3 using System.ComponentModel;
    43using System.Diagnostics;
    54using System.Linq;
     
    163162      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
    164163
    165       int[] subtreeHashes = GetSubtreeHashes(parseStack.Pop(), parseStack);
    166       return AggregateHashes(subtreeHashes);
    167     }
    168 
    169     private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) {
    170       List<int> childHashes = null;
    171 
     164      TerminalSymbol peek = parseStack.Peek();
     165      int[] subtreeHashes = GetSubtreeHashes(parseStack);
     166      return AggregateHashes(peek, subtreeHashes);
     167    }
     168
     169    private int[] GetSubtreeHashes(Stack<TerminalSymbol> parseStack) {
     170      TerminalSymbol currentSymbol = parseStack.Pop();
     171
     172      // VARIABLE
    172173      if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
    173         childHashes = currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToList();
    174 
    175       } else if (ReferenceEquals(currentSymbol, Multiplication)) {
    176         // MULTIPLICATION
    177         childHashes = new List<int>();
     174        return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
     175      }
     176
     177      // MULTIPLICATION
     178      if (ReferenceEquals(currentSymbol, Multiplication)) {
     179        List<int> childHashes = new List<int>();
    178180
    179181        // First subtree
    180         TerminalSymbol firstSubtreeRoot = parseStack.Pop();
    181         int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
    182 
    183         if (ReferenceEquals(firstSubtreeRoot, Multiplication)) {
    184           childHashes.AddRange(firstSubtreeHashes);
     182        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
     183          childHashes.AddRange(GetSubtreeHashes(parseStack));
    185184        } else {
    186           childHashes.Add(AggregateHashes(firstSubtreeHashes));
     185          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
    187186        }
    188 
    189187        // Second subtree
    190         TerminalSymbol secondSubtreeRoot = parseStack.Pop();
    191         int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
    192 
    193         if (ReferenceEquals(secondSubtreeRoot, Multiplication)) {
    194           childHashes.AddRange(secondSubtreeHashes);
     188        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
     189          childHashes.AddRange(GetSubtreeHashes(parseStack));
    195190        } else {
    196           childHashes.Add(AggregateHashes(secondSubtreeHashes));
     191          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
    197192        }
    198193
    199194        // Sort due to commutativity
    200195        childHashes.Sort();
    201 
    202 
    203 
    204       } else if (ReferenceEquals(currentSymbol, Addition)) {
    205         // ADDITION
     196        return childHashes.ToArray();
     197      }
     198
     199      // ADDITION
     200      if (ReferenceEquals(currentSymbol, Addition)) {
    206201        HashSet<int> uniqueChildHashes = new HashSet<int>();
    207202
    208         TerminalSymbol firstSubtreeRoot = parseStack.Pop();
    209         int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
    210 
    211203        // First subtree
    212         if (ReferenceEquals(firstSubtreeRoot, Addition)) {
    213           uniqueChildHashes.UnionWith(firstSubtreeHashes);
     204        if (ReferenceEquals(parseStack.Peek(), Addition)) {
     205          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
    214206        } else {
    215           uniqueChildHashes.Add(AggregateHashes(firstSubtreeHashes));
     207          var peek = parseStack.Peek();
     208          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
    216209        }
    217 
    218210        // Second subtree
    219         TerminalSymbol secondSubtreeRoot = parseStack.Pop();
    220         int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
    221 
    222         if (ReferenceEquals(secondSubtreeRoot, Addition)) {
    223           uniqueChildHashes.UnionWith(secondSubtreeHashes);
     211        if (ReferenceEquals(parseStack.Peek(), Addition)) {
     212          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
    224213        } else {
    225           uniqueChildHashes.Add(AggregateHashes(secondSubtreeHashes));
     214          var peek = parseStack.Peek();
     215          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
    226216        }
    227217
    228         childHashes = uniqueChildHashes.ToList();
    229         childHashes.Sort();
    230       }
    231 
    232       Debug.Assert(childHashes != null, "Sentence not hashable!");
    233       return childHashes.ToArray();
    234     }
    235 
    236     private int AggregateHashes(IEnumerable<int> hashes) {
    237       return hashes.Aggregate(0, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
     218        var result = uniqueChildHashes.ToList();
     219        result.Sort();
     220        return result.ToArray();
     221      }
     222      throw new ArgumentException("Trying to hash malformed sentence!");
     223    }
     224
     225    private int AggregateHashes(TerminalSymbol rule, IEnumerable<int> hashes) {
     226      // If multiple subtrees are "merged" (e.g. added, multiplied, etc.), consider the executed operation
     227      var hashesArray = hashes.ToArray();
     228      int start = hashesArray.Length > 1 ? rule.StringRepresentation.GetHashCode() : 0;
     229      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
    238230    }
    239231    #endregion
     
    297289
    298290      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
    299         result.AddRange(PostfixToInfixSubtreeParser(parseStack));
     291        // right part
     292        SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
     293        SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
     294
     295        result.AddRange(leftPart);
    300296        result.Add(head);
    301         result.AddRange(PostfixToInfixSubtreeParser(parseStack));
     297        result.AddRange(rightPart);
    302298      } else {
    303299        result.Add(head);
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15724 r15725  
    1 using System.Collections.Generic;
     1using System;
     2using System.Collections.Generic;
    23using System.Linq;
    34using System.Threading;
     
    2829    private readonly string UseMemoizationParameterName = "Use Memoization?";
    2930
    30 
    3131    #region properties
    3232    protected IValueParameter<IntValue> MaxTreeSizeParameter {
     
    8989      BestTrainingSentence = null;
    9090
    91       List<SymbolString> allGenerated = new List<SymbolString>();
     91      List<Tuple<SymbolString, int>> allGenerated = new List<Tuple<SymbolString, int>>();
    9292      List<SymbolString> distinctGenerated = new List<SymbolString>();
    93 
    94       int expansions = 0;
    9593
    9694      HashSet<int> evaluatedHashes = new HashSet<int>();
     
    107105
    108106        if (currSymbolString.IsSentence()) {
    109           allGenerated.Add(Grammar.PostfixToInfixParser(currSymbolString));
    110 
    111           //if (evaluatedHashes.Add(grammar.CalcHashCode(currSymbolString))) {
    112           EvaluateSentence(currSymbolString);
    113           distinctGenerated.Add(Grammar.PostfixToInfixParser(currSymbolString));
    114           //}
    115 
    116           UpdateView(allGenerated, distinctGenerated, expansions);
     107          allGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
     108
     109          if (evaluatedHashes.Add(Grammar.CalcHashCode(currSymbolString))) {
     110            EvaluateSentence(currSymbolString);
     111            distinctGenerated.Add(Grammar.PostfixToInfixParser(currSymbolString));
     112          }
     113
     114          UpdateView(allGenerated, distinctGenerated);
    117115
    118116        } else {
     
    133131      }
    134132
    135       UpdateView(allGenerated, distinctGenerated, expansions, force: true);
    136 
    137       StringArray sentences = new StringArray(allGenerated.Select(r => r.ToString()).ToArray());
    138       Results.Add(new Result("All generated sentences", sentences));
     133      UpdateView(allGenerated, distinctGenerated, force: true);
     134
     135      string[,] sentences = new string[allGenerated.Count, 3];
     136      for (int i = 0; i < allGenerated.Count; i++) {
     137        sentences[i, 0] = allGenerated[i].Item1.ToString();
     138        sentences[i, 1] = Grammar.PostfixToInfixParser(allGenerated[i].Item1).ToString();
     139        sentences[i, 2] = allGenerated[i].Item2.ToString();
     140      }
     141      Results.Add(new Result("All generated sentences", new StringMatrix(sentences)));
     142
    139143      StringArray distinctSentences = new StringArray(distinctGenerated.Select(r => r.ToString()).ToArray());
    140144      Results.Add(new Result("Distinct generated sentences", distinctSentences));
     
    142146
    143147
    144     private void UpdateView(List<SymbolString> allGenerated, List<SymbolString> distinctGenerated, int expansions, bool force = false) {
     148    private void UpdateView(List<Tuple<SymbolString, int>> allGenerated, List<SymbolString> distinctGenerated, bool force = false) {
    145149      int generatedSolutions = allGenerated.Count;
    146150      int distinctSolutions = distinctGenerated.Count;
     
    150154        Results.AddOrUpdateResult("Distinct Solutions", new IntValue(distinctSolutions));
    151155
    152         DoubleValue averageTreeLength = new DoubleValue(allGenerated.Select(r => r.Count).Average());
     156        DoubleValue averageTreeLength = new DoubleValue(allGenerated.Select(r => r.Item1.Count).Average());
    153157        Results.AddOrUpdateResult("Average Tree Length of Solutions", averageTreeLength);
    154 
    155         IntValue expansionsValue = new IntValue(expansions);
    156         Results.AddOrUpdateResult("Expansions", expansionsValue);
    157158      }
    158159    }
  • branches/2886_SymRegGrammarEnumeration/Test/TreeHashingTest.cs

    r15714 r15725  
    8181      Assert.AreEqual(hash1, hash2);
    8282    }
     83
     84    [TestMethod]
     85    [TestCategory("TreeHashing")]
     86    public void ComplexInequality() {
     87      SymbolString s1 = new SymbolString(new[] { varA, varA, varA, grammar.Multiplication, grammar.Multiplication });
     88      SymbolString s2 = new SymbolString(new[] { varA, varA, varA, grammar.Multiplication, grammar.Addition });
     89
     90      int hash1 = grammar.CalcHashCode(s1);
     91      int hash2 = grammar.CalcHashCode(s2);
     92
     93      Assert.AreNotEqual(hash1, hash2);
     94    }
     95
     96
     97    #region parser
     98
     99    [TestMethod]
     100    [TestCategory("Parser")]
     101    public void InfixParserSimpleTest() {
     102      SymbolString postfix = new SymbolString(new[] { varA, varB, varC, grammar.Addition, grammar.Addition });
     103      SymbolString infix = new SymbolString(new[] { varA, grammar.Addition, varB, grammar.Addition, varC });
     104
     105      Assert.AreEqual(infix.ToString(), grammar.PostfixToInfixParser(postfix).ToString());
     106    }
     107
     108    [TestMethod]
     109    [TestCategory("Parser")]
     110    public void InfixParserComplexTest() {
     111      SymbolString postfix = new SymbolString(new[] { varB, varA, varB, varC, grammar.Multiplication, grammar.Addition, grammar.Addition});
     112      SymbolString infix = new SymbolString(new[] { varB, grammar.Addition, varA, grammar.Addition, varB, grammar.Multiplication, varC });
     113
     114      Assert.AreEqual(infix.ToString(), grammar.PostfixToInfixParser(postfix).ToString());
     115    }
     116
     117    #endregion
    83118  }
    84119}
Note: See TracChangeset for help on using the changeset viewer.