Changeset 15714


Ignore:
Timestamp:
02/02/18 18:48:10 (19 months ago)
Author:
lkammere
Message:

#2886 Add tree hashing for addition and multiplication.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15712 r15714  
    1 namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
     1using System.Collections;
     2using System.Collections.Generic;
     3using System.ComponentModel;
     4using System.Diagnostics;
     5using System.Linq;
     6using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
     7using HeuristicLab.Common;
     8
     9namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
    210  public class Grammar {
    311
    412    public Symbol StartSymbol;
    513
     14    #region Symbols
     15    public VariableSymbol Var;
     16
     17    public NonterminalSymbol Expr;
     18    public NonterminalSymbol Term;
     19    public NonterminalSymbol Factor;
     20
     21    public TerminalSymbol Addition;
     22    public TerminalSymbol Multiplication;
     23    #endregion
     24
     25
    626    public Grammar(string[] variables) {
    727      #region Define Symbols
    8       VariableSymbol var = new VariableSymbol("var", variables);
     28      Var = new VariableSymbol("var", variables);
    929
    10       NonterminalSymbol expr = new NonterminalSymbol("Expr");
    11       NonterminalSymbol term = new NonterminalSymbol("Expr");
    12       NonterminalSymbol factor = new NonterminalSymbol("Expr");
     30      Expr = new NonterminalSymbol("Expr");
     31      Term = new NonterminalSymbol("Term");
     32      Factor = new NonterminalSymbol("Factor");
    1333
    14       TerminalSymbol addition = new TerminalSymbol("+");
    15       TerminalSymbol multiplication = new TerminalSymbol("*");
     34      Addition = new TerminalSymbol("+");
     35      Multiplication = new TerminalSymbol("*");
    1636      #endregion
    1737
    1838
    1939      #region Production ruless
    20       StartSymbol = expr;
     40      StartSymbol = Expr;
    2141
    22       expr.AddProduction(term, expr, addition);
    23       expr.AddProduction(term);
     42      Expr.AddProduction(Term, Expr, Addition);
     43      Expr.AddProduction(Term);
    2444
    25       term.AddProduction(factor, term, multiplication);
    26       term.AddProduction(factor);
     45      Term.AddProduction(Factor, Term, Multiplication);
     46      Term.AddProduction(Factor);
    2747
    28       factor.AddProduction(var);
     48      Factor.AddProduction(Var);
    2949      #endregion
     50    }
     51
     52    public int CalcHashCode(SymbolString sentence) {
     53      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
     54      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
     55
     56      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
     57
     58      int[] subtreeHashes = GetSubtreeHashes(parseStack.Pop(), parseStack);
     59      return AggregateHashes(subtreeHashes);
     60    }
     61
     62    private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) {
     63      List<int> childHashes = null;
     64
     65      if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
     66        childHashes = currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToList();
     67
     68      } else if (ReferenceEquals(currentSymbol, Multiplication)) { // MULTIPLICATION
     69        childHashes = new List<int>();
     70
     71        // First subtree
     72        TerminalSymbol firstSubtreeRoot = parseStack.Pop();
     73        int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
     74
     75        if (ReferenceEquals(firstSubtreeRoot, Multiplication)) {
     76          childHashes.AddRange(firstSubtreeHashes);
     77        } else {
     78          childHashes.Add(AggregateHashes(firstSubtreeHashes));
     79        }
     80
     81        // Second subtree
     82        TerminalSymbol secondSubtreeRoot = parseStack.Pop();
     83        int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
     84
     85        if (ReferenceEquals(secondSubtreeRoot, Multiplication)) {
     86          childHashes.AddRange(secondSubtreeHashes);
     87        } else {
     88          childHashes.Add(AggregateHashes(secondSubtreeHashes));
     89        }
     90
     91        // Sort due to commutativity
     92        childHashes.Sort();
     93
     94
     95
     96      } else if (ReferenceEquals(currentSymbol, Addition)) { // ADDITION
     97        HashSet<int> uniqueChildHashes = new HashSet<int>();
     98
     99        TerminalSymbol firstSubtreeRoot = parseStack.Pop();
     100        int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
     101
     102        // First subtree
     103        if (ReferenceEquals(firstSubtreeRoot, Addition)) {
     104          uniqueChildHashes.UnionWith(firstSubtreeHashes);
     105        } else {
     106          uniqueChildHashes.Add(AggregateHashes(firstSubtreeHashes));
     107        }
     108
     109        // Second subtree
     110        TerminalSymbol secondSubtreeRoot = parseStack.Pop();
     111        int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
     112
     113        if (ReferenceEquals(secondSubtreeRoot, Addition)) {
     114          uniqueChildHashes.UnionWith(secondSubtreeHashes);
     115        } else {
     116          uniqueChildHashes.Add(AggregateHashes(secondSubtreeHashes));
     117        }
     118
     119        childHashes = uniqueChildHashes.ToList();
     120        childHashes.Sort();
     121      }
     122
     123      Debug.Assert(childHashes != null, "Sentence not hashable!");
     124      return childHashes.ToArray();
     125    }
     126
     127
     128    private int AggregateHashes(IEnumerable<int> hashes) {
     129      return hashes.Aggregate(0, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
    30130    }
    31131  }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15712 r15714  
    1 using System.Collections.Generic;
     1using System;
     2using System.Collections.Generic;
    23using System.Collections.ObjectModel;
    34using System.Diagnostics;
     
    4142      DoubleValue averageTreeLength = new DoubleValue();
    4243      Results.Add(new Result("Average Tree Length of Solutions", averageTreeLength));
    43 
    4444
    4545      int maxStringLength = 4;
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs

    r15712 r15714  
    1 using System;
    2 using System.Collections.Generic;
     1using System.Collections.Generic;
     2using System.Linq;
    33
    44namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
    55
    66  public abstract class Symbol {
    7     private readonly string stringRepresentation;
     7    public readonly string StringRepresentation;
    88
    99    protected Symbol(string representation) {
    10       stringRepresentation = representation;
     10      StringRepresentation = representation;
    1111    }
    1212
    1313    public override string ToString() {
    14       return stringRepresentation;
     14      return StringRepresentation;
    1515    }
    1616  }
     
    3333
    3434  public class VariableSymbol : NonterminalSymbol { // Convenience class
     35    public IEnumerable<TerminalSymbol> VariableTerminalSymbols;
     36
    3537    public VariableSymbol(string representation, IEnumerable<string> variableNames) : base(representation) {
     38      List<TerminalSymbol> createdSymbols = new List<TerminalSymbol>();
     39      VariableTerminalSymbols = createdSymbols;
     40
    3641      foreach (string variableName in variableNames) {
    37         AddProduction(new TerminalSymbol("[" + variableName + "]"));
     42        TerminalSymbol s = new TerminalSymbol(variableName);
     43        createdSymbols.Add(s);
     44        AddProduction(s);
    3845      }
    3946    }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.csproj

    r15712 r15714  
    9898    <Compile Include="GrammarEnumeration\Symbol.cs" />
    9999  </ItemGroup>
     100  <ItemGroup />
    100101  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
    101102  <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15712 r15714  
    1010
    1111namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
    12   [TestClass()]
     12  //[TestClass()]
    1313  public class MctsSymbolicRegressionTest {
    1414    private const int Seed = 1234;
  • branches/2886_SymRegGrammarEnumeration/Test/Test.csproj

    r15712 r15714  
    103103    <Otherwise>
    104104      <ItemGroup>
    105         <Reference Include="Microsoft.VisualStudio.QualityTools.UnitTestFramework" />
     105        <Reference Include="Microsoft.VisualStudio.QualityTools.UnitTestFramework">
     106          <Private>False</Private>
     107        </Reference>
    106108      </ItemGroup>
    107109    </Otherwise>
     
    110112    <Compile Include="GrammarEnumerationTest.cs" />
    111113    <Compile Include="Properties\AssemblyInfo.cs" />
     114    <Compile Include="TreeHashingTest.cs" />
    112115  </ItemGroup>
    113116  <ItemGroup>
Note: See TracChangeset for help on using the changeset viewer.