Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/08/18 10:54:04 (7 years ago)
Author:
lkammere
Message:

#2886: Fix Equals methods in Symbols.
Move semantical hashing of phrases to separate class.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
2 added
7 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SearchGraphVisualizer.cs

    r15821 r15832  
    5050        var alg = (GrammarEnumerationAlgorithm)sender;
    5151        var phrase0 = new SymbolString(new[] { alg.Grammar.StartSymbol });
    52         var phrase0Hash = alg.Grammar.CalcHashCode(phrase0);
     52        var phrase0Hash = alg.Grammar.Hasher.CalcHashCode(phrase0);
    5353
    5454        dotFileTrace.WriteLine($"{phrase0Hash} [label=\"{phrase0}\", shape=doublecircle]; }}");
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15828 r15832  
    1 using System;
    2 using System.Collections.Generic;
     1using System.Collections.Generic;
    32using System.Diagnostics;
    43using System.Linq;
    54using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
    6 using HeuristicLab.Common;
    75using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    86using HeuristicLab.Problems.DataAnalysis.Symbolic;
     
    1210
    1311    public Symbol StartSymbol { get; }
     12
     13    public Hasher<int> Hasher { get; }
    1414
    1515    #region Symbols
     
    146146      infixExpressionFormatter = new InfixExpressionFormatter();
    147147      #endregion
    148     }
    149 
    150     #region Hashing
    151     public int CalcHashCode(SymbolString sentence) {
    152       return CalcHashCode<int>(sentence, AggregateIntHashes);
    153     }
    154 
    155     private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, THashType[], THashType> aggregateFunction) {
    156       Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
    157 
    158       Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
    159 
    160       Symbol peek = parseStack.Peek();
    161       return aggregateFunction(peek, GetSubtreeHashes(parseStack, aggregateFunction)).GetHashCode();
    162     }
    163 
    164     private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, THashType[], THashType> aggregateHashes) {
    165       Symbol currentSymbol = parseStack.Pop();
    166 
    167       // ADDITION
    168       if (currentSymbol == Addition) {
    169         var uniqueChildHashes = new HashSet<THashType>();
    170 
    171         // First subtree
    172         if (parseStack.Peek() == Addition) {
    173           uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
    174         } else {
    175           var peek = parseStack.Peek();
    176           uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
    177         }
    178         // Second subtree
    179         if (parseStack.Peek() == Addition) {
    180           uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
    181         } else {
    182           var peek = parseStack.Peek();
    183           uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
    184         }
    185 
    186         var result = uniqueChildHashes.ToArray();
    187         Array.Sort(result);
    188         return result;
    189       }
    190 
    191       // MULTIPLICATION
    192       if (currentSymbol == Multiplication) {
    193         var childHashes = new List<THashType>();
    194 
    195         // First subtree
    196         if (parseStack.Peek() == Multiplication) {
    197           childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
    198         } else {
    199           childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
    200         }
    201         // Second subtree
    202         if (parseStack.Peek() == Multiplication) {
    203           childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
    204         } else {
    205           childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
    206         }
    207 
    208         // Sort due to commutativity
    209         childHashes.Sort();
    210 
    211         // Cancel out inverse factors.
    212         bool[] isFactorRemaining = Enumerable.Repeat(true, childHashes.Count).ToArray();
    213 
    214         for (int i = 0; i < isFactorRemaining.Length; i++) {
    215           if (!isFactorRemaining[i]) continue;
    216           if (isFactorRemaining.Length <= 2) break; // Until we have constants, we can't cancel out all terms.
    217 
    218           var currFactor = childHashes[i];
    219           var invFactor = aggregateHashes(Inv, new[] { currFactor });
    220 
    221           int indexOfInv = childHashes.IndexOf(invFactor);
    222           if (indexOfInv >= 0 && isFactorRemaining[indexOfInv]) {
    223             isFactorRemaining[i] = isFactorRemaining[indexOfInv] = false;
    224           }
    225         }
    226 
    227         return childHashes
    228           .Where((c, i) => isFactorRemaining[i])
    229           .ToArray();
    230       }
    231 
    232       // LOG, EXP, SIN, INV
    233       if (currentSymbol == Log || currentSymbol == Exp ||
    234           currentSymbol == Sin || currentSymbol == Cos ||
    235           currentSymbol == Inv) {
    236         return new[] { aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)) };
    237       }
    238 
    239       // var or nonterminal symbol
    240       return new[] { aggregateHashes(currentSymbol, new THashType[0]) };
    241     }
    242 
    243     private string AggregateStringHashes(Symbol operatorSym, string[] hashes) {
    244       var hashesArray = hashes.ToArray();
    245 
    246       if ((operatorSym == Addition || operatorSym == Multiplication) && hashesArray.Length <= 1) {
    247         return hashesArray[0];
    248       }
    249       if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
    250         return operatorSym.StringRepresentation;
    251       }
    252 
    253       return $"[{operatorSym.StringRepresentation} ° {string.Join(" ° ", hashesArray)}]";
    254     }
    255 
    256     private int AggregateIntHashes(Symbol operatorSym, int[] hashes) {
    257       int start;
    258       if ((operatorSym == Addition || operatorSym == Multiplication) && hashes.Length <= 1) {
    259         start = 0;
    260 
    261       } else if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
    262         return operatorSym.StringRepresentation.GetHashCode();
    263 
    264       } else {
    265         start = operatorSym.StringRepresentation.GetHashCode();
    266       }
    267 
    268       for (int i = 0; i < hashes.Length; i++) {
    269         start = ((start << 5) + start) ^ hashes[i];
    270       }
    271       return start;
    272     }
    273     #endregion
     148
     149      Hasher = new IntHasher(this);
     150    }
    274151
    275152    #region Parse to SymbolicExpressionTree
     
    323200        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
    324201
    325       } else if (currentSymbol ==  Cos) {
     202      } else if (currentSymbol == Cos) {
    326203        parsedSubTree = cosSy.CreateTreeNode();
    327204        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
     
    334211        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
    335212
    336       } else if (currentSymbol.IsVariable) {
     213      } else if (currentSymbol is VariableTerminalSymbol) {
    337214        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
    338215        varNode.Weight = 1.0;
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15827 r15832  
    135135      OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy
    136136      var phrase0 = new SymbolString(new[] { Grammar.StartSymbol });
    137       var phrase0Hash = Grammar.CalcHashCode(phrase0);
     137      var phrase0Hash = Grammar.Hasher.CalcHashCode(phrase0);
    138138      #endregion
    139139
     
    160160
    161161          if (newPhrase.Count() <= MaxTreeSize) {
    162             var phraseHash = Grammar.CalcHashCode(newPhrase);
     162            var phraseHash = Grammar.Hasher.CalcHashCode(newPhrase);
    163163
    164164            OnPhraseDerived(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs

    r15828 r15832  
    2020    #region IEquatable
    2121    public static bool operator ==(Symbol s1, Symbol s2) {
    22       return !ReferenceEquals(null, s1) && !ReferenceEquals(null, s2) && (ReferenceEquals(s1, s2)
    23         || s1.Equals(s2));
     22      if (ReferenceEquals(s1, s2)) return true;
     23      if (ReferenceEquals(s1, null) || ReferenceEquals(s2, null)) return false;
     24      return s1.Equals(s2);
    2425    }
    2526
     
    2930
    3031    public bool Equals(Symbol other) {
    31       if (ReferenceEquals(null, other)) return false;
    32       if (ReferenceEquals(this, other)) return true;
    33       return string.Equals(StringRepresentation, other.StringRepresentation);
     32      if (ReferenceEquals(other, null)) return false;
     33      if (ReferenceEquals(other, this)) return true;
     34      if (this.GetType() != other.GetType()) return false; // Otherwise, this needs to be reimplemented in derived classes.
     35      return StringRepresentation == other.StringRepresentation;
    3436    }
    3537
    3638    public override bool Equals(object obj) {
    37       if (ReferenceEquals(null, obj)) return false;
    38       if (ReferenceEquals(this, obj)) return true;
    39       if (obj.GetType() != this.GetType()) return false;
     39      if (ReferenceEquals(obj, null)) return false;
     40      if (ReferenceEquals(obj, this)) return true;
     41      if (this.GetType() != obj.GetType()) return false;
    4042      return Equals((Symbol)obj);
    4143    }
     
    4850
    4951  public class TerminalSymbol : Symbol {
    50     public readonly bool IsVariable;
    5152
    52     public TerminalSymbol(string representation, bool isVariable = false) : base(representation) {
    53       IsVariable = isVariable;
    54     }
     53    public TerminalSymbol(string representation) : base(representation) { }
    5554  }
    5655
     56  public class VariableTerminalSymbol : TerminalSymbol {
     57    public VariableTerminalSymbol(string representation) : base(representation) { }
     58  }
     59
     60
    5761  public class NonterminalSymbol : Symbol {
    58     public List<Production> Alternatives { get; }
     62    private readonly List<Production> alternatives;
     63
     64    public IReadOnlyList<Production> Alternatives {
     65      get { return alternatives.AsReadOnly(); }
     66    }
    5967
    6068    public NonterminalSymbol(string representation) : base(representation) {
    61       Alternatives = new List<Production>();
     69      alternatives = new List<Production>();
    6270    }
    6371
    6472    public void AddProduction(params Symbol[] production) {
    65       Alternatives.Add(new Production(production));
     73      alternatives.Add(new Production(production));
    6674    }
    6775  }
    6876
    6977  public class VariableSymbol : NonterminalSymbol { // Convenience class
    70     public IEnumerable<TerminalSymbol> VariableTerminalSymbols { get; }
     78    public IEnumerable<VariableTerminalSymbol> VariableTerminalSymbols { get; }
    7179
    7280    public VariableSymbol(string representation, IEnumerable<string> variableNames) : base(representation) {
    73       List<TerminalSymbol> createdSymbols = new List<TerminalSymbol>();
     81      List<VariableTerminalSymbol> createdSymbols = new List<VariableTerminalSymbol>();
    7482      VariableTerminalSymbols = createdSymbols;
    7583
    7684      foreach (string variableName in variableNames) {
    77         TerminalSymbol s = new TerminalSymbol(variableName, isVariable: true);
     85        VariableTerminalSymbol s = new VariableTerminalSymbol(variableName);
    7886        createdSymbols.Add(s);
    7987        AddProduction(s);
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.csproj

    r15824 r15832  
    116116    <Compile Include="GrammarEnumeration\Grammar.cs" />
    117117    <Compile Include="GrammarEnumeration\GrammarEnumerationAlgorithm.cs" />
     118    <Compile Include="Hashing\Hasher.cs" />
    118119    <Compile Include="GrammarEnumeration\SearchDataStructure.cs" />
    119120    <Compile Include="GrammarEnumeration\Sentence.cs" />
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15821 r15832  
    7272      });
    7373
    74       int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
    75       int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
     74      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
     75      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    7676
    7777      Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     
    127127      });
    128128
    129       int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
    130       int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
     129      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
     130      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    131131
    132132      Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     
    156156      });
    157157
    158       int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
    159       int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
     158      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
     159      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    160160
    161161      Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
  • branches/2886_SymRegGrammarEnumeration/Test/TreeHashingTest.cs

    r15824 r15832  
    2828      SymbolString s2 = new SymbolString(new[] { varA, varB, grammar.Addition, varC, grammar.Addition });
    2929
    30       int hash1 = grammar.CalcHashCode(s1);
    31       int hash2 = grammar.CalcHashCode(s2);
     30      int hash1 = grammar.Hasher.CalcHashCode(s1);
     31      int hash2 = grammar.Hasher.CalcHashCode(s2);
    3232
    3333      Assert.AreEqual(hash1, hash2);
     
    4040      SymbolString s2 = new SymbolString(new[] { varB, varB, grammar.Addition, varB, grammar.Addition });
    4141
    42       int hash1 = grammar.CalcHashCode(s1);
    43       int hash2 = grammar.CalcHashCode(s2);
     42      int hash1 = grammar.Hasher.CalcHashCode(s1);
     43      int hash2 = grammar.Hasher.CalcHashCode(s2);
    4444
    4545      Assert.AreNotEqual(hash1, hash2);
     
    5252      SymbolString s2 = new SymbolString(new[] { varB, varA, grammar.Addition });
    5353
    54       int hash1 = grammar.CalcHashCode(s1);
    55       int hash2 = grammar.CalcHashCode(s2);
     54      int hash1 = grammar.Hasher.CalcHashCode(s1);
     55      int hash2 = grammar.Hasher.CalcHashCode(s2);
    5656
    5757      Assert.AreEqual(hash1, hash2);
     
    6464      SymbolString s2 = new SymbolString(new[] { varA, varB, varA, grammar.Addition, grammar.Addition });
    6565
    66       int hash1 = grammar.CalcHashCode(s1);
    67       int hash2 = grammar.CalcHashCode(s2);
     66      int hash1 = grammar.Hasher.CalcHashCode(s1);
     67      int hash2 = grammar.Hasher.CalcHashCode(s2);
    6868
    6969      Assert.AreEqual(hash1, hash2);
     
    7676      SymbolString s2 = new SymbolString(new[] { varA });
    7777
    78       int hash1 = grammar.CalcHashCode(s1);
    79       int hash2 = grammar.CalcHashCode(s2);
     78      int hash1 = grammar.Hasher.CalcHashCode(s1);
     79      int hash2 = grammar.Hasher.CalcHashCode(s2);
    8080
    8181      Assert.AreEqual(hash1, hash2);
     
    8888      SymbolString s2 = new SymbolString(new[] { varA, varA, varA, grammar.Multiplication, grammar.Addition });
    8989
    90       int hash1 = grammar.CalcHashCode(s1);
    91       int hash2 = grammar.CalcHashCode(s2);
     90      int hash1 = grammar.Hasher.CalcHashCode(s1);
     91      int hash2 = grammar.Hasher.CalcHashCode(s2);
    9292
    9393      Assert.AreNotEqual(hash1, hash2);
     
    100100      SymbolString s2 = new SymbolString(new Symbol[] { varA, grammar.Expr, grammar.Addition });
    101101
    102       int hash1 = grammar.CalcHashCode(s1);
    103       int hash2 = grammar.CalcHashCode(s2);
     102      int hash1 = grammar.Hasher.CalcHashCode(s1);
     103      int hash2 = grammar.Hasher.CalcHashCode(s2);
    104104
    105105      Assert.AreEqual(hash1, hash2);
     
    114114      SymbolString s2 = new SymbolString(new Symbol[] { varA, varB, grammar.Multiplication });
    115115
    116       int hash1 = grammar.CalcHashCode(s1);
    117       int hash2 = grammar.CalcHashCode(s2);
     116      int hash1 = grammar.Hasher.CalcHashCode(s1);
     117      int hash2 = grammar.Hasher.CalcHashCode(s2);
    118118
    119119      Assert.AreEqual(hash1, hash2);
     
    126126      SymbolString s2 = new SymbolString(new Symbol[] { varA, varA, varA, grammar.Multiplication, grammar.Addition, grammar.Sin, varA, grammar.Inv, varA, grammar.Sin, varA, grammar.Multiplication, grammar.Multiplication, grammar.Addition });
    127127
    128       int hash1 = grammar.CalcHashCode(s1);
    129       int hash2 = grammar.CalcHashCode(s2);
     128      int hash1 = grammar.Hasher.CalcHashCode(s1);
     129      int hash2 = grammar.Hasher.CalcHashCode(s2);
    130130
    131131      Assert.AreEqual(hash1, hash2);
Note: See TracChangeset for help on using the changeset viewer.