Free cookie consent management tool by TermsFeed Policy Generator

Changeset 15791


Ignore:
Timestamp:
02/20/18 13:43:36 (6 years ago)
Author:
lkammere
Message:

#2886: Replace integer hashing of phrases with simplification to (temporary) string representations.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15784 r15791  
    120120      SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
    121121      SimpleTerm.AddProduction(Var);
    122 
     122 
    123123      InvExpr.AddProduction(InvTerm, InvExpr, Addition);
    124124      InvExpr.AddProduction(InvTerm);
     
    159159
    160160      Symbol peek = parseStack.Peek();
    161       int[] subtreeHashes = GetSubtreeHashes(parseStack);
    162       return AggregateHashes(peek, subtreeHashes);
    163     }
    164 
    165     private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
     161      string[] subtreeHashes = GetSubtreeHashes(parseStack);
     162      return AggregateHashes(peek, subtreeHashes).GetHashCode();
     163    }
     164
     165    private string[] GetSubtreeHashes(Stack<Symbol> parseStack) {
    166166      Symbol currentSymbol = parseStack.Pop();
    167167
    168168      // ADDITION
    169169      if (ReferenceEquals(currentSymbol, Addition)) {
    170         HashSet<int> uniqueChildHashes = new HashSet<int>();
     170        var uniqueChildHashes = new HashSet<string>();
    171171
    172172        // First subtree
     
    192192      // MULTIPLICATION
    193193      if (ReferenceEquals(currentSymbol, Multiplication)) {
    194         List<int> childHashes = new List<int>();
     194        var childHashes = new List<string>();
    195195
    196196        // First subtree
     
    227227
    228228      // var or nonterminal symbol
    229       return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
    230     }
    231 
    232     private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
     229      return currentSymbol.StringRepresentation.ToEnumerable().ToArray();
     230    }
     231
     232    private string AggregateHashes(Symbol operatorSym, IEnumerable<string> hashes) {
    233233      var hashesArray = hashes.ToArray();
    234234
    235       int start;
    236235      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && hashesArray.Count() <= 1) {
    237         start = 0;
    238       } else {
    239         start = operatorSym.StringRepresentation.GetHashCode();
    240       }
    241 
    242       return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
     236        return hashesArray[0];
     237      }
     238      if (Var.VariableTerminalSymbols.Contains(operatorSym)) {
     239        return operatorSym.StringRepresentation;
     240      }
     241
     242      return "[" + hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => result + " ° " + ti) + "]";
    243243    }
    244244    #endregion
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15784 r15791  
    7575      int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
    7676
    77       Assert.IsTrue(alg.DistinctSentences.ContainsKey(actualSolutionHash), "Actual solution was not generated!");
     77      Assert.IsTrue(alg.DistinctSentences.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    7878
    7979      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
     
    112112    public void NoConstants_Nguyen6() {
    113113      // sin(x) + sin(x + x²)
    114       alg.MaxTreeSize = 13;
     114      alg.MaxTreeSize = 10;
    115115      alg.Problem.ProblemData = new NguyenFunctionSix(Seed).GenerateRegressionData();
    116116
     
    130130      int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
    131131
    132       Assert.IsTrue(alg.DistinctSentences.ContainsKey(actualSolutionHash), "Actual solution was not generated!");
    133 
     132      Assert.IsTrue(alg.DistinctSentences.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    134133      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    135134
     
    141140    public void NoConstants_Nguyen9() {
    142141      // sin(x) + sin(y²)
    143       alg.MaxTreeSize = 10;
     142      alg.MaxTreeSize = 11;
    144143      alg.Problem.ProblemData = new NguyenFunctionNine(Seed).GenerateRegressionData();
    145144
    146145      alg.Start();
     146
     147      TerminalSymbol xSymbol = alg.Grammar.Var.VariableTerminalSymbols.First(v => v.StringRepresentation == "X");
     148      TerminalSymbol ySymbol = alg.Grammar.Var.VariableTerminalSymbols.First(v => v.StringRepresentation == "Y");
     149      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
     150      TerminalSymbol addSymbol = alg.Grammar.Addition;
     151      TerminalSymbol sinSymbol = alg.Grammar.Sin;
     152
     153      SymbolString targetSolution = new SymbolString(new[] {
     154        xSymbol, sinSymbol,
     155        ySymbol, ySymbol, mulSymbol, sinSymbol, addSymbol
     156      });
     157
     158      int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
     159      int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
     160
     161      Assert.IsTrue(alg.DistinctSentences.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     162      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
     163
    147164      EvaluateGrammarEnumeration();
    148165    }
     
    162179    [TestProperty("Goal", "structure search")]
    163180    public void NoConstants_Inverse() {
    164       // 1 / (log(x)*x + x)
     181      // sin(x) / (log(x)*x + x)
    165182      alg.MaxTreeSize = 12;
    166183
    167184      var x = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() + 1.1).ToList();
    168       var y = x.Select(xi => 1 / (Math.Log(xi) * xi + xi)).ToList();
     185      var y = x.Select(xi => xi / (Math.Log(xi) * xi + xi)).ToList();
    169186      alg.Problem.ProblemData = new RegressionProblemData(new Dataset(new List<string>() { "x", "y" }, new List<IList>() { x, y }), "x".ToEnumerable(), "y");
    170187
  • branches/2886_SymRegGrammarEnumeration/Test/TreeHashingTest.cs

    r15784 r15791  
    109109    [TestCategory("TreeHashing")]
    110110    public void InverseFactorCancelationSimple() {
     111      // 1/a * b * a * a
    111112      SymbolString s1 = new SymbolString(new Symbol[] { varA, grammar.Inv, varB, grammar.Multiplication, varA, grammar.Multiplication, varA, grammar.Multiplication });
     113      // a * b
    112114      SymbolString s2 = new SymbolString(new Symbol[] { varA, varB, grammar.Multiplication });
    113115
     
    130132    }
    131133
     134    [TestMethod]
     135    [TestCategory("TreeHashing")]
     136    public void CompoundInverseCancellationToSingleInverse() {
     137      SymbolString s1 = new SymbolString(new Symbol[] { varA, varB, grammar.Addition, grammar.Inv, grammar.Inv, grammar.Inv });
     138      SymbolString s2 = new SymbolString(new Symbol[] { varA, varB, grammar.Addition, grammar.Inv });
     139
     140      int hash1 = grammar.CalcHashCode(s1);
     141      int hash2 = grammar.CalcHashCode(s2);
     142
     143      Assert.AreEqual(hash1, hash2);
     144    }
     145
     146    [TestMethod]
     147    [TestCategory("TreeHashing")]
     148    public void CompoundInverseCancellationToDivisor() {
     149      SymbolString s1 = new SymbolString(new Symbol[] { varA, varB, grammar.Addition, grammar.Inv, grammar.Inv });
     150      SymbolString s2 = new SymbolString(new Symbol[] { varA, varB, grammar.Addition });
     151
     152      int hash1 = grammar.CalcHashCode(s1);
     153      int hash2 = grammar.CalcHashCode(s2);
     154
     155      Assert.AreEqual(hash1, hash2);
     156    }
     157
     158    [TestMethod]
     159    [TestCategory("TreeHashing")]
     160    public void a_UncancelableCompoundInverse() {
     161      // 1 / ( 1/b + sin(a*c) )
     162      SymbolString s1 = new SymbolString(new Symbol[] { varB, grammar.Inv, varA, varC, grammar.Multiplication, grammar.Sin, grammar.Addition, grammar.Inv });
     163      // b + sin(a*c)
     164      SymbolString s2 = new SymbolString(new Symbol[] { varB, varA, varC, grammar.Multiplication, grammar.Sin, grammar.Addition });
     165
     166      int hash1 = grammar.CalcHashCode(s1);
     167      int hash2 = grammar.CalcHashCode(s2);
     168
     169      Assert.AreNotEqual(hash1, hash2);
     170    }
     171
    132172    #region parser
    133173
     
    144184    [TestCategory("Parser")]
    145185    public void InfixParserComplexTest() {
    146       SymbolString postfix = new SymbolString(new[] { varB, varA, varB, varC, grammar.Multiplication, grammar.Addition, grammar.Addition});
     186      SymbolString postfix = new SymbolString(new[] { varB, varA, varB, varC, grammar.Multiplication, grammar.Addition, grammar.Addition });
    147187      SymbolString infix = new SymbolString(new[] { varB, grammar.Addition, varA, grammar.Addition, varB, grammar.Multiplication, varC });
    148188
Note: See TracChangeset for help on using the changeset viewer.