Changeset 15726


Ignore:
Timestamp:
02/06/18 22:11:56 (19 months ago)
Author:
lkammere
Message:

#2886: Overwrite a long sentence when a shorter one with the same hash is found.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15725 r15726  
    5757    public SymbolString BestTestSentence;
    5858
     59    public List<Tuple<SymbolString, int>> DistinctGenerated;
     60    public List<Tuple<SymbolString, int>> AllGenerated;
    5961    #endregion
    6062
     
    8890      BestTrainingSentence = null;
    8991      BestTrainingSentence = null;
    90 
    91       List<Tuple<SymbolString, int>> allGenerated = new List<Tuple<SymbolString, int>>();
    92       List<SymbolString> distinctGenerated = new List<SymbolString>();
    93 
    94       HashSet<int> evaluatedHashes = new HashSet<int>();
     92      AllGenerated = new List<Tuple<SymbolString, int>>();
     93      DistinctGenerated = new List<Tuple<SymbolString, int>>();
     94
     95      Dictionary<int, SymbolString> evaluatedHashes = new Dictionary<int, SymbolString>();
    9596
    9697      Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray());
     
    105106
    106107        if (currSymbolString.IsSentence()) {
    107           allGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
    108 
    109           if (evaluatedHashes.Add(Grammar.CalcHashCode(currSymbolString))) {
     108          AllGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
     109
     110          int currSymbolStringHash = Grammar.CalcHashCode(currSymbolString);
     111          if (!evaluatedHashes.ContainsKey(currSymbolStringHash)
     112              || evaluatedHashes[currSymbolStringHash].Count > currSymbolString.Count) {
     113            evaluatedHashes[currSymbolStringHash] = currSymbolString;
     114
     115            DistinctGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
    110116            EvaluateSentence(currSymbolString);
    111             distinctGenerated.Add(Grammar.PostfixToInfixParser(currSymbolString));
    112117          }
    113 
    114           UpdateView(allGenerated, distinctGenerated);
     118          UpdateView(AllGenerated, DistinctGenerated);
    115119
    116120        } else {
     
    131135      }
    132136
    133       UpdateView(allGenerated, distinctGenerated, force: true);
    134 
    135       string[,] sentences = new string[allGenerated.Count, 3];
    136       for (int i = 0; i < allGenerated.Count; i++) {
    137         sentences[i, 0] = allGenerated[i].Item1.ToString();
    138         sentences[i, 1] = Grammar.PostfixToInfixParser(allGenerated[i].Item1).ToString();
    139         sentences[i, 2] = allGenerated[i].Item2.ToString();
     137      UpdateView(AllGenerated, DistinctGenerated, force: true);
     138
     139      string[,] sentences = new string[AllGenerated.Count, 3];
     140      for (int i = 0; i < AllGenerated.Count; i++) {
     141        sentences[i, 0] = AllGenerated[i].Item1.ToString();
     142        sentences[i, 1] = Grammar.PostfixToInfixParser(AllGenerated[i].Item1).ToString();
     143        sentences[i, 2] = AllGenerated[i].Item2.ToString();
    140144      }
    141145      Results.Add(new Result("All generated sentences", new StringMatrix(sentences)));
    142146
    143       StringArray distinctSentences = new StringArray(distinctGenerated.Select(r => r.ToString()).ToArray());
    144       Results.Add(new Result("Distinct generated sentences", distinctSentences));
    145     }
    146 
    147 
    148     private void UpdateView(List<Tuple<SymbolString, int>> allGenerated, List<SymbolString> distinctGenerated, bool force = false) {
     147      string[,] distinctSentences = new string[DistinctGenerated.Count, 3];
     148      for (int i = 0; i < DistinctGenerated.Count; i++) {
     149        distinctSentences[i, 0] = DistinctGenerated[i].Item1.ToString();
     150        distinctSentences[i, 1] = Grammar.PostfixToInfixParser(DistinctGenerated[i].Item1).ToString();
     151        distinctSentences[i, 2] = DistinctGenerated[i].Item2.ToString();
     152      }
     153      Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentences)));
     154    }
     155
     156
     157    private void UpdateView(List<Tuple<SymbolString, int>> allGenerated,
     158        List<Tuple<SymbolString, int>> distinctGenerated, bool force = false) {
    149159      int generatedSolutions = allGenerated.Count;
    150160      int distinctSolutions = distinctGenerated.Count;
     
    183193      } else {
    184194        IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value;
    185         if (currBestTrainingSolution.TrainingRSquared < newSolution.TrainingRSquared) {
     195        if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) {
    186196          BestTrainingSentence = symbolString;
    187197          currBestTrainingSolutionResult.Value = newSolution;
     
    190200
    191201        IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value;
    192         if (currBestTestSolution.TestRSquared < newSolution.TestRSquared) {
     202        if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) {
    193203          BestTestSentence = symbolString;
    194204          currBestTestSolutionResult.Value = newSolution;
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15724 r15726  
    22using System.Linq;
    33using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
     4using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
    45using HeuristicLab.Common;
    56using HeuristicLab.Core;
     
    4344
    4445
    45     private void TestGrammarEnumeration(IRegressionProblemData problemData) {
    46       alg.Problem.ProblemData = problemData;
    47 
    48       // Run
    49       alg.Start();
    50 
     46    private void EvaluateGrammarEnumeration() {
    5147      // Evaluate results
    5248      var eps = 1.0 - SuccessThreshold;
     
    7268    public void MctsSymbReg_NoConstants_Nguyen1() {
    7369      // x³ + x² + x
    74       alg.MaxTreeSize = 15;
     70      alg.MaxTreeSize = 30;
     71      Console.WriteLine(alg.MaxTreeSize);
    7572      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
    7673      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
    77       TestGrammarEnumeration(regProblem);
     74      alg.Problem.ProblemData = regProblem;
     75
     76      alg.Start();
     77
     78      // Evaluate
     79      // EvaluateGrammarEnumeration();
     80
     81      TerminalSymbol varSymbol = alg.Grammar.Var.VariableTerminalSymbols.First();
     82      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
     83      TerminalSymbol addSymbol = alg.Grammar.Addition;
     84
     85      SymbolString targetSolution = new SymbolString(new[] {
     86        varSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol,
     87        varSymbol, varSymbol, mulSymbol, addSymbol,
     88        varSymbol, addSymbol
     89      });
     90
     91      int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
     92      int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTestSentence);
     93
     94      Assert.IsTrue(alg.DistinctGenerated.Select(tuple => tuple.Item2).Contains(actualSolutionHash));
     95
     96      // Use Last(): a shorter sentence with the same hash is appended after the longer one it overwrites, so the last match is the shortest.
     97      Console.WriteLine(alg.Grammar.PostfixToInfixParser(alg.DistinctGenerated.Last(tuple => tuple.Item2 == targetSolutionHash).Item1));
     98
     99
     100      Assert.AreEqual(targetSolutionHash, actualSolutionHash);
    78101    }
    79102
     
    82105    public void MctsSymbReg_NoConstants_Nguyen2() {
    83106      // x^4 + x³ + x² + x
    84       alg.MaxTreeSize = 20;
     107      alg.MaxTreeSize = 30;
    85108      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
    86109      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
    87       TestGrammarEnumeration(regProblem);
     110      alg.Problem.ProblemData = regProblem;
     111
     112      alg.Start();
     113
     114      // Evaluate
     115      EvaluateGrammarEnumeration();
    88116    }
    89117
Note: See TracChangeset for help on using the changeset viewer.