Changeset 15746


Ignore:
Timestamp:
02/09/18 15:32:44 (19 months ago)
Author:
lkammere
Message:

#2886: Refactor grammar enumeration alg.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15734 r15746  
    9090    }
    9191
    92     /*
    93     #region Memoize subtrees
    94 
    95     public void MemoizeSubtrees(SymbolString sentence) {
    96       Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
    97 
    98       // Parse root symbol "+"
    99       MemoizeSubtreeExpression(parseStack);
    100     }
    101 
    102     private SymbolString MemoizeSubtreeExpression(Stack<TerminalSymbol> parseStack) {
    103       SymbolString subtree = new SymbolString();
    104 
    105       if (ReferenceEquals(parseStack.Peek(), Addition)) {
    106         subtree.Add(parseStack.Pop());
    107         subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack));
    108         subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
    109 
    110         Expr.Alternatives[0].GeneratedSentences.Add(subtree);
    111       } else {
    112         subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
    113 
    114         Expr.Alternatives[1].GeneratedSentences.Add(subtree);
    115       }
    116 
    117       return subtree;
    118     }
    119 
    120     private SymbolString MemoizeSubtreeTerm(Stack<TerminalSymbol> parseStack) {
    121       SymbolString subtree = new SymbolString();
    122 
    123       if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
    124         subtree.Add(parseStack.Pop());
    125         subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
    126         subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
    127 
    128         Term.Alternatives[0].GeneratedSentences.Add(subtree);
    129       } else {
    130         subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
    131 
    132         Term.Alternatives[1].GeneratedSentences.Add(subtree);
    133       }
    134 
    135       return subtree;
    136     }
    137 
    138     private SymbolString MemoizeSubtreeFactor(Stack<TerminalSymbol> parseStack) {
    139       SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack));
    140 
    141       Factor.Alternatives[0].GeneratedSentences.Add(subtree);
    142       return subtree;
    143     }
    144 
    145     private SymbolString MemoizeSubtreeVar(Stack<TerminalSymbol> parseStack) {
    146       SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable());
    147 
    148       // ... not really
    149       //Var.Alternatives[0].GeneratedSentences.Add(subtree);
    150       return subtree;
    151     }
    152 
    153 
    154     #endregion
    155     */
    156 
    15792    #region Hashing
    15893    public int CalcHashCode(SymbolString sentence) {
     
    221156      }
    222157
    223       // var
     158      // var or nonterminal symbol
    224159      return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
    225 
    226 
    227       // throw new ArgumentException("Trying to hash malformed sentence!");
    228160    }
    229161
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15734 r15746  
    1 using System;
    2 using System.Collections.Generic;
     1using System.Collections.Generic;
    32using System.Linq;
    43using System.Threading;
     
    2019  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
    2120  public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    22     private readonly string BestTrainingSolution = "Best solution (training)";
    23     private readonly string BestTrainingSolutionQuality = "Best solution quality (training)";
    24     private readonly string BestTestSolution = "Best solution (test)";
    25     private readonly string BestTestSolutionQuality = "Best solution quality (test)";
    26 
     21    #region properties and result names
     22    private readonly string BestTrainingQualityName = "Best R² (Training)";
     23    private readonly string BestTrainingSolutionName = "Best solution (Training)";
     24    private readonly string GeneratedSentencesName = "Generated Sentences";
     25    private readonly string DistinctSentencesName = "Distinct Sentences";
     26    private readonly string PhraseExpansionsName = "Phrase Expansions";
     27    private readonly string AverageTreeLengthName = "Avg. Sentence Length among Distinct";
     28    private readonly string GeneratedEqualSentencesName = "Generated equal sentences";
     29
     30
     31    private readonly string SearchDataStructureParameterName = "Search Data Structure";
    2732    private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes";
    2833    private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
    29     private readonly string UseMemoizationParameterName = "Use Memoization?";
    30 
    31     #region properties
     34   
     35    public override bool SupportsPause { get { return false; } }
     36
    3237    protected IValueParameter<IntValue> MaxTreeSizeParameter {
    3338      get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
     
    4651    }
    4752
    48     protected IValueParameter<BoolValue> UseMemoizationParameter {
    49       get { return (IValueParameter<BoolValue>)Parameters[UseMemoizationParameterName]; }
    50     }
    51     public bool UseMemoization {
    52       get { return UseMemoizationParameter.Value.Value; }
    53       set { UseMemoizationParameter.Value.Value = value; }
     53    protected IValueParameter<EnumValue<StorageType>> SearchDataStructureParameter {
     54      get { return (IValueParameter<EnumValue<StorageType>>)Parameters[SearchDataStructureParameterName]; }
     55    }
     56    public StorageType SearchDataStructure {
     57      get { return SearchDataStructureParameter.Value.Value; }
     58      set { SearchDataStructureParameter.Value.Value = value; }
    5459    }
    5560
    5661    public SymbolString BestTrainingSentence;
    57     public SymbolString BestTestSentence;
    58 
    59     public List<Tuple<SymbolString, int>> distinctSentences;
    60     public List<Tuple<SymbolString, int>> sentences;
    61     #endregion
    62 
     62
     63    #endregion
     64
     65    public Dictionary<int, SymbolString> DistinctSentences;   // Semantically distinct sentences in a run.
     66    public Dictionary<int, List<SymbolString>> AllSentences;  // All sentences ever generated in a run.
     67    public Dictionary<int, SymbolString> ArchivedPhrases;     // Nodes in the search tree
     68    SearchDataStore OpenPhrases;                              // Stack/Queue/etc. for fetching the next node in the search tree. 
     69
     70    public int EqualGeneratedSentences; // It is not guaranteed that shorter solutions are found first.
     71                                        // When longer solutions are overwritten with shorter ones,
     72                                        // this counter is increased.
     73    public int Expansions;              // Number, how many times a nonterminal symbol is replaced with a production rule.
    6374    public Grammar Grammar;
    64 
    6575
    6676    #region ctors
     
    7080
    7181    public GrammarEnumerationAlgorithm() {
    72 
    7382      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
    7483      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
     
    8089      Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
    8190      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(4000)));
    82       Parameters.Add(new ValueParameter<BoolValue>(UseMemoizationParameterName, "Should already subtrees be reused within a run.", new BoolValue(true)));
    83     }
    84 
    85     private GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { }
    86     #endregion
    87 
     91
     92      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.RandomList)));
     93    }
     94
     95    public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { }
     96    #endregion
    8897
    8998    protected override void Run(CancellationToken cancellationToken) {
    90       Results.Add(new Result("Best R²", new DoubleValue(0.0)));
    91       var rand = new System.Random(1234);
    92       BestTrainingSentence = null;
    93       BestTrainingSentence = null;
    94       this.sentences = new List<Tuple<SymbolString, int>>();
    95       this.distinctSentences = new List<Tuple<SymbolString, int>>();
    96       var archivedPhrases = new Dictionary<int, SymbolString>();
    97       int expansions = 0;
    98       Dictionary<int, SymbolString> evaluatedHashes = new Dictionary<int, SymbolString>();
     99      #region init
     100      InitResults();
     101
     102      AllSentences = new Dictionary<int, List<SymbolString>>();
     103      ArchivedPhrases = new Dictionary<int, SymbolString>(); // Nodes in the search tree
     104      DistinctSentences = new Dictionary<int, SymbolString>();
     105      Expansions = 0;
     106      EqualGeneratedSentences = 0;
    99107
    100108      Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray());
    101109
    102       var phrases = new Dictionary<int, SymbolString>();
     110      OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy
    103111      var phrase0 = new SymbolString(new[] { Grammar.StartSymbol });
    104       phrases.Add(Grammar.CalcHashCode(phrase0), phrase0);
    105 
    106       while (phrases.Any()) {
     112      #endregion
     113
     114      OpenPhrases.Store(Grammar.CalcHashCode(phrase0), phrase0);
     115
     116      while (OpenPhrases.Count > 0) {
    107117        if (cancellationToken.IsCancellationRequested) break;
    108118
    109         // FIFO
    110         // SymbolString currSymbolString = phrases.First();
    111         // phrases.RemoveAt(0);
    112 
    113        
    114         // LIFO
    115         // SymbolString currSymbolString = phrases.Last();
    116         // phrases.RemoveAt(phrases.Count - 1);
    117        
    118 
    119         // RANDOM
    120         int idx = rand.Next(phrases.Count);
    121         var selectedEntry = phrases.ElementAt(idx);  // TODO: Perf von ElementAt ist schlecht.
    122         phrases.Remove(selectedEntry.Key);
    123         var currPhrase = selectedEntry.Value;
    124 
    125         archivedPhrases.Add(selectedEntry.Key, selectedEntry.Value);
    126 
    127         if (currPhrase.IsSentence()) {
    128           int currSymbolStringHash = Grammar.CalcHashCode(currPhrase);
    129           this.sentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash));
    130 
    131           if (!evaluatedHashes.ContainsKey(currSymbolStringHash)) {
    132             evaluatedHashes[currSymbolStringHash] = currPhrase;
    133 
    134             this.distinctSentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash));
    135             EvaluateSentence(currPhrase);
    136           }
    137           UpdateView(this.sentences, this.distinctSentences);
    138 
    139         } else {
    140           // expand next nonterminal symbols
    141           int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
    142           NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
    143 
    144           foreach (Production productionAlternative in expandedSymbol.Alternatives) {
    145             SymbolString newSentence = new SymbolString(currPhrase);
    146             newSentence.RemoveAt(nonterminalSymbolIndex);
    147             newSentence.InsertRange(nonterminalSymbolIndex, productionAlternative);
    148 
    149             expansions++;
    150             if (newSentence.Count <= MaxTreeSize) {
    151               var phraseHash = Grammar.CalcHashCode(newSentence);
    152               if(!phrases.ContainsKey(phraseHash) &&
    153                 !archivedPhrases.ContainsKey(phraseHash))
    154               phrases.Add(phraseHash, newSentence);
     119        StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
     120        SymbolString currPhrase = fetchedPhrase.SymbolString;
     121
     122        ArchivedPhrases.Add(fetchedPhrase.Hash, currPhrase);
     123
     124        // expand next nonterminal symbols
     125        int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
     126        NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
     127
     128        foreach (Production productionAlternative in expandedSymbol.Alternatives) {
     129          SymbolString newPhrase = new SymbolString(currPhrase);
     130          newPhrase.RemoveAt(nonterminalSymbolIndex);
     131          newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative);
     132
     133          Expansions++;
     134          if (newPhrase.Count <= MaxTreeSize) {
     135            var phraseHash = Grammar.CalcHashCode(newPhrase);
     136
     137            if (newPhrase.IsSentence()) { // Sentence was generated.
     138              SaveToAllSentences(phraseHash, newPhrase);
     139
     140              if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) {
     141                if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only
     142
     143                DistinctSentences[phraseHash] = newPhrase;
     144                EvaluateSentence(newPhrase);
     145              }
     146              UpdateView(AllSentences, DistinctSentences, Expansions);
     147
     148            } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.ContainsKey(phraseHash)) {
     149              OpenPhrases.Store(phraseHash, newPhrase);
    155150            }
    156151          }
     
    158153      }
    159154
    160       UpdateView(this.sentences, this.distinctSentences, force: true);
    161 
    162       string[,] sentences = new string[this.sentences.Count, 3];
    163       for (int i = 0; i < this.sentences.Count; i++) {
    164         sentences[i, 0] = this.sentences[i].Item1.ToString();
    165         sentences[i, 1] = Grammar.PostfixToInfixParser(this.sentences[i].Item1).ToString();
    166         sentences[i, 2] = this.sentences[i].Item2.ToString();
    167       }
    168       Results.Add(new Result("All generated sentences", new StringMatrix(sentences)));
    169 
    170       string[,] distinctSentences = new string[this.distinctSentences.Count, 3];
    171       for (int i = 0; i < this.distinctSentences.Count; i++) {
    172         distinctSentences[i, 0] = this.distinctSentences[i].Item1.ToString();
    173         distinctSentences[i, 1] = Grammar.PostfixToInfixParser(this.distinctSentences[i].Item1).ToString();
    174         distinctSentences[i, 2] = this.distinctSentences[i].Item2.ToString();
    175       }
    176       Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentences)));
    177     }
    178 
    179 
    180     private void UpdateView(List<Tuple<SymbolString, int>> allGenerated,
    181         List<Tuple<SymbolString, int>> distinctGenerated, bool force = false) {
    182       int generatedSolutions = allGenerated.Count;
    183       int distinctSolutions = distinctGenerated.Count;
    184 
    185       if (force || generatedSolutions % GuiUpdateInterval == 0) {
    186         Results.AddOrUpdateResult("Generated Solutions", new IntValue(generatedSolutions));
    187         Results.AddOrUpdateResult("Distinct Solutions", new IntValue(distinctSolutions));
    188 
    189         DoubleValue averageTreeLength = new DoubleValue(allGenerated.Select(r => r.Item1.Count).Average());
    190         Results.AddOrUpdateResult("Average Tree Length of Solutions", averageTreeLength);
    191       }
    192     }
    193 
     155      UpdateView(AllSentences, DistinctSentences, Expansions, force: true);
     156      UpdateFinalResults();
     157    }
     158
     159    // Store sentence to "MultiDictionary"
     160    private void SaveToAllSentences(int hash, SymbolString sentence) {
     161      if (AllSentences.ContainsKey(hash))
     162        AllSentences[hash].Add(sentence);
     163      else
     164        AllSentences[hash] = new List<SymbolString> { sentence };
     165    }
     166
     167    #region Evaluation of generated models.
     168
     169    // Evaluate sentence within an algorithm run.
    194170    private void EvaluateSentence(SymbolString symbolString) {
    195171      SymbolicExpressionTree tree = Grammar.ParseSymbolicExpressionTree(symbolString);
     
    198174        tree,
    199175        new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
     176
    200177      var probData = Problem.ProblemData;
    201178      var target = probData.TargetVariableTrainingValues;
     
    205182      if (error != OnlineCalculatorError.None) r2 = 0.0;
    206183
    207       var bestR2 = ((DoubleValue)(Results["Best R²"]).Value).Value;
    208       ((DoubleValue)(Results["Best R²"].Value)).Value = Math.Max(r2, bestR2);
    209 
    210       // IRegressionSolution newSolution = model.CreateRegressionSolution(Problem.ProblemData);
    211       //
    212       // IResult currBestTrainingSolutionResult;
    213       // IResult currBestTestSolutionResult;
    214       // if (!Results.TryGetValue(BestTrainingSolution, out currBestTrainingSolutionResult)
    215       //      || !Results.TryGetValue(BestTestSolution, out currBestTestSolutionResult)) {
    216       //
    217       //   BestTrainingSentence = symbolString;
    218       //   Results.Add(new Result(BestTrainingSolution, newSolution));
    219       //   Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()));
    220       //
    221       //   BestTestSentence = symbolString;
    222       //   Results.Add(new Result(BestTestSolution, newSolution));
    223       //   Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()));
    224       //
    225       // } else {
    226       //   IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value;
    227       //   if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) {
    228       //     BestTrainingSentence = symbolString;
    229       //     currBestTrainingSolutionResult.Value = newSolution;
    230       //     Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly());
    231       //   }
    232       //
    233       //   IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value;
    234       //   if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) {
    235       //     BestTestSentence = symbolString;
    236       //     currBestTestSolutionResult.Value = newSolution;
    237       //     Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly());
    238       //   }
    239       // }
    240     }
     184      var bestR2 = ((DoubleValue)Results[BestTrainingQualityName].Value).Value;
     185      if (r2 > bestR2) {
     186        ((DoubleValue)Results[BestTrainingQualityName].Value).Value = r2;
     187        BestTrainingSentence = symbolString;
     188      }
     189    }
     190
     191    #endregion
     192
     193    #region Visualization in HL
     194    // Initialize entries in result set.
     195    private void InitResults() {
     196      BestTrainingSentence = null;
     197
     198      Results.Add(new Result(BestTrainingQualityName, new DoubleValue(-1.0)));
     199
     200      Results.Add(new Result(GeneratedSentencesName, new IntValue(0)));
     201      Results.Add(new Result(DistinctSentencesName, new IntValue(0)));
     202      Results.Add(new Result(PhraseExpansionsName, new IntValue(0)));
     203      Results.Add(new Result(GeneratedEqualSentencesName, new IntValue(0)));
     204      Results.Add(new Result(AverageTreeLengthName, new DoubleValue(1.0)));
     205    }
     206
     207    // Update the view for intermediate results in an algorithm run.
     208    private int updates;
     209    private void UpdateView(Dictionary<int, List<SymbolString>> allGenerated,
     210        Dictionary<int, SymbolString> distinctGenerated, int expansions, bool force = false) {
     211      updates++;
     212
     213      if (force || updates % GuiUpdateInterval == 0) {
     214        var allGeneratedEnum = allGenerated.Values.SelectMany(x => x).ToArray();
     215        ((IntValue)Results[GeneratedSentencesName].Value).Value = allGeneratedEnum.Length;
     216        ((IntValue)Results[DistinctSentencesName].Value).Value = distinctGenerated.Count;
     217        ((IntValue)Results[PhraseExpansionsName].Value).Value = expansions;
     218        ((IntValue)Results[GeneratedEqualSentencesName].Value).Value = EqualGeneratedSentences;
     219        ((DoubleValue)Results[AverageTreeLengthName].Value).Value = allGeneratedEnum.Select(sentence => sentence.Count).Average();
     220      }
     221    }
     222
     223    // Generate all Results after an algorithm run.
     224    private void UpdateFinalResults() {
     225      SymbolicExpressionTree tree = Grammar.ParseSymbolicExpressionTree(BestTrainingSentence);
     226      SymbolicRegressionModel model = new SymbolicRegressionModel(
     227        Problem.ProblemData.TargetVariable,
     228        tree,
     229        new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
     230
     231      IRegressionSolution bestTrainingSolution = new RegressionSolution(model, Problem.ProblemData);
     232      Results.AddOrUpdateResult(BestTrainingSolutionName, bestTrainingSolution);
     233
     234      // Print generated sentences.
     235      string[,] sentencesMatrix = new string[AllSentences.Values.SelectMany(x => x).Count(), 3];
     236
     237      int i = 0;
     238      foreach (var sentenceSet in AllSentences) {
     239        foreach (var sentence in sentenceSet.Value) {
     240          sentencesMatrix[i, 0] = sentence.ToString();
     241          sentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(sentence).ToString();
     242          sentencesMatrix[i, 2] = sentenceSet.Key.ToString();
     243          i++;
     244        }
     245      }
     246      Results.Add(new Result("All generated sentences", new StringMatrix(sentencesMatrix)));
     247
     248      string[,] distinctSentencesMatrix = new string[DistinctSentences.Count, 3];
     249      i = 0;
     250      foreach (KeyValuePair<int, SymbolString> distinctSentence in DistinctSentences) {
     251        distinctSentencesMatrix[i, 0] = distinctSentence.Key.ToString();
     252        distinctSentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(distinctSentence.Value).ToString();
     253        distinctSentencesMatrix[i, 2] = distinctSentence.Key.ToString();
     254        i++;
     255      }
     256      Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentencesMatrix)));
     257    }
     258    #endregion
     259 
    241260  }
    242261}
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.csproj

    r15714 r15746  
    9393    <Compile Include="GrammarEnumeration\Grammar.cs" />
    9494    <Compile Include="GrammarEnumeration\GrammarEnumerationAlgorithm.cs" />
     95    <Compile Include="GrammarEnumeration\SearchDataStructure.cs" />
    9596    <Compile Include="GrammarEnumeration\Sentence.cs" />
    9697    <Compile Include="Plugin.cs" />
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15734 r15746  
    11using System;
     2using System.Diagnostics;
    23using System.Linq;
    34using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
     
    2425    [TestInitialize]
    2526    public void InitTest() {
    26       Console.Write("init called... ");
    2727      rand = new FastRandom(Seed);
    2828
     
    3333    }
    3434
    35     [TestCleanup()]
     35    [TestCleanup]
    3636    public void Cleanup() {
    3737      if (alg.BestTrainingSentence != null) {
    3838        Console.WriteLine("Training: " + alg.Grammar.PostfixToInfixParser(alg.BestTrainingSentence));
    3939      }
    40       if (alg.BestTestSentence != null) {
    41         Console.WriteLine("Test: " + alg.Grammar.PostfixToInfixParser(alg.BestTestSentence));
    42       }
    4340    }
    4441
     
    4946
    5047      // Check if algorithm terminated correctly
    51       Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (training)"), "No training solution returned!");
    52       Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (test)"), "No test solution returned!");
     48      Assert.IsTrue(alg.Results.ContainsKey("Best solution (Training)"), "No training solution returned!");
    5349
    5450      // Check resultss
    55       Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (training)"].Value).Value, eps, "Training quality too low!");
    56       Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (test)"].Value).Value, eps, "Test quality too low!");
    57 
    58       // Check overfitting
    59       Assert.AreEqual(alg.Grammar.CalcHashCode(alg.BestTrainingSentence),
    60                     alg.Grammar.CalcHashCode(alg.BestTestSentence),
    61                     "Training and Test solution are not equal!");
     51      Assert.AreEqual(1.0, ((IRegressionSolution)alg.Results["Best solution (Training)"].Value).TestRSquared, eps, "Test quality too low!");
    6252    }
    6353
     
    6858    public void MctsSymbReg_NoConstants_Nguyen1() {
    6959      // x³ + x² + x
    70       alg.MaxTreeSize = 3;
     60      alg.MaxTreeSize = 15;
    7161      Console.WriteLine(alg.MaxTreeSize);
    7262      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
     
    7565
    7666      alg.Start();
    77 
    78       // Evaluate
    79       // EvaluateGrammarEnumeration();
    8067
    8168      TerminalSymbol varSymbol = alg.Grammar.Var.VariableTerminalSymbols.First();
     
    9077
    9178      int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
    92       int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTestSentence);
    93 
    94       Assert.IsTrue(alg.distinctSentences.Select(tuple => tuple.Item2).Contains(actualSolutionHash));
    95 
    96       // last because long sentences are overwritten by short ones.
    97       Console.WriteLine(alg.Grammar.PostfixToInfixParser(alg.distinctSentences.Last(tuple => tuple.Item2 == targetSolutionHash).Item1));
    98 
    99 
    100       Assert.AreEqual(targetSolutionHash, actualSolutionHash);
     79      int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
     80
     81      Assert.IsTrue(alg.DistinctSentences.ContainsKey(actualSolutionHash), "Actual solution was not generated!");
     82
     83      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
     84
     85      // Evaluate
     86      EvaluateGrammarEnumeration();
    10187    }
    10288
     
    10591    public void MctsSymbReg_NoConstants_Nguyen2() {
    10692      // x^4 + x³ + x² + x
    107       alg.MaxTreeSize = 30;
     93      alg.MaxTreeSize = 25;
    10894      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
    10995      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
     
    11197
    11298      alg.Start();
     99
     100      TerminalSymbol varSymbol = alg.Grammar.Var.VariableTerminalSymbols.First();
     101      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
     102      TerminalSymbol addSymbol = alg.Grammar.Addition;
     103
     104      SymbolString targetSolution = new SymbolString(new[] {
     105        varSymbol, varSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol, mulSymbol,
     106        varSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol, addSymbol,
     107        varSymbol, varSymbol, mulSymbol, addSymbol,
     108        varSymbol, addSymbol
     109      });
     110
     111      int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
     112      int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);
     113
     114      var targetSolutionStr = alg.Grammar.PostfixToInfixParser(targetSolution).ToString();
     115      var foundTargetSolutionStr = alg.Grammar.PostfixToInfixParser(alg.DistinctSentences[targetSolutionHash]).ToString();
     116      var actualSolutionStr = alg.Grammar.PostfixToInfixParser(alg.DistinctSentences[targetSolutionHash]).ToString();
     117
     118      Assert.IsTrue(alg.DistinctSentences.ContainsKey(targetSolutionHash), "Target solution was not generated!");
     119
     120      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    113121
    114122      // Evaluate
     
    126134      alg.Start();
    127135
    128       Assert.AreEqual(alg.distinctSentences.Count(), 110);
     136      Assert.AreEqual(alg.DistinctSentences.Count(), 110);
    129137
    130138      // Evaluate
     
    142150      alg.Start();
    143151
    144       Assert.AreEqual(alg.distinctSentences.Count(), 16);
     152      Assert.AreEqual(alg.DistinctSentences.Count(), 16);
    145153
    146154      // Evaluate
  • branches/2886_SymRegGrammarEnumeration/Test/TreeHashingTest.cs

    r15725 r15746  
    9494    }
    9595
     96    [TestMethod]
     97    [TestCategory("TreeHashing")]
     98    public void NonterminalHashing() {
     99      SymbolString s1 = new SymbolString(new Symbol[] { varA, varA, grammar.Expr, grammar.Addition, grammar.Addition });
     100      SymbolString s2 = new SymbolString(new Symbol[] { varA, grammar.Expr, grammar.Addition });
     101
     102      int hash1 = grammar.CalcHashCode(s1);
     103      int hash2 = grammar.CalcHashCode(s2);
     104
     105      Assert.AreEqual(hash1, hash2);
     106    }
     107
    96108
    97109    #region parser
Note: See TracChangeset for help on using the changeset viewer.