Changeset 15734


Ignore:
Timestamp:
02/07/18 17:30:02 (19 months ago)
Author:
gkronber
Message:

#2886 worked on grammar enumeration

Location:
branches/2886_SymRegGrammarEnumeration
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration

    • Property svn:ignore set to
      TestResults
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15725 r15734  
    158158    public int CalcHashCode(SymbolString sentence) {
    159159      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
    160       Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
    161 
    162       Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
    163 
    164       TerminalSymbol peek = parseStack.Peek();
     160      // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
     161
     162      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
     163
     164      Symbol peek = parseStack.Peek();
    165165      int[] subtreeHashes = GetSubtreeHashes(parseStack);
    166166      return AggregateHashes(peek, subtreeHashes);
    167167    }
    168168
    169     private int[] GetSubtreeHashes(Stack<TerminalSymbol> parseStack) {
    170       TerminalSymbol currentSymbol = parseStack.Pop();
     169    private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
     170      Symbol currentSymbol = parseStack.Pop();
    171171
    172172      // VARIABLE
    173       if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
    174         return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
    175       }
     173      // if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
     174      //   return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
     175      // }
    176176
    177177      // MULTIPLICATION
     
    220220        return result.ToArray();
    221221      }
    222       throw new ArgumentException("Trying to hash malformed sentence!");
    223     }
    224 
    225     private int AggregateHashes(TerminalSymbol rule, IEnumerable<int> hashes) {
     222
     223      // var
     224      return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
     225
     226
     227      // throw new ArgumentException("Trying to hash malformed sentence!");
     228    }
     229
     230    private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
    226231      // If multiple subtrees are "merged" (e.g. added, multiplied, etc.), consider the executed operation
    227232      var hashesArray = hashes.ToArray();
    228       int start = hashesArray.Length > 1 ? rule.StringRepresentation.GetHashCode() : 0;
     233      int start = hashesArray.Length > 1 ? operatorSym.StringRepresentation.GetHashCode() : 0;
    229234      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
    230235    }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15726 r15734  
    5757    public SymbolString BestTestSentence;
    5858
    59     public List<Tuple<SymbolString, int>> DistinctGenerated;
    60     public List<Tuple<SymbolString, int>> AllGenerated;
     59    public List<Tuple<SymbolString, int>> distinctSentences;
     60    public List<Tuple<SymbolString, int>> sentences;
    6161    #endregion
    6262
     
    8888
    8989    protected override void Run(CancellationToken cancellationToken) {
     90      Results.Add(new Result("Best R²", new DoubleValue(0.0)));
     91      var rand = new System.Random(1234);
    9092      BestTrainingSentence = null;
    9193      BestTrainingSentence = null;
    92       AllGenerated = new List<Tuple<SymbolString, int>>();
    93       DistinctGenerated = new List<Tuple<SymbolString, int>>();
    94 
     94      this.sentences = new List<Tuple<SymbolString, int>>();
     95      this.distinctSentences = new List<Tuple<SymbolString, int>>();
     96      var archivedPhrases = new Dictionary<int, SymbolString>();
     97      int expansions = 0;
    9598      Dictionary<int, SymbolString> evaluatedHashes = new Dictionary<int, SymbolString>();
    9699
    97100      Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray());
    98101
    99       Stack<SymbolString> remainingTrees = new Stack<SymbolString>();
    100       remainingTrees.Push(new SymbolString(new[] { Grammar.StartSymbol }));
    101 
    102       while (remainingTrees.Any()) {
     102      var phrases = new Dictionary<int, SymbolString>();
     103      var phrase0 = new SymbolString(new[] { Grammar.StartSymbol });
     104      phrases.Add(Grammar.CalcHashCode(phrase0), phrase0);
     105
     106      while (phrases.Any()) {
    103107        if (cancellationToken.IsCancellationRequested) break;
    104108
    105         SymbolString currSymbolString = remainingTrees.Pop();
    106 
    107         if (currSymbolString.IsSentence()) {
    108           AllGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
    109 
    110           int currSymbolStringHash = Grammar.CalcHashCode(currSymbolString);
    111           if (!evaluatedHashes.ContainsKey(currSymbolStringHash)
    112               || evaluatedHashes[currSymbolStringHash].Count > currSymbolString.Count) {
    113             evaluatedHashes[currSymbolStringHash] = currSymbolString;
    114 
    115             DistinctGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
    116             EvaluateSentence(currSymbolString);
     109        // FIFO
     110        // SymbolString currSymbolString = phrases.First();
     111        // phrases.RemoveAt(0);
     112
     113       
     114        // LIFO
     115        // SymbolString currSymbolString = phrases.Last();
     116        // phrases.RemoveAt(phrases.Count - 1);
     117       
     118
     119        // RANDOM
     120        int idx = rand.Next(phrases.Count);
     121        var selectedEntry = phrases.ElementAt(idx);  // TODO: Perf von ElementAt ist schlecht.
     122        phrases.Remove(selectedEntry.Key);
     123        var currPhrase = selectedEntry.Value;
     124
     125        archivedPhrases.Add(selectedEntry.Key, selectedEntry.Value);
     126
     127        if (currPhrase.IsSentence()) {
     128          int currSymbolStringHash = Grammar.CalcHashCode(currPhrase);
     129          this.sentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash));
     130
     131          if (!evaluatedHashes.ContainsKey(currSymbolStringHash)) {
     132            evaluatedHashes[currSymbolStringHash] = currPhrase;
     133
     134            this.distinctSentences.Add(new Tuple<SymbolString, int>(currPhrase, currSymbolStringHash));
     135            EvaluateSentence(currPhrase);
    117136          }
    118           UpdateView(AllGenerated, DistinctGenerated);
     137          UpdateView(this.sentences, this.distinctSentences);
    119138
    120139        } else {
    121140          // expand next nonterminal symbols
    122           int nonterminalSymbolIndex = currSymbolString.FindIndex(s => s is NonterminalSymbol);
    123           NonterminalSymbol expandedSymbol = currSymbolString[nonterminalSymbolIndex] as NonterminalSymbol;
     141          int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
     142          NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
    124143
    125144          foreach (Production productionAlternative in expandedSymbol.Alternatives) {
    126             SymbolString newSentence = new SymbolString(currSymbolString);
     145            SymbolString newSentence = new SymbolString(currPhrase);
    127146            newSentence.RemoveAt(nonterminalSymbolIndex);
    128147            newSentence.InsertRange(nonterminalSymbolIndex, productionAlternative);
    129148
     149            expansions++;
    130150            if (newSentence.Count <= MaxTreeSize) {
    131               remainingTrees.Push(newSentence);
     151              var phraseHash = Grammar.CalcHashCode(newSentence);
     152              if(!phrases.ContainsKey(phraseHash) &&
     153                !archivedPhrases.ContainsKey(phraseHash))
     154              phrases.Add(phraseHash, newSentence);
    132155            }
    133156          }
     
    135158      }
    136159
    137       UpdateView(AllGenerated, DistinctGenerated, force: true);
    138 
    139       string[,] sentences = new string[AllGenerated.Count, 3];
    140       for (int i = 0; i < AllGenerated.Count; i++) {
    141         sentences[i, 0] = AllGenerated[i].Item1.ToString();
    142         sentences[i, 1] = Grammar.PostfixToInfixParser(AllGenerated[i].Item1).ToString();
    143         sentences[i, 2] = AllGenerated[i].Item2.ToString();
     160      UpdateView(this.sentences, this.distinctSentences, force: true);
     161
     162      string[,] sentences = new string[this.sentences.Count, 3];
     163      for (int i = 0; i < this.sentences.Count; i++) {
     164        sentences[i, 0] = this.sentences[i].Item1.ToString();
     165        sentences[i, 1] = Grammar.PostfixToInfixParser(this.sentences[i].Item1).ToString();
     166        sentences[i, 2] = this.sentences[i].Item2.ToString();
    144167      }
    145168      Results.Add(new Result("All generated sentences", new StringMatrix(sentences)));
    146169
    147       string[,] distinctSentences = new string[DistinctGenerated.Count, 3];
    148       for (int i = 0; i < DistinctGenerated.Count; i++) {
    149         distinctSentences[i, 0] = DistinctGenerated[i].Item1.ToString();
    150         distinctSentences[i, 1] = Grammar.PostfixToInfixParser(DistinctGenerated[i].Item1).ToString();
    151         distinctSentences[i, 2] = DistinctGenerated[i].Item2.ToString();
     170      string[,] distinctSentences = new string[this.distinctSentences.Count, 3];
     171      for (int i = 0; i < this.distinctSentences.Count; i++) {
     172        distinctSentences[i, 0] = this.distinctSentences[i].Item1.ToString();
     173        distinctSentences[i, 1] = Grammar.PostfixToInfixParser(this.distinctSentences[i].Item1).ToString();
     174        distinctSentences[i, 2] = this.distinctSentences[i].Item2.ToString();
    152175      }
    153176      Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentences)));
     
    175198        tree,
    176199        new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
    177 
    178       IRegressionSolution newSolution = model.CreateRegressionSolution(Problem.ProblemData);
    179 
    180       IResult currBestTrainingSolutionResult;
    181       IResult currBestTestSolutionResult;
    182       if (!Results.TryGetValue(BestTrainingSolution, out currBestTrainingSolutionResult)
    183            || !Results.TryGetValue(BestTestSolution, out currBestTestSolutionResult)) {
    184 
    185         BestTrainingSentence = symbolString;
    186         Results.Add(new Result(BestTrainingSolution, newSolution));
    187         Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()));
    188 
    189         BestTestSentence = symbolString;
    190         Results.Add(new Result(BestTestSolution, newSolution));
    191         Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()));
    192 
    193       } else {
    194         IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value;
    195         if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) {
    196           BestTrainingSentence = symbolString;
    197           currBestTrainingSolutionResult.Value = newSolution;
    198           Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly());
    199         }
    200 
    201         IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value;
    202         if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) {
    203           BestTestSentence = symbolString;
    204           currBestTestSolutionResult.Value = newSolution;
    205           Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly());
    206         }
    207       }
     200      var probData = Problem.ProblemData;
     201      var target = probData.TargetVariableTrainingValues;
     202      var estVals = model.GetEstimatedValues(probData.Dataset, probData.TrainingIndices);
     203      OnlineCalculatorError error;
     204      var r2 = OnlinePearsonsRSquaredCalculator.Calculate(target, estVals, out error);
     205      if (error != OnlineCalculatorError.None) r2 = 0.0;
     206
     207      var bestR2 = ((DoubleValue)(Results["Best R²"]).Value).Value;
     208      ((DoubleValue)(Results["Best R²"].Value)).Value = Math.Max(r2, bestR2);
     209
     210      // IRegressionSolution newSolution = model.CreateRegressionSolution(Problem.ProblemData);
     211      //
     212      // IResult currBestTrainingSolutionResult;
     213      // IResult currBestTestSolutionResult;
     214      // if (!Results.TryGetValue(BestTrainingSolution, out currBestTrainingSolutionResult)
     215      //      || !Results.TryGetValue(BestTestSolution, out currBestTestSolutionResult)) {
     216      //
     217      //   BestTrainingSentence = symbolString;
     218      //   Results.Add(new Result(BestTrainingSolution, newSolution));
     219      //   Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()));
     220      //
     221      //   BestTestSentence = symbolString;
     222      //   Results.Add(new Result(BestTestSolution, newSolution));
     223      //   Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()));
     224      //
     225      // } else {
     226      //   IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value;
     227      //   if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) {
     228      //     BestTrainingSentence = symbolString;
     229      //     currBestTrainingSolutionResult.Value = newSolution;
     230      //     Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly());
     231      //   }
     232      //
     233      //   IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value;
     234      //   if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) {
     235      //     BestTestSentence = symbolString;
     236      //     currBestTestSolutionResult.Value = newSolution;
     237      //     Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly());
     238      //   }
     239      // }
    208240    }
    209241  }
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15726 r15734  
    6868    public void MctsSymbReg_NoConstants_Nguyen1() {
    6969      // x³ + x² + x
    70       alg.MaxTreeSize = 30;
     70      alg.MaxTreeSize = 3;
    7171      Console.WriteLine(alg.MaxTreeSize);
    7272      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
     
    9292      int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTestSentence);
    9393
    94       Assert.IsTrue(alg.DistinctGenerated.Select(tuple => tuple.Item2).Contains(actualSolutionHash));
     94      Assert.IsTrue(alg.distinctSentences.Select(tuple => tuple.Item2).Contains(actualSolutionHash));
    9595
    9696      // last because long sentences are overwritten by short ones.
    97       Console.WriteLine(alg.Grammar.PostfixToInfixParser(alg.DistinctGenerated.Last(tuple => tuple.Item2 == targetSolutionHash).Item1));
     97      Console.WriteLine(alg.Grammar.PostfixToInfixParser(alg.distinctSentences.Last(tuple => tuple.Item2 == targetSolutionHash).Item1));
    9898
    9999
     
    114114      // Evaluate
    115115      EvaluateGrammarEnumeration();
     116    }
     117
     118    [TestMethod]
     119    [TestProperty("Goal", "structure search")]
     120    public void MctsSymbReg_NoConstants_Poly10() {
     121      alg.MaxTreeSize = 10;
     122      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
     123      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly")));
     124      alg.Problem.ProblemData = regProblem;
     125
     126      alg.Start();
     127
     128      Assert.AreEqual(alg.distinctSentences.Count(), 110);
     129
     130      // Evaluate
     131      // EvaluateGrammarEnumeration();
     132    }
     133
     134    [TestMethod]
     135    [TestProperty("Goal", "structure search")]
     136    public void MctsSymbReg_NoConstants_15() {
     137      alg.MaxTreeSize = 5;
     138      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
     139      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("15")));
     140      alg.Problem.ProblemData = regProblem;
     141
     142      alg.Start();
     143
     144      Assert.AreEqual(alg.distinctSentences.Count(), 16);
     145
     146      // Evaluate
     147      // EvaluateGrammarEnumeration();
    116148    }
    117149
Note: See TracChangeset for help on using the changeset viewer.