Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs @ 15860

Last change on this file since 15860 was 15860, checked in by lkammere, 6 years ago

#2886: Change complexity measure from number of nodes in tree to number of variable references.

File size: 5.1 KB
Line 
1using System;
2using System.Diagnostics;
3using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
4using HeuristicLab.Common;
5using HeuristicLab.Core;
6using HeuristicLab.Data;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Optimization;
9using HeuristicLab.Problems.DataAnalysis;
10using HeuristicLab.Problems.DataAnalysis.Symbolic;
11using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
12
namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
  /// <summary>
  /// Analyzer for the <see cref="GrammarEnumerationAlgorithm"/> that evaluates every distinct
  /// sentence the algorithm generates on the training partition and tracks the best model found
  /// so far in the algorithm's result collection.
  /// </summary>
  public class RSquaredEvaluator : Item, IGrammarEnumerationAnalyzer {
    // Names of the entries this analyzer maintains in the algorithm's result collection.
    private const string BestTrainingQualityResultName = "Best R² (Training)";
    private const string BestTrainingModelResultName = "Best model (Training)";
    private const string BestTrainingSolutionResultName = "Best solution (Training)";

    // Interpreter shared by all evaluations performed by this analyzer instance.
    private readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();

    /// <summary>Creates a new evaluator that is not yet registered with any algorithm.</summary>
    public RSquaredEvaluator() { }

    /// <summary>
    /// Cloning constructor. Chains to the base cloning constructor so that the base classes
    /// take part in the cloning operation; the interpreter field is re-created by its initializer.
    /// </summary>
    /// <param name="original">The instance that is being cloned.</param>
    /// <param name="cloner">The cloner driving the cloning operation.</param>
    protected RSquaredEvaluator(RSquaredEvaluator original, Cloner cloner) : base(original, cloner) { }

    /// <summary>Creates a clone of this evaluator using the given <paramref name="cloner"/>.</summary>
    /// <param name="cloner">The cloner driving the cloning operation.</param>
    /// <returns>A new <see cref="RSquaredEvaluator"/> instance.</returns>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new RSquaredEvaluator(this, cloner);
    }

    /// <summary>Subscribes this analyzer to the events of <paramref name="algorithm"/>.</summary>
    /// <param name="algorithm">The algorithm whose sentences should be evaluated.</param>
    public void Register(GrammarEnumerationAlgorithm algorithm) {
      algorithm.Started += OnStarted;
      algorithm.Stopped += OnStopped;

      algorithm.DistinctSentenceGenerated += AlgorithmOnDistinctSentenceGenerated;
    }

    /// <summary>Removes the subscriptions added by <see cref="Register"/>.</summary>
    /// <param name="algorithm">The algorithm to detach from.</param>
    public void Deregister(GrammarEnumerationAlgorithm algorithm) {
      algorithm.Started -= OnStarted;
      algorithm.Stopped -= OnStopped;

      algorithm.DistinctSentenceGenerated -= AlgorithmOnDistinctSentenceGenerated;
    }

    /// <summary>Evaluates the newly generated sentence carried by the event arguments.</summary>
    private void AlgorithmOnDistinctSentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
      GrammarEnumerationAlgorithm algorithm = (GrammarEnumerationAlgorithm)sender;
      EvaluateSentence(algorithm, phraseAddedEventArgs.NewPhrase);
    }

    /// <summary>
    /// Adds the best-quality result entry with an initial value of -1.0 and clears the
    /// algorithm's best training sentence.
    /// </summary>
    private void OnStarted(object sender, EventArgs eventArgs) {
      GrammarEnumerationAlgorithm algorithm = (GrammarEnumerationAlgorithm)sender;
      // -1.0 is the initial "best" value; any evaluated quality greater than it replaces it.
      algorithm.Results.Add(new Result(BestTrainingQualityResultName, new DoubleValue(-1.0)));

      algorithm.BestTrainingSentence = null;
    }

    /// <summary>
    /// If a best model has been recorded, wraps it together with the problem data into a
    /// regression solution and stores that solution in the results.
    /// </summary>
    private void OnStopped(object sender, EventArgs eventArgs) {
      GrammarEnumerationAlgorithm algorithm = (GrammarEnumerationAlgorithm)sender;
      if (!algorithm.Results.ContainsKey(BestTrainingModelResultName)) return;

      SymbolicRegressionModel model = (SymbolicRegressionModel)algorithm.Results[BestTrainingModelResultName].Value;
      IRegressionSolution bestTrainingSolution = new RegressionSolution(model, algorithm.Problem.ProblemData);

      algorithm.Results.AddOrUpdateResult(BestTrainingSolutionResultName, bestTrainingSolution);
    }

    /// <summary>
    /// Parses <paramref name="symbolString"/> into an expression tree, optimizes its constants on
    /// the training indices and, if the resulting quality beats the best one so far (or ties it
    /// with a lower grammar complexity), records the model and sentence as the new best.
    /// </summary>
    /// <param name="algorithm">The algorithm providing the grammar, problem data and results.</param>
    /// <param name="symbolString">The sentence to evaluate.</param>
    private void EvaluateSentence(GrammarEnumerationAlgorithm algorithm, SymbolString symbolString) {
      var problemData = algorithm.Problem.ProblemData;

      SymbolicExpressionTree tree = algorithm.Grammar.ParseSymbolicExpressionTree(symbolString);
      Debug.Assert(SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree));

      // TODO: Initialize constant values randomly
      // TODO: Restarts

      // The optimizer writes the tuned constants back into the tree (updateConstantsInTree)
      // and returns the quality that is tracked below as R².
      double r2 = SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(expressionTreeLinearInterpreter,
        tree,
        problemData,
        problemData.TrainingIndices,
        applyLinearScaling: false,
        maxIterations: 50,
        updateVariableWeights: true,
        updateConstantsInTree: true);

      // Snap constants that are almost zero to exactly 0.0.
      foreach (var symbolicExpressionTreeNode in tree.IterateNodesPostfix()) {
        ConstantTreeNode constTreeNode = symbolicExpressionTreeNode as ConstantTreeNode;
        if (constTreeNode != null && constTreeNode.Value.IsAlmost(0.0)) {
          constTreeNode.Value = 0.0;
        }
      }

      SymbolicRegressionModel model = new SymbolicRegressionModel(
        problemData.TargetVariable,
        tree,
        expressionTreeLinearInterpreter);

      var bestR2Result = (DoubleValue)algorithm.Results[BestTrainingQualityResultName].Value;
      bool better = r2 > bestR2Result.Value;
      bool equallyGood = r2.IsAlmost(bestR2Result.Value);
      bool shorter = false;

      // Complexity is only used as a tie-breaker, so it is only computed when the quality
      // is not strictly better but almost equal to the current best.
      if (!better && equallyGood) {
        shorter = algorithm.BestTrainingSentence != null &&
          algorithm.Grammar.GetComplexity(algorithm.BestTrainingSentence) > algorithm.Grammar.GetComplexity(symbolString);
      }
      if (better || (equallyGood && shorter)) {
        bestR2Result.Value = r2;
        algorithm.Results.AddOrUpdateResult(BestTrainingModelResultName, model);

        algorithm.BestTrainingSentence = symbolString;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.