Free cookie consent management tool by TermsFeed Policy Generator

source: tags/3.3.0/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/BestSymbolicRegressionSolutionAnalyzer.cs @ 9129

Last change on this file since 9129 was 3710, checked in by gkronber, 14 years ago

Implemented reviewer comments. #893 (HeuristicLab 3.3.0 application review)

File size: 12.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// Analyzer that keeps track of the best symbolic regression tree seen so far.
  /// On every application it picks the tree with the smallest quality value from the
  /// current scope tree and, if it beats the stored best quality, wraps it in a
  /// <see cref="SymbolicRegressionSolution"/> and publishes the solution together with
  /// its R², mean squared error and average relative error (training and test) in the
  /// result collection.
  /// </summary>
  /// <remarks>
  /// Smaller quality values are treated as better; the analyzer has no maximization switch.
  /// NaN qualities never replace a numeric best quality.
  /// </remarks>
  [Item("BestSymbolicRegressionSolutionAnalyzer", "An operator for analyzing the best solution of symbolic regression problems given in symbolic expression tree encoding.")]
  [StorableClass]
  public sealed class BestSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    // Parameter names (keys into the operator's parameter collection).
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string QualityParameterName = "Quality";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string BestSolutionParameterName = "BestSolution";
    private const string BestSolutionQualityParameterName = "BestSolutionQuality";
    private const string ResultsParameterName = "Results";

    // Result names (keys into the result collection).
    // NOTE(review): "validiation" is misspelled, and the solution is selected by the Quality
    // parameter, which may not be a validation-set quality. The text is kept unchanged because
    // results are looked up by this exact name; confirm before renaming.
    private const string BestSolutionResultName = "Best solution (on validiation set)";
    private const string BestSolutionInputvariableCountResultName = "Variables used by best solution";
    private const string BestSolutionTrainingRSquared = "Best solution R² (training)";
    private const string BestSolutionTestRSquared = "Best solution R² (test)";
    private const string BestSolutionTrainingMse = "Best solution mean squared error (training)";
    private const string BestSolutionTestMse = "Best solution mean squared error (test)";
    private const string BestSolutionTrainingRelativeError = "Best solution average relative error (training)";
    private const string BestSolutionTestRelativeError = "Best solution average relative error (test)";

    /// <summary>The symbolic expression trees to analyze.</summary>
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>The interpreter used to evaluate the symbolic expression trees.</summary>
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    /// <summary>The problem data for which the trees are candidate solutions.</summary>
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    /// <summary>The qualities of the trees, in the same order as the trees.</summary>
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    /// <summary>The upper estimation limit passed on to the created solution.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>The lower estimation limit passed on to the created solution.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    /// <summary>The best solution found so far; null until the first solution is stored.</summary>
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    /// <summary>The quality belonging to <see cref="BestSolutionParameter"/>.</summary>
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    /// <summary>The result collection that receives the best solution and its accuracy figures.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }

    /// <summary>
    /// Creates the analyzer and registers all of its lookup parameters.
    /// </summary>
    public BestSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the symbolic regression trees which should be analyzed."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    }

    /// <summary>
    /// Selects the tree with the smallest quality and, when no best solution exists yet or the
    /// tree beats the stored best quality, stores it as the new best solution and creates or
    /// updates the corresponding results.
    /// </summary>
    /// <returns>The operation returned by the base class implementation.</returns>
    public override IOperation Apply() {
      ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
      ResultCollection results = ResultsParameter.ActualValue;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreterParameter.ActualValue;
      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      DataAnalysisProblemData problemData = ProblemDataParameter.ActualValue;
      DoubleValue upperEstimationLimit = UpperEstimationLimitParameter.ActualValue;
      DoubleValue lowerEstimationLimit = LowerEstimationLimitParameter.ActualValue;

      int bestIndex = IndexOfBestQuality(qualities);
      if (bestIndex < 0) {
        // No qualities at all: nothing to analyze, just continue with the successor.
        return base.Apply();
      }

      DoubleValue bestQuality = qualities[bestIndex];
      bool isFirstSolution = BestSolutionParameter.ActualValue == null;

      // The stored quality is only read when a best solution already exists.
      if (isFirstSolution || IsBetter(bestQuality.Value, BestSolutionQualityParameter.ActualValue.Value)) {
        SymbolicExpressionTree bestTree = expressions[bestIndex];
        // The interpreter is cloned so that the model owns its own instance.
        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(), bestTree, GetInputVariables(bestTree));
        var solution = new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit.Value, upperEstimationLimit.Value);

        BestSolutionParameter.ActualValue = solution;
        // The quality item from the scope is stored as-is (no copy is made).
        BestSolutionQualityParameter.ActualValue = bestQuality;

        StoreResult(results, BestSolutionResultName, solution, isFirstSolution);
        StoreResult(results, BestSolutionInputvariableCountResultName, new IntValue(model.InputVariables.Count()), isFirstSolution);
        StoreAccuracyResults(results, problemData, solution, isFirstSolution);
      }

      return base.Apply();
    }

    /// <summary>
    /// Decides whether <paramref name="candidate"/> is a strictly better (smaller) quality than
    /// <paramref name="incumbent"/>. A NaN candidate is never better; any numeric candidate is
    /// better than a NaN incumbent, so a NaN best quality cannot block later updates.
    /// </summary>
    private static bool IsBetter(double candidate, double incumbent) {
      if (double.IsNaN(candidate)) {
        return false;
      }
      return double.IsNaN(incumbent) || candidate < incumbent;
    }

    /// <summary>
    /// Returns the index of the first quality with the smallest value, ignoring NaN values
    /// whenever at least one numeric quality exists. Returns 0 if all qualities are NaN and
    /// -1 if <paramref name="qualities"/> is empty.
    /// </summary>
    private static int IndexOfBestQuality(IEnumerable<DoubleValue> qualities) {
      int bestIndex = -1;
      double bestValue = double.NaN;
      int index = 0;
      foreach (DoubleValue quality in qualities) {
        // Strict comparison keeps the first of several equally good qualities.
        if (bestIndex < 0 || IsBetter(quality.Value, bestValue)) {
          bestIndex = index;
          bestValue = quality.Value;
        }
        index++;
      }
      return bestIndex;
    }

    /// <summary>
    /// Adds a new result named <paramref name="name"/> when <paramref name="create"/> is true,
    /// otherwise replaces the value of the existing result with that name.
    /// </summary>
    private static void StoreResult(ResultCollection results, string name, IItem value, bool create) {
      if (create) {
        results.Add(new Result(name, value));
      } else {
        results[name].Value = value;
      }
    }

    /// <summary>
    /// Calculates R², mean squared error and mean absolute percentage error of
    /// <paramref name="solution"/> against the target variable on the training and test
    /// sample ranges of <paramref name="problemData"/> and stores them in
    /// <paramref name="results"/>.
    /// </summary>
    private static void StoreAccuracyResults(ResultCollection results, DataAnalysisProblemData problemData, SymbolicRegressionSolution solution, bool create) {
      double[] trainingValues = problemData.Dataset.GetVariableValues(
        problemData.TargetVariable.Value,
        problemData.TrainingSamplesStart.Value,
        problemData.TrainingSamplesEnd.Value);
      double[] testValues = problemData.Dataset.GetVariableValues(
        problemData.TargetVariable.Value,
        problemData.TestSamplesStart.Value,
        problemData.TestSamplesEnd.Value);

      double trainingR2 = SimpleRSquaredEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
      double testR2 = SimpleRSquaredEvaluator.Calculate(testValues, solution.EstimatedTestValues);
      double trainingMse = SimpleMSEEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
      double testMse = SimpleMSEEvaluator.Calculate(testValues, solution.EstimatedTestValues);
      double trainingRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
      double testRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, solution.EstimatedTestValues);

      // Results are stored only after every metric has been computed, and in a fixed order.
      StoreResult(results, BestSolutionTrainingRSquared, new DoubleValue(trainingR2), create);
      StoreResult(results, BestSolutionTestRSquared, new DoubleValue(testR2), create);
      StoreResult(results, BestSolutionTrainingMse, new DoubleValue(trainingMse), create);
      StoreResult(results, BestSolutionTestMse, new DoubleValue(testMse), create);
      StoreResult(results, BestSolutionTrainingRelativeError, new DoubleValue(trainingRelError), create);
      StoreResult(results, BestSolutionTestRelativeError, new DoubleValue(testRelError), create);
    }

    /// <summary>
    /// Returns the distinct names of all variables referenced by variable nodes of
    /// <paramref name="tree"/>, visiting the nodes in prefix order.
    /// </summary>
    private IEnumerable<string> GetInputVariables(SymbolicExpressionTree tree) {
      return tree.IterateNodesPrefix()
                 .OfType<VariableTreeNode>()
                 .Select(variableNode => variableNode.VariableName)
                 .Distinct();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.