Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/SupportVectorRegression/BestSupportVectorRegressionSolutionAnalyzer.cs @ 3877

Last change on this file since 3877 was 3877, checked in by gkronber, 14 years ago

Added linear regression and support vector machine algorithms for data analysis. #1012, #1009

File size: 11.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37using HeuristicLab.Problems.DataAnalysis.SupportVectorMachine;
38
39namespace HeuristicLab.Problems.DataAnalysis.Regression.SupportVectorRegression {
40  [Item("BestSupportVectorRegressionSolutionAnalyzer", "An operator for analyzing the best support vector solution of regression problems.")]
41  [StorableClass]
42  public sealed class BestSupportVectorRegressionSolutionAnalyzer : SingleSuccessorOperator, IAnalyzer {
43    private const string SupportVectorRegressionModelParameterName = "SupportVectorRegressionModel";
44    private const string ProblemDataParameterName = "ProblemData";
45    private const string QualityParameterName = "Quality";
46    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
47    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
48    private const string BestSolutionParameterName = "BestSolution";
49    private const string BestSolutionQualityParameterName = "BestSolutionQuality";
50    private const string ResultsParameterName = "Results";
51    private const string BestSolutionResultName = "Best solution (on validiation set)";
52    private const string BestSolutionInputvariableCountResultName = "Variables used by best solution";
53    private const string BestSolutionTrainingRSquared = "Best solution R² (training)";
54    private const string BestSolutionTestRSquared = "Best solution R² (test)";
55    private const string BestSolutionTrainingMse = "Best solution mean squared error (training)";
56    private const string BestSolutionTestMse = "Best solution mean squared error (test)";
57    private const string BestSolutionTrainingRelativeError = "Best solution average relative error (training)";
58    private const string BestSolutionTestRelativeError = "Best solution average relative error (test)";
59
60    public ScopeTreeLookupParameter<SupportVectorMachineModel> SupportVectorRegressionModelParameter {
61      get { return (ScopeTreeLookupParameter<SupportVectorMachineModel>)Parameters[SupportVectorRegressionModelParameterName]; }
62    }
63    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
64      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
65    }
66    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
67      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
68    }
69    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
70      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
71    }
72    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
73      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
74    }
75    public ILookupParameter<SupportVectorRegressionSolution> BestSolutionParameter {
76      get { return (ILookupParameter<SupportVectorRegressionSolution>)Parameters[BestSolutionParameterName]; }
77    }
78    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
79      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
80    }
81    public ILookupParameter<ResultCollection> ResultsParameter {
82      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
83    }
84
85    public BestSupportVectorRegressionSolutionAnalyzer()
86      : base() {
87      Parameters.Add(new ScopeTreeLookupParameter<SupportVectorMachineModel>(SupportVectorRegressionModelParameterName, "The support vector regression models to analyze."));
88      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the support vector model is a solution."));
89      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the support vector model."));
90      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the support vector model."));
91      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the support vector models which should be analyzed."));
92      Parameters.Add(new LookupParameter<SupportVectorRegressionSolution>(BestSolutionParameterName, "The best support vector solution."));
93      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best support vector solution."));
94      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best support vector solution should be stored."));
95    }
96
97    public override IOperation Apply() {
98      ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
99      ResultCollection results = ResultsParameter.ActualValue;
100      ItemArray<SupportVectorMachineModel> models = SupportVectorRegressionModelParameter.ActualValue;
101      DataAnalysisProblemData problemData = ProblemDataParameter.ActualValue;
102      DoubleValue upperEstimationLimit = UpperEstimationLimitParameter.ActualValue;
103      DoubleValue lowerEstimationLimit = LowerEstimationLimitParameter.ActualValue;
104      var inputVariables = ProblemDataParameter.ActualValue.InputVariables.Select(x => x.Value);
105
106      int i = qualities.Select((x, index) => new { index, x.Value }).OrderBy(x => x.Value).First().index;
107
108      SupportVectorRegressionSolution solution = BestSolutionParameter.ActualValue;
109      if (solution == null) {
110        solution = new SupportVectorRegressionSolution(problemData, models[i], inputVariables, lowerEstimationLimit.Value, upperEstimationLimit.Value);
111        BestSolutionParameter.ActualValue = solution;
112        BestSolutionQualityParameter.ActualValue = qualities[i];
113        results.Add(new Result(BestSolutionResultName, solution));
114        results.Add(new Result(BestSolutionInputvariableCountResultName, new IntValue(inputVariables.Count())));
115        #region calculate R2,MSE,Rel Error
116        double[] trainingValues = problemData.Dataset.GetVariableValues(
117          problemData.TargetVariable.Value,
118          problemData.TrainingSamplesStart.Value,
119          problemData.TrainingSamplesEnd.Value);
120        double[] testValues = problemData.Dataset.GetVariableValues(
121          problemData.TargetVariable.Value,
122          problemData.TestSamplesStart.Value,
123          problemData.TestSamplesEnd.Value);
124        double trainingR2 = SimpleRSquaredEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
125        double testR2 = SimpleRSquaredEvaluator.Calculate(testValues, solution.EstimatedTestValues);
126        double trainingMse = SimpleMSEEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
127        double testMse = SimpleMSEEvaluator.Calculate(testValues, solution.EstimatedTestValues);
128        double trainingRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
129        double testRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, solution.EstimatedTestValues);
130        results.Add(new Result(BestSolutionTrainingRSquared, new DoubleValue(trainingR2)));
131        results.Add(new Result(BestSolutionTestRSquared, new DoubleValue(testR2)));
132        results.Add(new Result(BestSolutionTrainingMse, new DoubleValue(trainingMse)));
133        results.Add(new Result(BestSolutionTestMse, new DoubleValue(testMse)));
134        results.Add(new Result(BestSolutionTrainingRelativeError, new DoubleValue(trainingRelError)));
135        results.Add(new Result(BestSolutionTestRelativeError, new DoubleValue(testRelError)));
136        #endregion
137      } else {
138        if (BestSolutionQualityParameter.ActualValue.Value > qualities[i].Value) {
139          solution = new SupportVectorRegressionSolution(problemData, models[i], inputVariables, lowerEstimationLimit.Value, upperEstimationLimit.Value);
140          BestSolutionParameter.ActualValue = solution;
141          BestSolutionQualityParameter.ActualValue = qualities[i];
142          results[BestSolutionResultName].Value = solution;
143          results[BestSolutionInputvariableCountResultName].Value = new IntValue(inputVariables.Count());
144          #region update R2,MSE, Rel Error
145          double[] trainingValues = problemData.Dataset.GetVariableValues(
146            problemData.TargetVariable.Value,
147            problemData.TrainingSamplesStart.Value,
148            problemData.TrainingSamplesEnd.Value);
149          double[] testValues = problemData.Dataset.GetVariableValues(
150            problemData.TargetVariable.Value,
151            problemData.TestSamplesStart.Value,
152            problemData.TestSamplesEnd.Value);
153          double trainingR2 = SimpleRSquaredEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
154          double testR2 = SimpleRSquaredEvaluator.Calculate(testValues, solution.EstimatedTestValues);
155          double trainingMse = SimpleMSEEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
156          double testMse = SimpleMSEEvaluator.Calculate(testValues, solution.EstimatedTestValues);
157          double trainingRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
158          double testRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, solution.EstimatedTestValues);
159          results[BestSolutionTrainingRSquared].Value = new DoubleValue(trainingR2);
160          results[BestSolutionTestRSquared].Value = new DoubleValue(testR2);
161          results[BestSolutionTrainingMse].Value = new DoubleValue(trainingMse);
162          results[BestSolutionTestMse].Value = new DoubleValue(testMse);
163          results[BestSolutionTrainingRelativeError].Value = new DoubleValue(trainingRelError);
164          results[BestSolutionTestRelativeError].Value = new DoubleValue(testRelError);
165          #endregion
166        }
167      }
168
169      return base.Apply();
170    }
171
172    private IEnumerable<string> GetInputVariables(SymbolicExpressionTree tree) {
173      return (from varNode in tree.IterateNodesPrefix().OfType<VariableTreeNode>()
174              select varNode.VariableName).Distinct();
175    }
176  }
177}
Note: See TracBrowser for help on using the repository browser.