source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs @ 3531

Last change on this file since 3531 was 3531, checked in by gkronber, 12 years ago

Added operator for calculation of relative variable frequencies. #938 (Data types and operators for regression problems)

File size: 14.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Evaluators;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Analysis;
35
36namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
37  /// <summary>
38  /// An operator for visualizing the best symbolic regression solution based on the validation set.
39  /// </summary>
40  [Item("BestSymbolicExpressionTreeVisualizer", "An operator for visualizing the best symbolic regression solution based on the validation set.")]
41  [StorableClass]
42  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
43    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
44    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
45    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
46    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
47    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
48    private const string BestValidationSolutionParameterName = "BestValidationSolution";
49    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
50    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
51    private const string QualityParameterName = "Quality";
52    private const string ResultsParameterName = "Results";
53    private const string VariableFrequenciesParameterName = "VariableFrequencies";
54
55    #region parameter properties
56    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
57      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
58    }
59    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
60      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
61    }
62    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
63      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
64    }
65    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
66      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
67    }
68    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
69      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
70    }
71
72    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
73      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
74    }
75    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
76      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
77    }
78    public ILookupParameter<SymbolicRegressionSolution> BestValidationSolutionParameter {
79      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestValidationSolutionParameterName]; }
80    }
81    ILookupParameter ISolutionsVisualizer.VisualizationParameter {
82      get { return BestValidationSolutionParameter; }
83    }
84
85    public ILookupParameter<ItemArray<DoubleValue>> QualityParameter {
86      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[QualityParameterName]; }
87    }
88
89    public ILookupParameter<ResultCollection> ResultParameter {
90      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
91    }
92    public ILookupParameter<DataTable> VariableFrequenciesParameter {
93      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
94    }
95
96    #endregion
97
98    #region properties
99    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
100      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
101    }
102    public DoubleValue UpperEstimationLimit {
103      get { return UpperEstimationLimitParameter.ActualValue; }
104    }
105    public DoubleValue LowerEstimationLimit {
106      get { return LowerEstimationLimitParameter.ActualValue; }
107    }
108    public IntValue ValidationSamplesStart {
109      get { return ValidationSamplesStartParameter.ActualValue; }
110    }
111    public IntValue ValidationSamplesEnd {
112      get { return ValidationSamplesEndParameter.ActualValue; }
113    }
114    public DataTable VariableFrequencies {
115      get { return VariableFrequenciesParameter.ActualValue; }
116      set { VariableFrequenciesParameter.ActualValue = value; }
117    }
118    #endregion
119
120    public BestValidationSymbolicRegressionSolutionVisualizer()
121      : base() {
122      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicRegressionModelParameterName, "The symbolic regression solutions from which the best solution should be visualized."));
123      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
124      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
125      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of symbolic expression trees."));
126      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit that should be used as cut off value for the output values of symbolic expression trees."));
127      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit that should be used as cut off value for the output values of symbolic expression trees."));
128      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
129      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
130      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
131      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
132      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
133    }
134
135    public override IOperation Apply() {
136      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
137      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;
138      #region update variable frequencies
139      var inputVariables = problemData.InputVariables.Select(x => x.Value);
140      if (VariableFrequencies == null) {
141        VariableFrequencies = new DataTable("Variable Frequencies", "Relative frequency of variable references aggregated over the whole population.");
142        AddResult("VariableFrequencies", VariableFrequencies);
143        // add a data row for each input variable
144        foreach (var inputVariable in inputVariables)
145          VariableFrequencies.Rows.Add(new DataRow(inputVariable));
146      }
147      foreach (var pair in VariableFrequencyAnalyser.CalculateVariableFrequencies(expressions, inputVariables)) {
148        VariableFrequencies.Rows[pair.Key].Values.Add(pair.Value);
149      }
150      #endregion
151
152      #region determination of validation-best solution
153      int validationSamplesStart = ValidationSamplesStart.Value;
154      int validationSamplesEnd = ValidationSamplesEnd.Value;
155      var validationValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
156      double upperEstimationLimit = UpperEstimationLimit.Value;
157      double lowerEstimationLimit = LowerEstimationLimit.Value;
158      var currentBestExpression = (from expression in expressions
159                                   let validationQuality =
160                                     SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(
161                                       SymbolicExpressionTreeInterpreter, expression,
162                                       lowerEstimationLimit, upperEstimationLimit,
163                                       problemData.Dataset, problemData.TargetVariable.Value,
164                                       validationSamplesStart, validationSamplesEnd)
165                                   select new { Expression = expression, ValidationQuality = validationQuality })
166                                   .OrderBy(x => x.ValidationQuality)
167                                   .First();
168
169      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
170      #endregion
171      #region update of validation-best solution
172      if (bestOfRunSolution == null) {
173        // no best of run solution yet -> make a solution from the currentBestExpression
174        UpdateBestOfRunSolution(problemData, currentBestExpression.Expression, SymbolicExpressionTreeInterpreter, lowerEstimationLimit, upperEstimationLimit);
175      } else {
176        // compare quality of current best with best of run solution
177        var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
178        var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
179        if (bestOfRunValidationQuality > currentBestExpression.ValidationQuality) {
180          UpdateBestOfRunSolution(problemData, currentBestExpression.Expression, SymbolicExpressionTreeInterpreter, lowerEstimationLimit, upperEstimationLimit);
181        }
182      }
183      #endregion
184      return base.Apply();
185    }
186
187    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter,
188      double lowerEstimationLimit, double upperEstimationLimit) {
189      var newBestSolution = CreateDataAnalysisSolution(problemData, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
190      if (BestValidationSolutionParameter.ActualValue == null)
191        BestValidationSolutionParameter.ActualValue = newBestSolution;
192      else
193        // only update model
194        BestValidationSolutionParameter.ActualValue.Model = newBestSolution.Model;
195
196      var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
197      var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);
198
199      AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
200      AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
201      AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
202
203      AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
204      AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
205      AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
206    }
207
208    private void AddResult(string resultName, IItem value) {
209      var resultCollection = ResultParameter.ActualValue;
210      if (resultCollection.ContainsKey(resultName)) {
211        resultCollection[resultName].Value = value;
212      } else {
213        resultCollection.Add(new Result(resultName, value));
214      }
215    }
216
217    private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree expression, ISymbolicExpressionTreeInterpreter interpreter,
218      double lowerEstimationLimit, double upperEstimationLimit) {
219      var model = new SymbolicRegressionModel(interpreter, expression, problemData.InputVariables.Select(s => s.Value));
220      return new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
221    }
222  }
223}
Note: See TracBrowser for help on using the repository browser.