Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs @ 3542

Last change on this file since 3542 was 3542, checked in by gkronber, 14 years ago

Fixed problem with BestValidationSolutionVisualizer when scaling parameters are not available. #938 (Data types and operators for regression problems)

File size: 17.1 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Evaluators;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Analysis;
35
36using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
37
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// An operator for visualizing the best symbolic regression solution based on the validation set.
  /// </summary>
  /// <remarks>
  /// On every application the operator
  /// (1) appends the current variable reference frequencies to the "VariableFrequencies" data table,
  /// (2) evaluates every expression of the population on the validation partition, and
  /// (3) replaces the stored best validation solution when the best expression of the current
  ///     population has a smaller validation error than the stored one.
  /// </remarks>
  [Item("BestSymbolicExpressionTreeVisualizer", "An operator for visualizing the best symbolic regression solution based on the validation set.")]
  [StorableClass]
  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string BestValidationSolutionParameterName = "BestValidationSolution";
    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    private const string QualityParameterName = "Quality";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";

    #region parameter properties
    /// <summary>Lookup parameter for the interpreter used to evaluate the expression trees.</summary>
    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    /// <summary>Parameter for the upper cut-off value of estimated output values.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>Parameter for the lower cut-off value of estimated output values.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    /// <summary>Parameter for the start index of the validation partition.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    /// <summary>Parameter for the end index of the validation partition.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }

    /// <summary>Lookup parameter for the expression trees of the population (registered under "SymbolicRegressionModel").</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
    }
    /// <summary>Lookup parameter for the per-solution alpha (additive) scaling values.</summary>
    public ILookupParameter<ItemArray<DoubleValue>> AlphaParameter {
      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[AlphaParameterName]; }
    }
    /// <summary>Lookup parameter for the per-solution beta (multiplicative) scaling values.</summary>
    public ILookupParameter<ItemArray<DoubleValue>> BetaParameter {
      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[BetaParameterName]; }
    }
    /// <summary>Lookup parameter for the problem data on which solutions are evaluated.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter holding the best solution (with respect to the validation partition) found so far.</summary>
    public ILookupParameter<SymbolicRegressionSolution> BestValidationSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestValidationSolutionParameterName]; }
    }
    ILookupParameter ISolutionsVisualizer.VisualizationParameter {
      get { return BestValidationSolutionParameter; }
    }

    /// <summary>Lookup parameter for the qualities of the solutions in the population.</summary>
    public ILookupParameter<ItemArray<DoubleValue>> QualityParameter {
      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[QualityParameterName]; }
    }

    /// <summary>Lookup parameter for the result collection that receives the error measures and the frequency table.</summary>
    public ILookupParameter<ResultCollection> ResultParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>Lookup parameter for the data table of variable reference frequencies.</summary>
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }

    #endregion

    #region properties
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public IntValue ValidationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Creates the operator and registers all of its parameters.
    /// </summary>
    public BestValidationSymbolicRegressionSolutionVisualizer()
      : base() {
      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicRegressionModelParameterName, "The symbolic regression solutions from which the best solution should be visualized."));
      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of symbolic expression trees."));
      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(AlphaParameterName, "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(BetaParameterName, "Beta parameter for linear scaling ot the estimated values."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit that should be used as cut off value for the output values of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit that should be used as cut off value for the output values of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
    }

    /// <summary>
    /// Updates the variable frequency table and, if the current population contains a model with a
    /// smaller validation error than the stored best validation solution, replaces that solution.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      ItemArray<DoubleValue> alphas = AlphaParameter.ActualValue;
      ItemArray<DoubleValue> betas = BetaParameter.ActualValue;
      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;

      #region update variable frequencies
      var inputVariables = problemData.InputVariables.Select(x => x.Value);
      if (VariableFrequencies == null) {
        VariableFrequencies = new DataTable("Variable Frequencies", "Relative frequency of variable references aggregated over the whole population.");
        AddResult("VariableFrequencies", VariableFrequencies);
        // add a data row for each input variable
        foreach (var inputVariable in inputVariables)
          VariableFrequencies.Rows.Add(new DataRow(inputVariable));
      }
      foreach (var pair in VariableFrequencyAnalyser.CalculateVariableFrequencies(expressions, inputVariables)) {
        VariableFrequencies.Rows[pair.Key].Values.Add(pair.Value);
      }
      #endregion

      #region determination of validation-best solution
      int validationSamplesStart = ValidationSamplesStart.Value;
      int validationSamplesEnd = ValidationSamplesEnd.Value;
      double upperEstimationLimit = UpperEstimationLimit.Value;
      double lowerEstimationLimit = LowerEstimationLimit.Value;
      // resolve the interpreter once instead of once per evaluated expression
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;

      SymbolicExpressionTree bestExpression = null;
      double bestAlpha = 0.0;
      double bestBeta = 1.0;
      double bestValidationQuality = double.PositiveInfinity;

      int populationSize = expressions.Count();
      for (int i = 0; i < populationSize; i++) {
        // missing scaling parameters fall back to the identity transformation (alpha = 0, beta = 1)
        double alpha = alphas[i] == null ? 0.0 : alphas[i].Value;
        double beta = betas[i] == null ? 1.0 : betas[i].Value;
        // NOTE(review): beta is passed before alpha here; confirm against the parameter order of CalculateWithScaling
        double validationQuality =
          SymbolicRegressionScaledMeanSquaredErrorEvaluator.CalculateWithScaling(
            interpreter, expressions[i],
            lowerEstimationLimit, upperEstimationLimit,
            problemData.Dataset, problemData.TargetVariable.Value,
            validationSamplesStart, validationSamplesEnd,
            beta, alpha);

        // A NaN error carries no ranking information; sorting would place it in front of every
        // real number and thereby select it as the "best" model, so it is skipped explicitly.
        if (double.IsNaN(validationQuality)) continue;

        // strict '<' keeps the first of several equally good expressions
        if (bestExpression == null || validationQuality < bestValidationQuality) {
          bestExpression = expressions[i];
          bestAlpha = alpha;
          bestBeta = beta;
          bestValidationQuality = validationQuality;
        }
      }

      // empty population or no expression with a usable validation error -> nothing to update
      if (bestExpression == null)
        return base.Apply();

      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
      #endregion
      #region update of validation-best solution
      if (bestOfRunSolution == null) {
        // no best of run solution yet -> make a solution from the best expression of this population
        UpdateBestOfRunSolution(problemData, bestExpression, interpreter, lowerEstimationLimit, upperEstimationLimit, bestAlpha, bestBeta);
      } else {
        // compare quality of current best with best of run solution
        var validationValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
        var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
        var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
        // a stored solution whose error is NaN must be replaceable; 'NaN > x' alone is always false
        if (double.IsNaN(bestOfRunValidationQuality) || bestOfRunValidationQuality > bestValidationQuality) {
          UpdateBestOfRunSolution(problemData, bestExpression, interpreter, lowerEstimationLimit, upperEstimationLimit, bestAlpha, bestBeta);
        }
      }
      #endregion
      return base.Apply();
    }

    /// <summary>
    /// Stores a solution built from <paramref name="tree"/> as the best validation solution and
    /// writes its training and test error measures into the result collection.
    /// </summary>
    /// <param name="problemData">The problem data the solution is created for.</param>
    /// <param name="tree">The expression tree of the new best model (without scaling).</param>
    /// <param name="interpreter">The interpreter used by the created model.</param>
    /// <param name="lowerEstimationLimit">Lower cut-off value handed to the created solution.</param>
    /// <param name="upperEstimationLimit">Upper cut-off value handed to the created solution.</param>
    /// <param name="alpha">Additive scaling constant.</param>
    /// <param name="beta">Multiplicative scaling constant.</param>
    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter,
      double lowerEstimationLimit, double upperEstimationLimit,
      double alpha, double beta) {
      var newBestSolution = CreateDataAnalysisSolution(problemData, tree, interpreter, lowerEstimationLimit, upperEstimationLimit, alpha, beta);
      if (BestValidationSolutionParameter.ActualValue == null)
        BestValidationSolutionParameter.ActualValue = newBestSolution;
      else
        // only update model
        BestValidationSolutionParameter.ActualValue.Model = newBestSolution.Model;

      var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
      var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);

      AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
      AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
      AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));

      AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
      AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
      AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
    }

    /// <summary>
    /// Sets the value of the result named <paramref name="resultName"/>, adding the result to the
    /// result collection when it does not exist yet.
    /// </summary>
    private void AddResult(string resultName, IItem value) {
      var resultCollection = ResultParameter.ActualValue;
      if (resultCollection.ContainsKey(resultName)) {
        resultCollection[resultName].Value = value;
      } else {
        resultCollection.Add(new Result(resultName, value));
      }
    }

    /// <summary>
    /// Creates a solution whose model is a copy of <paramref name="tree"/> in which the main branch
    /// <c>f</c> is replaced by <c>f * beta + alpha</c>.
    /// </summary>
    /// <remarks>
    /// The given tree is not modified and shares no nodes with the returned solution.
    /// </remarks>
    /// <returns>A new solution for <paramref name="problemData"/> based on the scaled copy of the tree.</returns>
    private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter,
      double lowerEstimationLimit, double upperEstimationLimit,
      double alpha, double beta) {
      // Copy the whole tree first and rewire only the copy. Clone() is expected to copy all
      // sub-trees, so the tree of the population stays untouched even if a later step throws.
      var scaledTree = (SymbolicExpressionTree)tree.Clone();
      var branchParent = scaledTree.Root.SubTrees[0];
      var mainBranch = branchParent.SubTrees[0];

      // detach the copied main branch and put the scaled expression in its place
      branchParent.RemoveSubTree(0);
      branchParent.InsertSubTree(0, MakeSum(MakeProduct(beta, mainBranch), alpha));

      // create a new solution using the scaled tree
      var model = new SymbolicRegressionModel(interpreter, scaledTree, problemData.InputVariables.Select(s => s.Value));
      return new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
    }

    /// <summary>Creates an addition node with the sub-trees <paramref name="treeNode"/> and the constant <paramref name="alpha"/>.</summary>
    private SymbolicExpressionTreeNode MakeSum(SymbolicExpressionTreeNode treeNode, double alpha) {
      var node = (new Addition()).CreateTreeNode();
      var alphaConst = MakeConstant(alpha);
      node.AddSubTree(treeNode);
      node.AddSubTree(alphaConst);
      return node;
    }

    /// <summary>Creates a multiplication node with the sub-trees <paramref name="treeNode"/> and the constant <paramref name="beta"/>.</summary>
    private SymbolicExpressionTreeNode MakeProduct(double beta, SymbolicExpressionTreeNode treeNode) {
      var node = (new Multiplication()).CreateTreeNode();
      var betaConst = MakeConstant(beta);
      node.AddSubTree(treeNode);
      node.AddSubTree(betaConst);
      return node;
    }

    /// <summary>Creates a constant node with the value <paramref name="c"/>.</summary>
    private SymbolicExpressionTreeNode MakeConstant(double c) {
      var node = (ConstantTreeNode)(new Constant()).CreateTreeNode();
      node.Value = c;
      return node;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.