Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs @ 3616

Last change on this file since 3616 was 3616, checked in by swagner, 14 years ago

Worked on refactoring of algorithm analysis and tracing (#999)

  • adapted GA and TSP
  • removed stuff related to visualizers
File size: 17.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Evaluators;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Analysis;
35
36using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
37
38namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
39  /// <summary>
40  /// An operator for visualizing the best symbolic regression solution based on the validation set.
41  /// </summary>
42  [Item("BestSymbolicExpressionTreeVisualizer", "An operator for visualizing the best symbolic regression solution based on the validation set.")]
43  [StorableClass]
44  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, IAnalyzer {
45    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
46    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
47    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
48    private const string AlphaParameterName = "Alpha";
49    private const string BetaParameterName = "Beta";
50    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
51    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
52    private const string BestValidationSolutionParameterName = "BestValidationSolution";
53    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
54    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
55    private const string QualityParameterName = "Quality";
56    private const string ResultsParameterName = "Results";
57    private const string VariableFrequenciesParameterName = "VariableFrequencies";
58
59    #region parameter properties
60    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
61      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
62    }
63    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
64      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
65    }
66    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
67      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
68    }
69    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
70      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
71    }
72    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
73      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
74    }
75
76    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
77      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
78    }
79    public ILookupParameter<ItemArray<DoubleValue>> AlphaParameter {
80      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[AlphaParameterName]; }
81    }
82    public ILookupParameter<ItemArray<DoubleValue>> BetaParameter {
83      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[BetaParameterName]; }
84    }
85    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
86      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
87    }
88    public ILookupParameter<SymbolicRegressionSolution> BestValidationSolutionParameter {
89      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestValidationSolutionParameterName]; }
90    }
91
92    public ILookupParameter<ItemArray<DoubleValue>> QualityParameter {
93      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[QualityParameterName]; }
94    }
95
96    public ILookupParameter<ResultCollection> ResultParameter {
97      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
98    }
99    public ILookupParameter<DataTable> VariableFrequenciesParameter {
100      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
101    }
102
103    #endregion
104
105    #region properties
106    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
107      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
108    }
109    public DoubleValue UpperEstimationLimit {
110      get { return UpperEstimationLimitParameter.ActualValue; }
111    }
112    public DoubleValue LowerEstimationLimit {
113      get { return LowerEstimationLimitParameter.ActualValue; }
114    }
115    public IntValue ValidationSamplesStart {
116      get { return ValidationSamplesStartParameter.ActualValue; }
117    }
118    public IntValue ValidationSamplesEnd {
119      get { return ValidationSamplesEndParameter.ActualValue; }
120    }
121    public DataTable VariableFrequencies {
122      get { return VariableFrequenciesParameter.ActualValue; }
123      set { VariableFrequenciesParameter.ActualValue = value; }
124    }
125    #endregion
126
127    public BestValidationSymbolicRegressionSolutionVisualizer()
128      : base() {
129      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicRegressionModelParameterName, "The symbolic regression solutions from which the best solution should be visualized."));
130      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
131      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
132      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of symbolic expression trees."));
133      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(AlphaParameterName, "Alpha parameter for linear scaling of the estimated values."));
134      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(BetaParameterName, "Beta parameter for linear scaling ot the estimated values."));
135      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit that should be used as cut off value for the output values of symbolic expression trees."));
136      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit that should be used as cut off value for the output values of symbolic expression trees."));
137      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
138      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
139      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
140      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
141      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
142    }
143
144    public override IOperation Apply() {
145      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
146      ItemArray<DoubleValue> alphas = AlphaParameter.ActualValue;
147      ItemArray<DoubleValue> betas = BetaParameter.ActualValue;
148      var scaledExpressions = from i in Enumerable.Range(0, expressions.Count())
149                              let expr = expressions[i]
150                              let alpha = alphas[i] == null ? 0.0 : alphas[i].Value
151                              let beta = betas[i] == null ? 1.0 : betas[i].Value
152                              select new { Expression = expr, Alpha = alpha, Beta = beta };
153      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;
154      #region update variable frequencies
155      var inputVariables = problemData.InputVariables.Select(x => x.Value);
156      if (VariableFrequencies == null) {
157        VariableFrequencies = new DataTable("Variable Frequencies", "Relative frequency of variable references aggregated over the whole population.");
158        AddResult("VariableFrequencies", VariableFrequencies);
159        // add a data row for each input variable
160        foreach (var inputVariable in inputVariables)
161          VariableFrequencies.Rows.Add(new DataRow(inputVariable));
162      }
163      foreach (var pair in VariableFrequencyAnalyser.CalculateVariableFrequencies(expressions, inputVariables)) {
164        VariableFrequencies.Rows[pair.Key].Values.Add(pair.Value);
165      }
166      #endregion
167
168      #region determination of validation-best solution
169      int validationSamplesStart = ValidationSamplesStart.Value;
170      int validationSamplesEnd = ValidationSamplesEnd.Value;
171      var validationValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
172      double upperEstimationLimit = UpperEstimationLimit.Value;
173      double lowerEstimationLimit = LowerEstimationLimit.Value;
174      var currentBestExpression = (from expression in scaledExpressions
175                                   let validationQuality =
176                                     SymbolicRegressionScaledMeanSquaredErrorEvaluator.CalculateWithScaling(
177                                       SymbolicExpressionTreeInterpreter, expression.Expression,
178                                       lowerEstimationLimit, upperEstimationLimit,
179                                       problemData.Dataset, problemData.TargetVariable.Value,
180                                       validationSamplesStart, validationSamplesEnd,
181                                       expression.Beta, expression.Alpha)
182                                   select new { Expression = expression, ValidationQuality = validationQuality })
183                                   .OrderBy(x => x.ValidationQuality)
184                                   .First();
185
186      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
187      #endregion
188      #region update of validation-best solution
189      if (bestOfRunSolution == null) {
190        // no best of run solution yet -> make a solution from the currentBestExpression
191        UpdateBestOfRunSolution(problemData, currentBestExpression.Expression.Expression, SymbolicExpressionTreeInterpreter, lowerEstimationLimit, upperEstimationLimit, currentBestExpression.Expression.Alpha, currentBestExpression.Expression.Beta);
192      } else {
193        // compare quality of current best with best of run solution
194        var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
195        var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
196        if (bestOfRunValidationQuality > currentBestExpression.ValidationQuality) {
197          UpdateBestOfRunSolution(problemData, currentBestExpression.Expression.Expression, SymbolicExpressionTreeInterpreter, lowerEstimationLimit, upperEstimationLimit, currentBestExpression.Expression.Alpha, currentBestExpression.Expression.Beta);
198        }
199      }
200      #endregion
201      return base.Apply();
202    }
203
204    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter,
205      double lowerEstimationLimit, double upperEstimationLimit,
206      double alpha, double beta) {
207      var newBestSolution = CreateDataAnalysisSolution(problemData, tree, interpreter, lowerEstimationLimit, upperEstimationLimit, alpha, beta);
208      if (BestValidationSolutionParameter.ActualValue == null)
209        BestValidationSolutionParameter.ActualValue = newBestSolution;
210      else
211        // only update model
212        BestValidationSolutionParameter.ActualValue.Model = newBestSolution.Model;
213
214      AddResult("NumberOfInputVariables", new IntValue(CountInputVariables(tree)));
215
216      var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
217      var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);
218
219      AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
220      AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
221      AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
222
223      AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
224      AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
225      AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
226    }
227
228    private int CountInputVariables(SymbolicExpressionTree tree) {
229      return (from node in tree.IterateNodesPrefix().OfType<VariableTreeNode>()
230              select node.VariableName)
231             .Distinct()
232             .Count();
233    }
234
235    private void AddResult(string resultName, IItem value) {
236      var resultCollection = ResultParameter.ActualValue;
237      if (resultCollection.ContainsKey(resultName)) {
238        resultCollection[resultName].Value = value;
239      } else {
240        resultCollection.Add(new Result(resultName, value));
241      }
242    }
243
244    private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter,
245      double lowerEstimationLimit, double upperEstimationLimit,
246      double alpha, double beta) {
247      var mainBranch = tree.Root.SubTrees[0].SubTrees[0];
248      var scaledMainBranch = MakeSum(MakeProduct(beta, mainBranch), alpha);
249
250      // remove the main branch before cloning to prevent cloning of sub-trees
251      tree.Root.SubTrees[0].RemoveSubTree(0);
252      var scaledTree = (SymbolicExpressionTree)tree.Clone();
253      // insert main branch into the original tree again
254      tree.Root.SubTrees[0].InsertSubTree(0, mainBranch);
255      // insert the scaled main branch into the cloned tree
256      scaledTree.Root.SubTrees[0].InsertSubTree(0, scaledMainBranch);
257      // create a new solution using the scaled tree
258      var model = new SymbolicRegressionModel(interpreter, scaledTree, problemData.InputVariables.Select(s => s.Value));
259      return new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
260    }
261
262    private SymbolicExpressionTreeNode MakeSum(SymbolicExpressionTreeNode treeNode, double alpha) {
263      var node = (new Addition()).CreateTreeNode();
264      var alphaConst = MakeConstant(alpha);
265      node.AddSubTree(treeNode);
266      node.AddSubTree(alphaConst);
267      return node;
268    }
269
270    private SymbolicExpressionTreeNode MakeProduct(double beta, SymbolicExpressionTreeNode treeNode) {
271      var node = (new Multiplication()).CreateTreeNode();
272      var betaConst = MakeConstant(beta);
273      node.AddSubTree(treeNode);
274      node.AddSubTree(betaConst);
275      return node;
276    }
277
278    private SymbolicExpressionTreeNode MakeConstant(double c) {
279      var node = (ConstantTreeNode)(new Constant()).CreateTreeNode();
280      node.Value = c;
281      return node;
282    }
283  }
284}
Note: See TracBrowser for help on using the repository browser.