Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression/3.3/Symbolic/Analyzer/ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer.cs @ 4112

Last change on this file since 4112 was 4112, checked in by gkronber, 13 years ago

Fixed some bugs in multi-variate regression classes. #1089

File size: 12.5 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic.Evaluators;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic vector regression solution.
  /// </summary>
  /// <remarks>
  /// Each application evaluates every symbolic expression tree found in the scope tree on the
  /// validation rows, keeps the tree with the lowest normalized MSE, updates the best-so-far
  /// solution and quality if the new tree is better, and records both the best-so-far and the
  /// current-best quality in the result collection.
  /// </remarks>
  [Item("ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic vector regression solution.")]
  [StorableClass]
  public sealed class ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer : SingleSuccessorOperator, IAnalyzer {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string ScaledSymbolicExpressionTreeParameterName = "ScaledSymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string TrainingSamplesStartParameterName = "TrainingSamplesStart";
    private const string TrainingSamplesEndParameterName = "TrainingSamplesEnd";
    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    private const string TestSamplesStartParameterName = "TestSamplesStart";
    private const string TestSamplesEndParameterName = "TestSamplesEnd";
    private const string QualityParameterName = "Quality";
    private const string ScaledQualityParameterName = "ScaledQuality";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string BestKnownQualityParameterName = "BestKnownQuality";

    #region parameter properties
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleArray> AlphaParameter {
      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[AlphaParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleArray> BetaParameter {
      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[BetaParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public ILookupParameter<SymbolicExpressionTree> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    #endregion
    #region properties
    public MultiVariateDataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    public ItemArray<DoubleArray> Alpha {
      get { return AlphaParameter.ActualValue; }
    }
    public ItemArray<DoubleArray> Beta {
      get { return BetaParameter.ActualValue; }
    }
    public DoubleArray LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public DoubleArray UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer and registers all lookup parameters it reads from or writes to.
    /// </summary>
    public ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(AlphaParameterName, "The alpha parameter for linear scaling."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(BetaParameterName, "The beta parameter for linear scaling."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<MultiVariateDataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
    }

    /// <summary>
    /// Evaluates all trees on the validation rows, updates the best solution found so far and
    /// writes the best-so-far and current-best validation qualities to the result collection.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      var trees = SymbolicExpressionTreeParameter.ActualValue;

      // Resolve the scope-tree lookups once instead of once per tree.
      var alpha = Alpha;
      IEnumerable<SymbolicExpressionTree> scaledTrees;
      if (alpha.Length == trees.Length) {
        // Beta is only resolved when scaling is actually applied.
        var beta = Beta;
        // NOTE(review): Beta is passed before Alpha; confirm this matches the parameter order of Scale.
        scaledTrees = from i in Enumerable.Range(0, trees.Length)
                      select SymbolicVectorRegressionSolutionLinearScaler.Scale(trees[i], beta[i].ToArray(), alpha[i].ToArray());
      } else {
        // Scaling coefficients are not available for every tree: evaluate the trees unscaled.
        scaledTrees = trees;
      }

      var problemData = ProblemData;
      var lowerEstimationLimit = LowerEstimationLimit;
      var upperEstimationLimit = UpperEstimationLimit;
      // Materialized so the checked-items query is not re-run for every tree.
      string[] selectedTargetVariables = (from item in problemData.TargetVariables.CheckedItems
                                          select item.Value.Value).ToArray();
      var interpreter = SymbolicExpressionTreeInterpreterParameter.ActualValue;
      int validationStart = ValidationSamplesStartParameter.ActualValue.Value;
      int validationEnd = ValidationSamplesEndParameter.ActualValue.Value;
      // validationEnd is used as an exclusive bound: rows are validationStart .. validationEnd - 1.
      IEnumerable<int> rows = Enumerable.Range(validationStart, validationEnd - validationStart);

      SymbolicExpressionTree bestTree = null;
      double bestValidationError = double.PositiveInfinity;
      foreach (var tree in scaledTrees) {
        // calculate error on validation set
        double validationMse = SymbolicVectorRegressionNormalizedMseEvaluator.Calculate(tree, interpreter, problemData, selectedTargetVariables, rows, lowerEstimationLimit, upperEstimationLimit);
        // A NaN error never compares as smaller, so such trees are never selected.
        if (bestValidationError > validationMse) {
          bestValidationError = validationMse;
          bestTree = tree;
        }
      }

      // Replace the best-so-far solution if there is none yet or the current best is strictly better.
      var previousBestQuality = BestSolutionQualityParameter.ActualValue;
      if (previousBestQuality == null || previousBestQuality.Value > bestValidationError) {
        BestSolutionParameter.ActualValue = bestTree;
        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationError);
      }

      // update results
      var results = ResultsParameter.ActualValue;
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionParameterName, BestSolutionParameter.ActualValue));
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }
      double bestQualitySoFar = BestSolutionQualityParameter.ActualValue.Value;
      results[BestSolutionParameterName].Value = BestSolutionParameter.ActualValue;
      results[BestSolutionQualityParameterName].Value = new DoubleValue(bestQualitySoFar);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationError);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, bestQualitySoFar, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, bestValidationError, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);

      return base.Apply();
    }

    /// <summary>
    /// Appends a value to the data row with the given name, creating the row first if the table
    /// does not contain it yet.
    /// </summary>
    /// <param name="table">The data table that holds the row.</param>
    /// <param name="data">The value to append.</param>
    /// <param name="name">The name of the row to append to.</param>
    /// <param name="description">The description used when the row has to be created.</param>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      if (table.Rows.TryGetValue(name, out row)) {
        row.Values.Add(data);
      } else {
        // The value is added before the row is attached, as in the original implementation.
        row = new DataRow(name, description);
        row.Values.Add(data);
        table.Rows.Add(row);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.