Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 10884

Last change on this file since 10884 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 13.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every call to <c>Analyze</c> the tree with the best validation quality is selected.
  /// If it is better than the best quality stored so far, the tree is linearly scaled
  /// (scaling parameters are calculated on the training indices), wrapped into a
  /// <c>SymbolicRegressionSolution</c> and published through the parameters and the result collection.
  /// Independently of that, the best-so-far and the current best validation quality are
  /// written to the results and appended to the "Validation Quality" data table.
  /// </remarks>
  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    // Names of the parameters / result entries. These strings are used as lookup keys
    // in Parameters[...] and Results[...], so they must not be changed casually.
    private const string MaximizationParameterName = "Maximization";
    private const string CalculateSolutionComplexityParameterName = "CalculateSolutionComplexity";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string BestSolutionLengthParameterName = "Best solution length (validation)";
    // NOTE(review): "validiation" is misspelled, but the string is a parameter and result key;
    // correcting it would change the lookup name, so it is intentionally left as is.
    private const string BestSolutionHeightParameterName = "Best solution height (validiation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";

    #region parameter properties
    /// <summary>Lookup parameter for the direction of optimization.</summary>
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    /// <summary>Value parameter that switches the calculation of length and height of the best solution on or off.</summary>
    public IValueParameter<BoolValue> CalculateSolutionComplexityParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionComplexityParameterName]; }
    }
    /// <summary>Lookup parameter for the best symbolic regression solution.</summary>
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    /// <summary>Lookup parameter for the number of generations calculated so far.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    /// <summary>Lookup parameter for the quality of the best symbolic regression solution.</summary>
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    /// <summary>Lookup parameter for the length of the best symbolic regression solution.</summary>
    public ILookupParameter<IntValue> BestSolutionLengthParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionLengthParameterName]; }
    }
    /// <summary>Lookup parameter for the height of the best symbolic regression solution.</summary>
    public ILookupParameter<IntValue> BestSolutionHeightParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionHeightParameterName]; }
    }
    /// <summary>Lookup parameter for the result collection in which the best solution is stored.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>Lookup parameter for the best known (validation) quality achieved on the data set.</summary>
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    /// <summary>Lookup parameter for the variable frequencies table.</summary>
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }

    #endregion
    #region properties
    /// <summary>Actual value of <see cref="MaximizationParameter"/>.</summary>
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    /// <summary>Value of <see cref="CalculateSolutionComplexityParameter"/>.</summary>
    public BoolValue CalculateSolutionComplexity {
      get { return CalculateSolutionComplexityParameter.Value; }
      set { CalculateSolutionComplexityParameter.Value = value; }
    }
    /// <summary>Actual value of <see cref="ResultsParameter"/>.</summary>
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>.</summary>
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="GenerationsParameter"/>.</summary>
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="BestSolutionQualityParameter"/>; <c>Analyze</c> treats <c>null</c> as "no best solution yet".</summary>
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="BestSolutionLengthParameter"/>.</summary>
    public IntValue BestSolutionLength {
      get { return BestSolutionLengthParameter.ActualValue; }
      set { BestSolutionLengthParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BestSolutionHeightParameter"/>.</summary>
    public IntValue BestSolutionHeight {
      get { return BestSolutionHeightParameter.ActualValue; }
      set { BestSolutionHeightParameter.ActualValue = value; }
    }

    #endregion

    [StorableConstructor]
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates a new analyzer and registers all parameters declared by this class.
    /// </summary>
    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(false)));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    }

    /// <summary>
    /// Creates a copy of this analyzer via the cloning constructor.
    /// </summary>
    /// <param name="cloner">The cloner that is passed on to the cloning constructor.</param>
    /// <returns>The cloned analyzer.</returns>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Adds parameters that are missing from the parameter collection after deserialization.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      #region compatibility remove before releasing 3.4
      if (!Parameters.ContainsKey("Evaluator")) {
        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>("Evaluator", "The evaluator which should be used to evaluate the solution on the validation set."));
      }
      if (!Parameters.ContainsKey(MaximizationParameterName)) {
        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      }
      if (!Parameters.ContainsKey(CalculateSolutionComplexityParameterName)) {
        Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(false)));
      }
      if (!Parameters.ContainsKey(BestSolutionLengthParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      }
      if (!Parameters.ContainsKey(BestSolutionHeightParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      }
      #endregion
    }

    /// <summary>
    /// Selects the tree with the best validation quality, updates the best solution if that
    /// tree improves on the stored best quality, and records the quality values in the results.
    /// </summary>
    /// <param name="trees">The trees to analyze.</param>
    /// <param name="validationQuality">The validation quality of each tree; entry <c>i</c> belongs to <c>trees[i]</c>.</param>
    protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
      bool maximization = Maximization.Value;
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;

      // Strict comparisons: NaN qualities and qualities equal to the infinite start value never win.
      for (int i = 0; i < trees.Length; i++) {
        double quality = validationQuality[i];
        if (IsBetter(quality, bestQuality, maximization)) {
          bestQuality = quality;
          bestTree = trees[i];
        }
      }

      DoubleValue bestSoFar = BestSolutionQuality;

      // No tree could be selected (empty population or no usable quality) and no best solution
      // has been stored yet: there is neither a tree to scale nor a quality to report.
      // Without this guard the null tree would be handed to the scaling code below.
      if (bestTree == null && bestSoFar == null) return;

      // if the best validation tree is better than the current best solution => update
      bool newBest = bestSoFar == null || IsBetter(bestQuality, bestSoFar.Value, maximization);
      if (newBest) {
        UpdateBestSolution(bestTree, bestQuality);
      }

      UpdateQualityResults(bestQuality);
    }

    /// <summary>
    /// Compares two quality values with respect to the direction of optimization.
    /// </summary>
    /// <param name="candidate">The quality to test.</param>
    /// <param name="reference">The quality to compare against.</param>
    /// <param name="maximization">True if larger qualities are better, false if smaller ones are.</param>
    /// <returns>True if <paramref name="candidate"/> is strictly better than <paramref name="reference"/>.</returns>
    private static bool IsBetter(double candidate, double reference, bool maximization) {
      return maximization ? candidate > reference : candidate < reference;
    }

    /// <summary>
    /// Scales the given tree, wraps it into a solution and publishes it as the new best solution.
    /// </summary>
    /// <param name="bestTree">The tree with the best validation quality; must not be null.</param>
    /// <param name="bestQuality">The validation quality of <paramref name="bestTree"/>.</param>
    private void UpdateBestSolution(SymbolicExpressionTree bestTree, double bestQuality) {
      double lowerEstimationLimit = LowerEstimationLimit.Value;
      double upperEstimationLimit = UpperEstimationLimit.Value;
      string targetVariable = ProblemData.TargetVariable.Value;

      // calculate scaling parameters only for the best tree, using the training indices
      // (Calculate hands back beta first and alpha second; Scale takes alpha first)
      double alpha, beta;
      SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
        lowerEstimationLimit, upperEstimationLimit,
        ProblemData.Dataset, targetVariable,
        ProblemData.TrainingIndizes, out beta, out alpha);

      // scale tree for solution
      var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
      var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
        scaledTree);
      var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on validation partition found over the whole run.";

      BestSolutionParameter.ActualValue = solution;
      BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);

      if (CalculateSolutionComplexity.Value) {
        BestSolutionLength = new IntValue(solution.Model.SymbolicExpressionTree.Size);
        BestSolutionHeight = new IntValue(solution.Model.SymbolicExpressionTree.Height);
        // length and height results are always added together, so checking one key is sufficient
        if (!Results.ContainsKey(BestSolutionLengthParameterName)) {
          Results.Add(new Result(BestSolutionLengthParameterName, "Length of the best solution on the validation set", new IntValue()));
          Results.Add(new Result(BestSolutionHeightParameterName, "Height of the best solution on the validation set", new IntValue()));
        }
        Results[BestSolutionLengthParameterName].Value = BestSolutionLength;
        Results[BestSolutionHeightParameterName].Value = BestSolutionHeight;
      }

      BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
    }

    /// <summary>
    /// Writes the best-so-far and the current best validation quality to the results
    /// and appends both values to the validation quality data table.
    /// </summary>
    /// <param name="currentBestQuality">The best validation quality among the trees analyzed in this call.</param>
    private void UpdateQualityResults(double currentBestQuality) {
      // the three entries are always added together, so checking one key is sufficient
      if (!Results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        Results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        Results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        Results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }

      // read after a possible update so that a new best found in this call is reported
      double bestSoFarQuality = BestSolutionQualityParameter.ActualValue.Value;
      Results[BestSolutionQualityParameterName].Value = new DoubleValue(bestSoFarQuality);
      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(currentBestQuality);

      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, bestSoFarQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, currentBestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    }

    /// <summary>
    /// Appends a value to the row with the given name, creating the row if the table has none.
    /// </summary>
    /// <param name="table">The data table that holds the row.</param>
    /// <param name="data">The value to append.</param>
    /// <param name="name">The name of the row.</param>
    /// <param name="description">The description used when the row has to be created.</param>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      table.Rows.TryGetValue(name, out row);
      if (row != null) {
        row.Values.Add(data);
        return;
      }

      // a new row receives its first value before it is added to the table
      row = new DataRow(name, description);
      row.Values.Add(data);
      table.Rows.Add(row);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.