Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 5863

Last change on this file since 5863 was 5863, checked in by mkommend, 13 years ago

#1418: Added NonDiscoverableType attribute to outdated analyzers.

File size: 14.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HeuristicLab.Analysis;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.PluginInfrastructure;
31using HeuristicLab.Problems.DataAnalysis.Symbolic;
32
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every call of <see cref="Analyze"/> the tree with the best validation quality among the
  /// supplied trees is selected. If it beats the best quality stored so far (or nothing is stored
  /// yet), the tree is optionally linearly scaled on the training rows, wrapped into a
  /// <see cref="SymbolicRegressionSolution"/> and written to the best-solution parameters and the
  /// result collection. Afterwards the best-so-far and the current best validation quality are
  /// appended to the "Validation Quality" data table.
  /// </remarks>
  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  [NonDiscoverableType]
  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    // Parameter names. Several of them double as keys in the result collection.
    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    private const string MaximizationParameterName = "Maximization";
    private const string CalculateSolutionComplexityParameterName = "CalculateSolutionComplexity";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string BestSolutionLengthParameterName = "Best solution length (validation)";
    // NOTE(review): "validiation" is misspelled, but this string is both a parameter name and a
    // result key; renaming it would presumably break lookups of already stored parameters and
    // results, so it is intentionally left unchanged - confirm before correcting.
    private const string BestSolutionHeightParameterName = "Best solution height (validiation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";

    #region parameter properties
    /// <summary>Lookup parameter holding the direction of optimization.</summary>
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    /// <summary>Value parameter that switches the length/height calculation of the best solution on or off.</summary>
    public IValueParameter<BoolValue> CalculateSolutionComplexityParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionComplexityParameterName]; }
    }
    /// <summary>Lookup parameter for the best symbolic regression solution.</summary>
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    /// <summary>Lookup parameter for the number of generations calculated so far.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    /// <summary>Lookup parameter for the quality of the best symbolic regression solution.</summary>
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    /// <summary>Lookup parameter for the length of the best symbolic regression solution.</summary>
    public ILookupParameter<IntValue> BestSolutionLengthParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionLengthParameterName]; }
    }
    /// <summary>Lookup parameter for the height of the best symbolic regression solution.</summary>
    public ILookupParameter<IntValue> BestSolutionHeightParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionHeightParameterName]; }
    }
    /// <summary>Lookup parameter for the result collection the best solution is stored in.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>Lookup parameter for the best known (validation) quality achieved on the data set.</summary>
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    /// <summary>Lookup parameter for the variable frequencies table.</summary>
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }
    /// <summary>Value lookup parameter that switches linear scaling of the best solution on or off.</summary>
    public IValueLookupParameter<BoolValue> ApplyLinearScalingParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="MaximizationParameter"/>.</summary>
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    /// <summary>Value of <see cref="CalculateSolutionComplexityParameter"/>.</summary>
    public BoolValue CalculateSolutionComplexity {
      get { return CalculateSolutionComplexityParameter.Value; }
      set { CalculateSolutionComplexityParameter.Value = value; }
    }
    /// <summary>Actual value of <see cref="ResultsParameter"/>.</summary>
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>.</summary>
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="GenerationsParameter"/>.</summary>
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="BestSolutionQualityParameter"/>; compared against null in <see cref="Analyze"/> to detect that no best solution has been stored yet.</summary>
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="BestSolutionLengthParameter"/>.</summary>
    public IntValue BestSolutionLength {
      get { return BestSolutionLengthParameter.ActualValue; }
      set { BestSolutionLengthParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BestSolutionHeightParameter"/>.</summary>
    public IntValue BestSolutionHeight {
      get { return BestSolutionHeightParameter.ActualValue; }
      set { BestSolutionHeightParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="ApplyLinearScalingParameter"/>.</summary>
    public BoolValue ApplyLinearScaling {
      get { return ApplyLinearScalingParameter.ActualValue; }
      set { ApplyLinearScalingParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>Constructor marked for the persistence framework; only forwards to the base class.</summary>
    [StorableConstructor]
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor used by <see cref="Clone"/>; only forwards to the base class.</summary>
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates the analyzer and registers all of its parameters. Linear scaling and the
    /// calculation of the solution complexity are both switched on by default.
    /// </summary>
    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ValueLookupParameter<BoolValue>(ApplyLinearScalingParameterName, "The switch determines if the best solution should be linearly scaled on the whole training set.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    }

    /// <summary>Creates a copy of this analyzer via the cloning constructor.</summary>
    /// <param name="cloner">The cloner that is passed on to the cloning constructor.</param>
    /// <returns>The cloned analyzer.</returns>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Adds every parameter that is missing after deserialization so that instances stored
    /// without them still expose the full parameter set.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      #region compatibility remove before releasing 3.4
      if (!Parameters.ContainsKey("Evaluator")) {
        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>("Evaluator", "The evaluator which should be used to evaluate the solution on the validation set."));
      }
      if (!Parameters.ContainsKey(MaximizationParameterName)) {
        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      }
      if (!Parameters.ContainsKey(CalculateSolutionComplexityParameterName)) {
        // Added with 'false' here, unlike the constructor default of 'true'.
        // NOTE(review): presumably so that previously stored instances keep their old behavior - confirm.
        Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(false)));
      }
      if (!Parameters.ContainsKey(BestSolutionLengthParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      }
      if (!Parameters.ContainsKey(BestSolutionHeightParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      }
      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
        Parameters.Add(new ValueLookupParameter<BoolValue>(ApplyLinearScalingParameterName, "The switch determines if the best solution should be linearly scaled on the whole training set.", new BoolValue(true)));
      }
      #endregion
    }

    /// <summary>
    /// Selects the tree with the best validation quality, promotes it to the best solution if it
    /// improves on the stored best quality, and records the best-so-far and the current best
    /// validation quality in the result collection.
    /// </summary>
    /// <param name="trees">The trees to analyze.</param>
    /// <param name="validationQuality">The validation quality of each tree; element i belongs to trees[i].</param>
    protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
      // read the optimization direction once instead of on every comparison
      bool maximization = Maximization.Value;

      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;
      for (int i = 0; i < trees.Length; i++) {
        double quality = validationQuality[i];
        if (IsBetterQuality(quality, bestQuality, maximization)) {
          bestQuality = quality;
          bestTree = trees[i];
        }
      }

      // if the best validation tree is better than the current best solution => update
      // bestTree stays null when no tree was supplied or no quality beat the initial bound
      // (e.g. all qualities are NaN); a solution must never be built from a null tree.
      DoubleValue storedBestQuality = BestSolutionQuality;
      bool newBest = bestTree != null &&
        (storedBestQuality == null || IsBetterQuality(bestQuality, storedBestQuality.Value, maximization));
      if (newBest) {
        var solution = CreateValidationBestSolution(bestTree);

        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);

        if (CalculateSolutionComplexity.Value) {
          UpdateComplexityResults(solution);
        }

        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
      }

      // no best solution exists yet (nothing usable has been analyzed so far) => nothing to report
      if (BestSolutionQualityParameter.ActualValue == null) return;

      UpdateQualityResults(bestQuality);
    }

    /// <summary>Tells whether <paramref name="candidate"/> is strictly better than <paramref name="reference"/>.</summary>
    /// <remarks>A NaN on either side yields false, because both strict comparisons are false then.</remarks>
    private static bool IsBetterQuality(double candidate, double reference, bool maximization) {
      return maximization ? candidate > reference : candidate < reference;
    }

    /// <summary>Builds the named solution for <paramref name="tree"/>, linearly scaling the tree first if ApplyLinearScaling is set.</summary>
    private SymbolicRegressionSolution CreateValidationBestSolution(SymbolicExpressionTree tree) {
      // missing limits mean "unbounded"
      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
      string targetVariable = ProblemData.TargetVariable.Value;

      if (ApplyLinearScaling.Value) {
        // calculate scaling parameters and only for the best tree using the full training set
        // note the argument order: Calculate is called with (out beta, out alpha), Scale with (alpha, beta)
        double alpha, beta;
        SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, tree,
          lowerEstimationLimit, upperEstimationLimit,
          ProblemData.Dataset, targetVariable,
          ProblemData.TrainingIndizes, out beta, out alpha);

        // scale tree for solution
        tree = SymbolicRegressionSolutionLinearScaler.Scale(tree, alpha, beta);
      }

      // the solution gets its own clones of the interpreter and of the problem data
      var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
        tree);
      var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on validation partition found over the whole run.";
      return solution;
    }

    /// <summary>Stores length and height of the solution's tree in the parameters and in the result collection.</summary>
    private void UpdateComplexityResults(SymbolicRegressionSolution solution) {
      BestSolutionLength = new IntValue(solution.Model.SymbolicExpressionTree.Size);
      BestSolutionHeight = new IntValue(solution.Model.SymbolicExpressionTree.Height);

      ResultCollection results = Results;
      // both results are always created together, so checking one key is sufficient
      if (!results.ContainsKey(BestSolutionLengthParameterName)) {
        results.Add(new Result(BestSolutionLengthParameterName, "Length of the best solution on the validation set", new IntValue()));
        results.Add(new Result(BestSolutionHeightParameterName, "Height of the best solution on the validation set", new IntValue()));
      }
      results[BestSolutionLengthParameterName].Value = BestSolutionLength;
      results[BestSolutionHeightParameterName].Value = BestSolutionHeight;
    }

    /// <summary>
    /// Writes the best-so-far and the current best validation quality to the result collection and
    /// appends both to the validation quality data table. Requires a stored best solution quality.
    /// </summary>
    private void UpdateQualityResults(double currentBestQuality) {
      ResultCollection results = Results;
      // the three results are always created together, so checking one key is sufficient
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }

      double bestSoFarQuality = BestSolutionQualityParameter.ActualValue.Value;
      results[BestSolutionQualityParameterName].Value = new DoubleValue(bestSoFarQuality);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(currentBestQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, bestSoFarQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, currentBestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    }

    /// <summary>Appends <paramref name="data"/> to the row called <paramref name="name"/>, creating the row if the table does not contain it yet.</summary>
    /// <param name="table">The data table to extend.</param>
    /// <param name="data">The value to append.</param>
    /// <param name="name">The name of the row.</param>
    /// <param name="description">The description used when the row has to be created.</param>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      table.Rows.TryGetValue(name, out row);
      bool isNewRow = row == null;
      if (isNewRow) {
        row = new DataRow(name, description);
      }
      row.Values.Add(data);
      // a new row is attached only after it received its first value
      if (isNewRow) {
        table.Rows.Add(row);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.