Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 5594

Last change on this file since 5594 was 5445, checked in by swagner, 14 years ago

Updated year of copyrights (#1406)

File size: 14.5 KB
RevLine 
[3996]1#region License Information
2/* HeuristicLab
[5445]3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[3996]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[4068]22using HeuristicLab.Analysis;
[4722]23using HeuristicLab.Common;
[3996]24using HeuristicLab.Core;
25using HeuristicLab.Data;
[4068]26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
[3996]27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Symbolic;
31
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every call of <see cref="Analyze"/> the tree with the best validation quality is determined.
  /// If it beats the best quality stored so far, a new solution is created from it (optionally
  /// linearly scaled on the training rows) and published through the lookup parameters and the
  /// result collection. The best and the current validation quality are appended to a data table
  /// on every call.
  /// </remarks>
  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    // Names of the parameters and results used by this analyzer. Several of these literals are
    // used both as parameter names and as keys in the result collection.
    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    private const string MaximizationParameterName = "Maximization";
    private const string CalculateSolutionComplexityParameterName = "CalculateSolutionComplexity";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string BestSolutionLengthParameterName = "Best solution length (validation)";
    // NOTE(review): "validiation" is misspelled. The literal is a parameter name and a result key,
    // so it is intentionally left unchanged here; renaming it presumably requires a compatibility
    // step for previously stored items - confirm before fixing.
    private const string BestSolutionHeightParameterName = "Best solution height (validiation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";

    #region parameter properties
    /// <summary>Lookup parameter for the direction of optimization.</summary>
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    /// <summary>Value parameter that switches the calculation of length and height of the validation best solution on or off.</summary>
    public IValueParameter<BoolValue> CalculateSolutionComplexityParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionComplexityParameterName]; }
    }
    /// <summary>Lookup parameter for the best symbolic regression solution.</summary>
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    /// <summary>Lookup parameter for the number of generations calculated so far.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    /// <summary>Lookup parameter for the quality of the best symbolic regression solution.</summary>
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    /// <summary>Lookup parameter for the length of the best symbolic regression solution.</summary>
    public ILookupParameter<IntValue> BestSolutionLengthParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionLengthParameterName]; }
    }
    /// <summary>Lookup parameter for the height of the best symbolic regression solution.</summary>
    public ILookupParameter<IntValue> BestSolutionHeightParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionHeightParameterName]; }
    }
    /// <summary>Lookup parameter for the result collection in which the best solution is stored.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>Lookup parameter for the best known (validation) quality achieved on the data set.</summary>
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    /// <summary>Lookup parameter for the variable frequencies table.</summary>
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }
    /// <summary>Value lookup parameter that determines whether the best solution is linearly scaled.</summary>
    public IValueLookupParameter<BoolValue> ApplyLinearScalingParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="MaximizationParameter"/>.</summary>
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    /// <summary>Value of <see cref="CalculateSolutionComplexityParameter"/>.</summary>
    public BoolValue CalculateSolutionComplexity {
      get { return CalculateSolutionComplexityParameter.Value; }
      set { CalculateSolutionComplexityParameter.Value = value; }
    }
    /// <summary>Actual value of <see cref="ResultsParameter"/>.</summary>
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>.</summary>
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="GenerationsParameter"/>.</summary>
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="BestSolutionQualityParameter"/>; compared against null in <see cref="Analyze"/> to detect that no best solution has been stored yet.</summary>
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="BestSolutionLengthParameter"/>.</summary>
    public IntValue BestSolutionLength {
      get { return BestSolutionLengthParameter.ActualValue; }
      set { BestSolutionLengthParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BestSolutionHeightParameter"/>.</summary>
    public IntValue BestSolutionHeight {
      get { return BestSolutionHeightParameter.ActualValue; }
      set { BestSolutionHeightParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="ApplyLinearScalingParameter"/>.</summary>
    public BoolValue ApplyLinearScaling {
      get { return ApplyLinearScalingParameter.ActualValue; }
      set { ApplyLinearScalingParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>Constructor used by the persistence framework.</summary>
    [StorableConstructor]
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor; all state is copied by the base class.</summary>
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates a new analyzer and registers its parameters. Linear scaling and the calculation
    /// of the solution complexity are both enabled by default.
    /// </summary>
    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ValueLookupParameter<BoolValue>(ApplyLinearScalingParameterName, "The switch determines if the best solution should be linearly scaled on the whole training set.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    }

    /// <summary>Creates a deep clone of this analyzer via the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Adds every parameter that is missing after deserialization so that previously stored
    /// instances keep working. Note that the complexity calculation defaults to <c>false</c> here,
    /// whereas a freshly constructed analyzer defaults to <c>true</c>.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      #region compatibility remove before releasing 3.4
      // "Evaluator" is not registered by this class' constructor - presumably the base class
      // declares it for new instances (confirm).
      if (!Parameters.ContainsKey("Evaluator")) {
        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>("Evaluator", "The evaluator which should be used to evaluate the solution on the validation set."));
      }
      if (!Parameters.ContainsKey(MaximizationParameterName)) {
        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      }
      if (!Parameters.ContainsKey(CalculateSolutionComplexityParameterName)) {
        Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(false)));
      }
      if (!Parameters.ContainsKey(BestSolutionLengthParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      }
      if (!Parameters.ContainsKey(BestSolutionHeightParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      }
      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
        Parameters.Add(new ValueLookupParameter<BoolValue>(ApplyLinearScalingParameterName, "The switch determines if the best solution should be linearly scaled on the whole training set.", new BoolValue(true)));
      }
      #endregion
    }

    /// <summary>
    /// Determines the tree with the best validation quality, stores it as the new best solution
    /// if it improves on the stored best quality, and records the best and the current validation
    /// quality in the result collection.
    /// </summary>
    /// <param name="trees">The candidate trees.</param>
    /// <param name="validationQuality">The quality of each tree; <c>validationQuality[i]</c> belongs to <c>trees[i]</c>.</param>
    protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
      bool maximization = Maximization.Value;
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;

      for (int i = 0; i < trees.Length; i++) {
        double quality = validationQuality[i];
        if (IsBetter(quality, bestQuality, maximization)) {
          bestQuality = quality;
          bestTree = trees[i];
        }
      }

      DoubleValue previousBestQuality = BestSolutionQuality;

      // No tree won the comparison (empty population, or no quality that compares better than
      // the initial infinity, e.g. only NaN values) and no best solution has been stored yet:
      // there is nothing to build a solution from and nothing to report. Without this guard a
      // null tree would be passed on to the evaluator and the model.
      if (bestTree == null && previousBestQuality == null) {
        return;
      }

      // if the best validation tree is better than the current best solution => update
      bool newBest =
        bestTree != null &&
        (previousBestQuality == null || IsBetter(bestQuality, previousBestQuality.Value, maximization));

      ResultCollection results = Results;

      if (newBest) {
        // missing estimation limits mean "unbounded"
        double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
        double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
        string targetVariable = ProblemData.TargetVariable.Value;

        if (ApplyLinearScaling.Value) {
          // calculate scaling parameters only for the best tree using the full training set;
          // note that Calculate hands back beta before alpha while Scale takes alpha before beta
          double alpha, beta;
          SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
            lowerEstimationLimit, upperEstimationLimit,
            ProblemData.Dataset, targetVariable,
            ProblemData.TrainingIndizes, out beta, out alpha);

          // scale tree for solution
          bestTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
        }

        // the solution gets its own clones of the interpreter and the problem data
        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
          bestTree);
        var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
        solution.Name = BestSolutionParameterName;
        solution.Description = "Best solution on validation partition found over the whole run.";

        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);

        if (CalculateSolutionComplexity.Value) {
          BestSolutionLength = new IntValue(solution.Model.SymbolicExpressionTree.Size);
          BestSolutionHeight = new IntValue(solution.Model.SymbolicExpressionTree.Height);
          // length and height results are always created together
          if (!results.ContainsKey(BestSolutionLengthParameterName)) {
            results.Add(new Result(BestSolutionLengthParameterName, "Length of the best solution on the validation set", new IntValue()));
            results.Add(new Result(BestSolutionHeightParameterName, "Height of the best solution on the validation set", new IntValue()));
          }
          results[BestSolutionLengthParameterName].Value = BestSolutionLength;
          results[BestSolutionHeightParameterName].Value = BestSolutionHeight;
        }

        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, results, Generations, VariableFrequencies);
      }

      // the data table and the two quality results are always created together
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }

      // read the parameter again: it may just have been replaced by the update above
      double storedBestQuality = BestSolutionQualityParameter.ActualValue.Value;
      results[BestSolutionQualityParameterName].Value = new DoubleValue(storedBestQuality);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, storedBestQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    }

    /// <summary>
    /// Returns whether <paramref name="candidate"/> is strictly better than
    /// <paramref name="reference"/> for the given direction of optimization.
    /// A NaN on either side is never better because both comparisons are then false.
    /// </summary>
    private static bool IsBetter(double candidate, double reference, bool maximization) {
      return maximization ? candidate > reference : candidate < reference;
    }

    /// <summary>
    /// Appends <paramref name="data"/> to the row called <paramref name="name"/> of
    /// <paramref name="table"/>; the row is created with the given description and added to the
    /// table if it does not exist yet.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      table.Rows.TryGetValue(name, out row);
      bool isNewRow = row == null;
      if (isNewRow) {
        row = new DataRow(name, description);
      }
      row.Values.Add(data);
      // a new row is added to the table only after it received its first value
      if (isNewRow) {
        table.Rows.Add(row);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.