Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 5198

Last change on this file since 5198 was 5198, checked in by gkronber, 13 years ago

Changed FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer to extend from SymbolicRegressionValidationAnalyzer. #1356

File size: 10.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// For every call of <see cref="Analyze"/> the tree with the best validation quality is determined.
  /// If it is better than the best quality stored so far (or nothing has been stored yet), the tree is
  /// linearly scaled, wrapped into a <see cref="SymbolicRegressionSolution"/> and stored as the new best
  /// solution. The best-so-far quality and the best quality of the current call are additionally written
  /// to the result collection and appended to the "Validation Quality" data table.
  /// </remarks>
  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    // Names of the parameters looked up by this operator and of the results it writes.
    private const string MaximizationParameterName = "Maximization";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    // Despite its identifier this is not a parameter name; it names the result holding the quality data table.
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";

    #region parameter properties
    /// <summary>The direction of optimization.</summary>
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    /// <summary>The best symbolic regression solution.</summary>
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    /// <summary>The number of generations calculated so far.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    /// <summary>The quality of the best symbolic regression solution.</summary>
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    /// <summary>The result collection where the best symbolic regression solution should be stored.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>The best known (validation) quality achieved on the data set.</summary>
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    /// <summary>The variable frequencies table to use for the calculation of variable impacts.</summary>
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }

    #endregion
    #region properties
    /// <summary>Actual value of <see cref="MaximizationParameter"/>.</summary>
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ResultsParameter"/>.</summary>
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>.</summary>
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="GenerationsParameter"/>.</summary>
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    /// <summary>
    /// Actual value of <see cref="BestSolutionQualityParameter"/>;
    /// <see cref="Analyze"/> treats <c>null</c> as "no best solution stored yet".
    /// </summary>
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
    }

    #endregion

    [StorableConstructor]
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates a new analyzer and registers all lookup parameters used by it.
    /// </summary>
    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    }

    /// <summary>
    /// Creates a deep clone of this analyzer using the given <paramref name="cloner"/>.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Adds the "Evaluator" and "Maximization" parameters after deserialization when they are missing
    /// (compatibility code, marked to be removed before releasing 3.4).
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      #region compatibility remove before releasing 3.4
      if (!Parameters.ContainsKey("Evaluator")) {
        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>("Evaluator", "The evaluator which should be used to evaluate the solution on the validation set."));
      }
      if (!Parameters.ContainsKey(MaximizationParameterName)) {
        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      }
      #endregion
    }

    /// <summary>
    /// Determines the tree with the best validation quality, updates the stored best solution if that
    /// tree is better than the best one seen so far, and records both quality values in the results.
    /// </summary>
    /// <param name="trees">The trees to analyze.</param>
    /// <param name="validationQuality">
    /// The validation quality of each tree; element <c>i</c> belongs to <c>trees[i]</c>.
    /// </param>
    /// <remarks>
    /// If no tree can be selected (no trees, or every quality is NaN) and no best solution has been
    /// stored yet, there is nothing to report and the method returns without touching the results.
    /// </remarks>
    protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
      // look the optimization direction up once instead of once per comparison
      bool maximization = Maximization.Value;

      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;
      for (int i = 0; i < trees.Length; i++) {
        double quality = validationQuality[i];
        // NaN never compares as better, so trees with a NaN quality are never selected
        if (IsBetter(quality, bestQuality, maximization)) {
          bestQuality = quality;
          bestTree = trees[i];
        }
      }

      DoubleValue previousBestQuality = BestSolutionQuality;
      // no candidate tree and no earlier best solution => nothing can be scaled or reported
      if (bestTree == null && previousBestQuality == null) return;

      // if the best validation tree is better than the current best solution => update
      bool newBest = bestTree != null &&
        (previousBestQuality == null || IsBetter(bestQuality, previousBestQuality.Value, maximization));
      if (newBest) {
        SymbolicRegressionSolution solution = CreateScaledSolution(bestTree);

        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);

        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
      }

      // each result is created on its own so that a partially filled result collection is completed
      ResultCollection results = Results;
      EnsureResult(results, BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName));
      EnsureResult(results, BestSolutionQualityParameterName, new DoubleValue());
      EnsureResult(results, CurrentBestValidationQualityParameterName, new DoubleValue());

      double overallBestQuality = BestSolutionQualityParameter.ActualValue.Value;
      results[BestSolutionQualityParameterName].Value = new DoubleValue(overallBestQuality);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, overallBestQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    }

    /// <summary>
    /// Returns whether <paramref name="candidate"/> is strictly better than <paramref name="reference"/>
    /// for the given optimization direction.
    /// </summary>
    private static bool IsBetter(double candidate, double reference, bool maximization) {
      return maximization ? candidate > reference : candidate < reference;
    }

    /// <summary>
    /// Linearly scales <paramref name="tree"/> and wraps it into a named solution
    /// that works on a clone of the problem data and of the interpreter.
    /// </summary>
    private SymbolicRegressionSolution CreateScaledSolution(SymbolicExpressionTree tree) {
      double lowerEstimationLimit = LowerEstimationLimit.Value;
      double upperEstimationLimit = UpperEstimationLimit.Value;
      string targetVariable = ProblemData.TargetVariable.Value;

      // calculate scaling parameters and only for the best tree using the full training set
      // NOTE(review): the out arguments are passed as (beta, alpha) here while Scale receives
      // (alpha, beta) - confirm this matches the parameter order of both methods.
      double alpha, beta;
      SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, tree,
        lowerEstimationLimit, upperEstimationLimit,
        ProblemData.Dataset, targetVariable,
        ProblemData.TrainingIndizes, out beta, out alpha);

      // scale tree for solution
      var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(tree, alpha, beta);
      var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
        scaledTree);
      var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on validation partition found over the whole run.";
      return solution;
    }

    /// <summary>
    /// Adds a result called <paramref name="name"/> with the given initial value
    /// unless <paramref name="results"/> already contains a result of that name.
    /// </summary>
    private static void EnsureResult(ResultCollection results, string name, IItem initialValue) {
      if (!results.ContainsKey(name)) {
        results.Add(new Result(name, initialValue));
      }
    }

    /// <summary>
    /// Appends <paramref name="data"/> to the row called <paramref name="name"/> of
    /// <paramref name="table"/>; the row is created with <paramref name="description"/> if it is missing.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      if (table.Rows.TryGetValue(name, out row)) {
        row.Values.Add(data);
        return;
      }

      // a new row receives its first value before it is added to the table (same order as before)
      row = new DataRow(name, description);
      row.Values.Add(data);
      table.Rows.Add(row);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.