Free cookie consent management tool by TermsFeed Policy Generator

source: branches/1278_DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 16749

Last change on this file since 16749 was 4877, checked in by swinkler, 14 years ago

Created branch for population diversity analysis for symbolic regression. (#1278)

File size: 16.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
35namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
/// <summary>
/// An operator that analyzes the validation best scaled symbolic regression solution.
/// </summary>
/// <remarks>
/// On every application each tree in scope is evaluated on a random sample of the validation
/// partition (sampled rows that fall inside the test partition are skipped). When the best tree
/// of the current population beats the best validation quality recorded so far, it is linearly
/// scaled using the training rows and stored as the new best solution. The best-so-far and the
/// current best validation quality are written to the result collection and appended to the
/// "Validation Quality" data table.
/// </remarks>
[Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
[StorableClass]
public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
  private const string RandomParameterName = "Random";
  private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
  private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
  private const string ProblemDataParameterName = "ProblemData";
  private const string ValidationSamplesStartParameterName = "SamplesStart";
  private const string ValidationSamplesEndParameterName = "SamplesEnd";
  private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
  private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
  private const string EvaluatorParameterName = "Evaluator";
  private const string MaximizationParameterName = "Maximization";
  private const string BestSolutionParameterName = "Best solution (validation)";
  private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
  private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
  private const string BestSolutionQualityValuesParameterName = "Validation Quality";
  private const string ResultsParameterName = "Results";
  private const string VariableFrequenciesParameterName = "VariableFrequencies";
  private const string BestKnownQualityParameterName = "BestKnownQuality";
  private const string GenerationsParameterName = "Generations";
  private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

  #region parameter properties
  public ILookupParameter<IRandom> RandomParameter {
    get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
  }
  public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
    get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
  }
  public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
    get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
  }
  public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
    get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
  }
  public ILookupParameter<BoolValue> MaximizationParameter {
    get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
  }
  public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
    get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
  }
  public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
    get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
  }
  public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
    get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
  }
  public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
    get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
  }

  public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
    get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
  }
  public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
    get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
  }
  public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
    get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
  }
  public ILookupParameter<IntValue> GenerationsParameter {
    get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
  }
  public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
    get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
  }
  public ILookupParameter<ResultCollection> ResultsParameter {
    get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
  }
  public ILookupParameter<DoubleValue> BestKnownQualityParameter {
    get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
  }
  public ILookupParameter<DataTable> VariableFrequenciesParameter {
    get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
  }

  #endregion
  #region properties
  public IRandom Random {
    get { return RandomParameter.ActualValue; }
  }
  public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
    get { return SymbolicExpressionTreeParameter.ActualValue; }
  }
  public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
    get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
  }
  public ISymbolicRegressionEvaluator Evaluator {
    get { return EvaluatorParameter.ActualValue; }
  }
  public BoolValue Maximization {
    get { return MaximizationParameter.ActualValue; }
  }
  public DataAnalysisProblemData ProblemData {
    get { return ProblemDataParameter.ActualValue; }
  }
  /// <summary>
  /// The actual value of the validation start parameter.
  /// </summary>
  public IntValue ValidationSamplesStart {
    get { return ValidationSamplesStartParameter.ActualValue; }
  }
  /// <summary>
  /// Misspelled alias of <see cref="ValidationSamplesStart"/>, kept so that existing callers keep compiling.
  /// </summary>
  public IntValue ValidiationSamplesStart {
    get { return ValidationSamplesStart; }
  }
  public IntValue ValidationSamplesEnd {
    get { return ValidationSamplesEndParameter.ActualValue; }
  }
  public PercentValue RelativeNumberOfEvaluatedSamples {
    get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
  }

  public DoubleValue UpperEstimationLimit {
    get { return UpperEstimationLimitParameter.ActualValue; }
  }
  public DoubleValue LowerEstimationLimit {
    get { return LowerEstimationLimitParameter.ActualValue; }
  }
  public ResultCollection Results {
    get { return ResultsParameter.ActualValue; }
  }
  public DataTable VariableFrequencies {
    get { return VariableFrequenciesParameter.ActualValue; }
  }
  public IntValue Generations {
    get { return GenerationsParameter.ActualValue; }
  }
  public DoubleValue BestSolutionQuality {
    get { return BestSolutionQualityParameter.ActualValue; }
  }

  #endregion

  [StorableConstructor]
  private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
  private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }
  public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
    : base() {
    Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
    Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
    Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
    Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
    Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
    Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
    Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
    Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
    Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
    Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
    Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
    Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
    Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
    Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
    Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
  }

  public override IDeepCloneable Clone(Cloner cloner) {
    return new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
  }

  /// <summary>
  /// Adds the evaluator and maximization parameters when the deserialized instance does not contain them yet.
  /// </summary>
  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserialization() {
    #region compatibility remove before releasing 3.3.1
    if (!Parameters.ContainsKey(EvaluatorParameterName)) {
      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
    }
    if (!Parameters.ContainsKey(MaximizationParameterName)) {
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    }
    #endregion
  }

  /// <summary>
  /// Evaluates all trees on a sample of the validation rows, updates the best validation solution
  /// when the current population contains a better tree, and records the validation qualities.
  /// </summary>
  /// <returns>The operation returned by the base implementation.</returns>
  public override IOperation Apply() {
    // resolve the parameters once; none of them changes while the trees are evaluated
    ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
    DataAnalysisProblemData problemData = ProblemData;
    string targetVariable = problemData.TargetVariable.Value;

    // select a random subset of rows in the validation set
    int[] rows = SampleValidationRows(problemData);

    double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
    double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;

    bool maximization = Maximization.Value;
    ISymbolicRegressionEvaluator evaluator = Evaluator;
    ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;

    double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
    SymbolicExpressionTree bestTree = null;

    foreach (var tree in trees) {
      double quality = evaluator.Evaluate(interpreter, tree,
        lowerEstimationLimit, upperEstimationLimit,
        problemData.Dataset, targetVariable,
        rows);

      // a NaN quality never compares as better, so such trees are never selected
      if (IsBetter(quality, bestQuality, maximization)) {
        bestQuality = quality;
        bestTree = tree;
      }
    }

    DoubleValue previousBest = BestSolutionQuality;

    // No tree could be ranked (empty population or only NaN qualities) and there is no earlier
    // best solution either: there is nothing to scale and nothing to report.
    if (bestTree == null && previousBest == null) return base.Apply();

    // if the best validation tree is better than the current best solution => update
    bool newBest = bestTree != null &&
      (previousBest == null || IsBetter(bestQuality, previousBest.Value, maximization));
    if (newBest) {
      // calculate scaling parameters only for the best tree, using the training rows
      double alpha, beta;
      SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, bestTree,
        lowerEstimationLimit, upperEstimationLimit,
        problemData.Dataset, targetVariable,
        problemData.TrainingIndizes, out beta, out alpha);

      // scale tree for solution
      var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
      var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(),
        scaledTree);
      var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)problemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on validation partition found over the whole run.";

      BestSolutionParameter.ActualValue = solution;
      BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);

      BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, problemData, Results, Generations, VariableFrequencies);
    }

    // previousBest is non-null whenever no new best was stored (see the early return above)
    double bestQualitySoFar = newBest ? bestQuality : previousBest.Value;

    ResultCollection results = Results;
    if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
      results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
      results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
      results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
    }
    results[BestSolutionQualityParameterName].Value = new DoubleValue(bestQualitySoFar);
    results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);

    DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
    AddValue(validationValues, bestQualitySoFar, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
    AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    return base.Apply();
  }

  /// <summary>
  /// Draws the sample of validation rows used to evaluate every tree of the current population.
  /// </summary>
  /// <param name="problemData">The problem data whose test partition bounds are used to filter the sample.</param>
  /// <returns>The sampled row indices that lie outside of the test partition.</returns>
  private int[] SampleValidationRows(DataAnalysisProblemData problemData) {
    int validationStart = ValidationSamplesStart.Value;
    int validationEnd = ValidationSamplesEnd.Value;
    int seed = Random.Next();
    int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
    if (count == 0) count = 1;

    int testStart = problemData.TestSamplesStart.Value;
    int testEnd = problemData.TestSamplesEnd.Value;

    // Materialized once: as a deferred query the sampling and the filter would be
    // repeated for every tree that enumerates the rows.
    return RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count)
      .Where(row => row < testStart || testEnd <= row)
      .ToArray();
  }

  /// <summary>
  /// Determines whether <paramref name="quality"/> is strictly better than <paramref name="reference"/>.
  /// </summary>
  /// <param name="quality">The quality to test.</param>
  /// <param name="reference">The quality to compare against.</param>
  /// <param name="maximization">True if larger qualities are better, false if smaller qualities are better.</param>
  /// <returns>True if <paramref name="quality"/> is strictly better; always false if either value is NaN.</returns>
  private static bool IsBetter(double quality, double reference, bool maximization) {
    return maximization ? quality > reference : quality < reference;
  }

  /// <summary>
  /// Appends <paramref name="data"/> to the row called <paramref name="name"/>, creating the row first if the table does not contain it.
  /// </summary>
  /// <param name="table">The data table to extend.</param>
  /// <param name="data">The value to append.</param>
  /// <param name="name">The name of the row.</param>
  /// <param name="description">The description used when the row has to be created.</param>
  private static void AddValue(DataTable table, double data, string name, string description) {
    DataRow row;
    if (table.Rows.TryGetValue(name, out row) && row != null) {
      row.Values.Add(data);
      return;
    }

    // the value is stored before the row is attached to the table
    row = new DataRow(name, description);
    row.Values.Add(data);
    table.Rows.Add(row);
  }
}
298}
Note: See TracBrowser for help on using the repository browser.