Free cookie consent management tool by TermsFeed Policy Generator

source: branches/CloningRefactoring/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 4718

Last change on this file since 4718 was 4682, checked in by mkommend, 14 years ago

Refactored ExternalEvaluation.* and fixed some errors and warnings (ticket #922).

File size: 16.9 KB
RevLine 
[3996]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[4068]22using System.Collections.Generic;
[3996]23using System.Linq;
[4068]24using HeuristicLab.Analysis;
[4678]25using HeuristicLab.Common;
[3996]26using HeuristicLab.Core;
27using HeuristicLab.Data;
[4068]28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
[3996]29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every application the operator evaluates all symbolic expression trees on a randomly
  /// sampled subset of the validation rows, remembers the best tree seen so far (according to the
  /// optimization direction), and writes the best and the current validation quality to the
  /// result collection.
  /// </remarks>
  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    // Names under which the parameters are registered in the Parameters collection.
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string EvaluatorParameterName = "Evaluator";
    private const string MaximizationParameterName = "Maximization";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    // The following two names are only used as keys in the result collection / data table.
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    #region parameter properties
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }

    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }

    #endregion
    #region properties
    // Convenience accessors; each one reads the value of the corresponding parameter.
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.ActualValue; }
    }
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    // The misspelled name ("Validiation") is kept on purpose: the property is public and
    // renaming it would break existing callers.
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }

    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
    }

    #endregion

    [StorableConstructor]
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates the analyzer and registers all of its parameters.
    /// </summary>
    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    }

    /// <summary>
    /// Creates a copy of this analyzer through the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Adds the evaluator and maximization parameters when a deserialized instance does not
    /// contain them yet.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      #region compatibility remove before releasing 3.3.1
      if (!Parameters.ContainsKey(EvaluatorParameterName)) {
        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      }
      if (!Parameters.ContainsKey(MaximizationParameterName)) {
        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      }
      #endregion
    }

    /// <summary>
    /// Evaluates all trees on a random sample of the validation rows, updates the best
    /// validation solution if a better tree was found, and records the best and the current
    /// validation quality in the result collection.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Read each parameter value once instead of going through ActualValue on every use
      // (the original did so for every tree and, inside the row filter, for every row).
      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
      DataAnalysisProblemData problemData = ProblemData;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;
      ISymbolicRegressionEvaluator evaluator = Evaluator;
      bool maximization = Maximization.Value;
      string targetVariable = problemData.TargetVariable.Value;

      // select a random subset of rows in the validation set
      int validationStart = ValidiationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      int seed = Random.Next();
      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      if (count == 0) {
        count = 1;
      }

      // Rows inside [TestSamplesStart, TestSamplesEnd) are dropped from the sample. The query is
      // materialized so that the sampling and filtering run once instead of once per tree.
      int testStart = problemData.TestSamplesStart.Value;
      int testEnd = problemData.TestSamplesEnd.Value;
      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count)
        .Where(row => row < testStart || testEnd <= row)
        .ToArray();

      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;

      // Find the tree with the best quality on the sampled validation rows. bestTree stays null
      // when there are no trees or no quality beats the initial +/- infinity (e.g. NaN).
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;
      foreach (var tree in trees) {
        double quality = evaluator.Evaluate(interpreter, tree,
          lowerEstimationLimit, upperEstimationLimit,
          problemData.Dataset, targetVariable,
          rows);

        if (maximization ? quality > bestQuality : quality < bestQuality) {
          bestQuality = quality;
          bestTree = tree;
        }
      }

      // if the best validation tree is better than the current best solution => update
      // (requires an actual tree; without the null guard a missing tree was dereferenced below)
      DoubleValue previousBestQuality = BestSolutionQuality;
      bool newBest = bestTree != null &&
        (previousBestQuality == null ||
         (maximization ? bestQuality > previousBestQuality.Value : bestQuality < previousBestQuality.Value));

      ResultCollection results = Results;
      double overallBestQuality;
      if (newBest) {
        // calculate scaling parameters and only for the best tree using the full training set
        // NOTE(review): Calculate receives (out beta, out alpha) while Scale receives
        // (alpha, beta); the order is kept as in the original - confirm against both signatures.
        double alpha, beta;
        SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, bestTree,
          lowerEstimationLimit, upperEstimationLimit,
          problemData.Dataset, targetVariable,
          problemData.TrainingIndizes, out beta, out alpha);

        // scale tree for solution
        var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(),
          scaledTree);
        var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)problemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
        solution.Name = BestSolutionParameterName;
        solution.Description = "Best solution on validation partition found over the whole run.";

        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);
        overallBestQuality = bestQuality;

        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, problemData, results, Generations, VariableFrequencies);
      } else if (previousBestQuality != null) {
        overallBestQuality = previousBestQuality.Value;
      } else {
        // No tree could be selected and no earlier best quality exists, so there is nothing to
        // report yet. The original code failed with a null dereference in this situation.
        return base.Apply();
      }

      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }
      results[BestSolutionQualityParameterName].Value = new DoubleValue(overallBestQuality);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, overallBestQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
      return base.Apply();
    }

    /// <summary>
    /// Appends <paramref name="data"/> to the row called <paramref name="name"/> of
    /// <paramref name="table"/>; the row is created (with <paramref name="description"/>) and
    /// added to the table if it cannot be found.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      table.Rows.TryGetValue(name, out row);
      if (row == null) {
        // The value is added before the row is attached to the table, as in the original.
        row = new DataRow(name, description);
        row.Values.Add(data);
        table.Rows.Add(row);
      } else {
        row.Values.Add(data);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.