Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionOverfittingAnalyzer.cs @ 5747

Last change on this file since 5747 was 5445, checked in by swagner, 14 years ago

Updated year of copyrights (#1406)

File size: 7.5 KB
RevLine 
[4271]1#region License Information
2/* HeuristicLab
[5445]3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[4271]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
[4272]25using HeuristicLab.Common;
[4271]26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Evaluators;
34using HeuristicLab.Problems.DataAnalysis.Symbolic;
35using System;
36
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// Analyzer that calculates the correlation between the training and the validation
  /// fitness of symbolic regression models, records it over the run, and derives an
  /// overfitting flag from it.
  /// </summary>
  /// <remarks>
  /// The flag is switched with two thresholds: starting from the non-overfitting state the
  /// correlation has to drop below the lower threshold to raise the flag, and once the flag
  /// is raised the correlation has to reach the upper threshold to clear it again.
  /// </remarks>
  [Item("SymbolicRegressionOverfittingAnalyzer", "Calculates and tracks correlation of training and validation fitness of symbolic regression models.")]
  [StorableClass]
  public sealed class SymbolicRegressionOverfittingAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    // Names under which the parameters are registered in the Parameters collection.
    private const string MaximizationParameterName = "Maximization";
    private const string QualityParameterName = "Quality";
    private const string TrainingValidationCorrelationParameterName = "TrainingValidationCorrelation";
    private const string TrainingValidationCorrelationTableParameterName = "TrainingValidationCorrelationTable";
    private const string LowerCorrelationThresholdParameterName = "LowerCorrelationThreshold";
    private const string UpperCorrelationThresholdParameterName = "UpperCorrelationThreshold";
    private const string OverfittingParameterName = "IsOverfitting";
    private const string ResultsParameterName = "Results";

    // Name of the single data row of the correlation table; used both when the row is
    // created and when a new value is appended to it.
    private const string CorrelationRowName = "Training and validation fitness correlation";

    #region parameter properties
    /// <summary>Training fitness values, looked up from the scope tree.</summary>
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get {
        return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName];
      }
    }

    /// <summary>The direction of optimization.</summary>
    public ILookupParameter<BoolValue> MaximizationParameter {
      get {
        return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName];
      }
    }

    /// <summary>Most recently calculated training/validation fitness correlation.</summary>
    public ILookupParameter<DoubleValue> TrainingValidationQualityCorrelationParameter {
      get {
        return (ILookupParameter<DoubleValue>)Parameters[TrainingValidationCorrelationParameterName];
      }
    }

    /// <summary>Data table holding the correlation values collected over the run.</summary>
    public ILookupParameter<DataTable> TrainingValidationQualityCorrelationTableParameter {
      get {
        return (ILookupParameter<DataTable>)Parameters[TrainingValidationCorrelationTableParameterName];
      }
    }

    /// <summary>Correlation below which the state changes from non-overfitting to overfitting.</summary>
    public IValueLookupParameter<DoubleValue> LowerCorrelationThresholdParameter {
      get {
        return (IValueLookupParameter<DoubleValue>)Parameters[LowerCorrelationThresholdParameterName];
      }
    }

    /// <summary>Correlation that has to be reached to change from overfitting back to non-overfitting.</summary>
    public IValueLookupParameter<DoubleValue> UpperCorrelationThresholdParameter {
      get {
        return (IValueLookupParameter<DoubleValue>)Parameters[UpperCorrelationThresholdParameterName];
      }
    }

    /// <summary>Boolean indicator for overfitting.</summary>
    public ILookupParameter<BoolValue> OverfittingParameter {
      get {
        return (ILookupParameter<BoolValue>)Parameters[OverfittingParameterName];
      }
    }

    /// <summary>The results collection the correlation table is registered in.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get {
        return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName];
      }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="MaximizationParameter"/>.</summary>
    public BoolValue Maximization {
      get {
        return MaximizationParameter.ActualValue;
      }
    }
    #endregion

    [StorableConstructor]
    private SymbolicRegressionOverfittingAnalyzer(bool deserializing)
      : base(deserializing) {
    }

    private SymbolicRegressionOverfittingAnalyzer(SymbolicRegressionOverfittingAnalyzer original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Creates the analyzer and registers its parameters; the lower and upper correlation
    /// thresholds default to 0.65 and 0.75.
    /// </summary>
    public SymbolicRegressionOverfittingAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "Training fitness"));
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new LookupParameter<DoubleValue>(TrainingValidationCorrelationParameterName, "Correlation of training and validation fitnesses"));
      Parameters.Add(new LookupParameter<DataTable>(TrainingValidationCorrelationTableParameterName, "Data table of training and validation fitness correlation values over the whole run."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerCorrelationThresholdParameterName, "Lower threshold for correlation value that marks the boundary from non-overfitting to overfitting.", new DoubleValue(0.65)));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperCorrelationThresholdParameterName, "Upper threshold for correlation value that marks the boundary from overfitting to non-overfitting.", new DoubleValue(0.75)));
      Parameters.Add(new LookupParameter<BoolValue>(OverfittingParameterName, "Boolean indicator for overfitting."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection."));
    }

    // Intentionally empty hook that runs after deserialization.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
    }

    /// <summary>Creates a deep clone of this analyzer via the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionOverfittingAnalyzer(this, cloner);
    }

    /// <summary>
    /// Correlates the training qualities with the given validation qualities, stores the
    /// correlation, appends it to the correlation data table and updates the overfitting flag.
    /// </summary>
    /// <param name="trees">The analyzed trees; not read by this method.</param>
    /// <param name="validationQuality">
    /// Validation qualities to correlate with the training qualities
    /// (presumably one value per tree, in the same order; confirm against the base class).
    /// </param>
    protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
      double[] trainingQuality = (from quality in QualityParameter.ActualValue
                                  select quality.Value).ToArray();

      double correlation = alglib.spearmancorr2(trainingQuality, validationQuality);
      TrainingValidationQualityCorrelationParameter.ActualValue = new DoubleValue(correlation);

      // The table does not exist on the first call: create it, publish it through the
      // lookup parameter and register it in the results collection.
      DataTable correlationTable = TrainingValidationQualityCorrelationTableParameter.ActualValue;
      if (correlationTable == null) {
        correlationTable = new DataTable("Training and validation fitness correlation table", "Data table of training and validation fitness correlation values over the whole run.");
        correlationTable.Rows.Add(new DataRow(CorrelationRowName, "Training and validation fitness correlation values"));
        TrainingValidationQualityCorrelationTableParameter.ActualValue = correlationTable;
        ResultsParameter.ActualValue.Add(new Result(TrainingValidationCorrelationTableParameterName, correlationTable));
      }
      correlationTable.Rows[CorrelationRowName].Values.Add(correlation);

      // A missing flag value is treated as "not overfitting".
      bool wasOverfitting = OverfittingParameter.ActualValue != null
                            && OverfittingParameter.ActualValue.Value;

      // overfitting == true  => the correlation must reach the upper threshold to switch back
      // overfitting == false => the correlation must drop below the lower threshold to switch
      // Only the threshold that applies to the current state is looked up.
      double threshold = wasOverfitting
        ? UpperCorrelationThresholdParameter.ActualValue.Value
        : LowerCorrelationThresholdParameter.ActualValue.Value;

      OverfittingParameter.ActualValue = new BoolValue(correlation < threshold);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.