Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs @ 4468

Last change on this file since 4468 was 4190, checked in by gkronber, 14 years ago

Moved upper and lower estimation limit parameters into ISymbolicRegressionEvaluator interface and introduced an Evaluate method in the interface in preparation for a ISymbolicRegressionEvaluator parameter for the validation best solution analyzer. #1117

File size: 12.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Evaluators;
31using HeuristicLab.Problems.DataAnalysis.Symbolic;
32
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Evaluator that computes the mean squared error of a (optionally linearly scaled) symbolic regression
  /// solution and additionally stores the variance of the squared errors, the number of evaluated samples
  /// and three decomposition terms of the error (each divided by the mean squared error).
  /// </summary>
  [Item("SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator", "Calculates the mean and the variance of the squared errors of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public class SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    private const string QualityVarianceParameterName = "QualityVariance";
    private const string QualitySamplesParameterName = "QualitySamples";
    private const string DecompositionBiasParameterName = "QualityDecompositionBias";
    private const string DecompositionVarianceParameterName = "QualityDecompositionVariance";
    private const string DecompositionCovarianceParameterName = "QualityDecompositionCovariance";
    private const string ApplyScalingParameterName = "ApplyScaling";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string EnumerationLengthMismatchMessage = "Number of elements in original and estimated enumeration doesn't match.";

    #region parameter properties
    public IValueLookupParameter<BoolValue> ApplyScalingParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[ApplyScalingParameterName]; }
    }
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }
    }
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BetaParameterName]; }
    }
    public ILookupParameter<DoubleValue> QualityVarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[QualityVarianceParameterName]; }
    }
    public ILookupParameter<IntValue> QualitySamplesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[QualitySamplesParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionBiasParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionBiasParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionVarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionVarianceParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionCovarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionCovarianceParameterName]; }
    }

    #endregion
    #region properties
    public BoolValue ApplyScaling {
      get { return ApplyScalingParameter.ActualValue; }
    }
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    public DoubleValue QualityVariance {
      get { return QualityVarianceParameter.ActualValue; }
      set { QualityVarianceParameter.ActualValue = value; }
    }
    public IntValue QualitySamples {
      get { return QualitySamplesParameter.ActualValue; }
      set { QualitySamplesParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Registers the parameters of this evaluator in addition to those registered by the base class.
    /// Scaling is enabled by default.
    /// </summary>
    public SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new ValueLookupParameter<BoolValue>(ApplyScalingParameterName, "Determines if the estimated values should be scaled.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<DoubleValue>(AlphaParameterName, "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(BetaParameterName, "Beta parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(QualityVarianceParameterName, "A parameter which stores the variance of the squared errors."));
      Parameters.Add(new LookupParameter<IntValue>(QualitySamplesParameterName, "The number of evaluated samples."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionBiasParameterName, "A parameter which stores the relative bias of the MSE."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionVarianceParameterName, "A parameter which stores the relative variance of the MSE."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionCovarianceParameterName, "A parameter which stores the relative covariance of the MSE."));
    }

    /// <summary>
    /// Evaluates <paramref name="solution"/> on the given rows and stores the additional quality
    /// information (variance of the squared errors, sample count, decomposition terms) in the
    /// corresponding parameters. When scaling is applied, the calculated scaling parameters are
    /// stored in <see cref="Alpha"/> and <see cref="Beta"/> as well.
    /// </summary>
    /// <param name="interpreter">Interpreter used to calculate the estimated values of the tree.</param>
    /// <param name="solution">The symbolic expression tree to evaluate.</param>
    /// <param name="lowerEstimationLimit">Lower bound to which estimated values are clamped.</param>
    /// <param name="upperEstimationLimit">Upper bound to which estimated values are clamped (also used for NaN estimates).</param>
    /// <param name="dataset">Dataset providing the input and target values.</param>
    /// <param name="targetVariable">Name of the target variable in <paramref name="dataset"/>.</param>
    /// <param name="rows">Rows of the dataset that are evaluated.</param>
    /// <returns>The mean of the squared errors.</returns>
    public override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows) {
      double alpha, beta;
      double meanSE, varianceSE;
      int count;
      double bias, variance, covariance;
      double mse;
      // The estimation limits passed by the caller are used (not the limits stored in the parameters
      // of this instance), so that callers can evaluate a solution with their own limits.
      if (ApplyScaling.Value) {
        mse = Calculate(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, out beta, out alpha, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
        Alpha = new DoubleValue(alpha);
        Beta = new DoubleValue(beta);
      } else {
        // beta = 1 and alpha = 0 leave the estimated values unscaled
        mse = CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, 1, 0, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
      }
      QualityVariance = new DoubleValue(varianceSE);
      QualitySamples = new IntValue(count);
      // The decomposition terms are stored relative to the mean squared error.
      // NOTE(review): for meanSE == 0 these divisions yield NaN or infinity - confirm that consumers can handle this.
      DecompositionBiasParameter.ActualValue = new DoubleValue(bias / meanSE);
      DecompositionVarianceParameter.ActualValue = new DoubleValue(variance / meanSE);
      DecompositionCovarianceParameter.ActualValue = new DoubleValue(covariance / meanSE);
      return mse;
    }

    /// <summary>
    /// Calculates the linear scaling parameters for the solution and then evaluates the scaled solution
    /// with <see cref="CalculateWithScaling"/>. The tree is interpreted twice: once to determine the
    /// scaling parameters and once to calculate the errors.
    /// </summary>
    /// <param name="beta">Receives the multiplicative scaling parameter.</param>
    /// <param name="alpha">Receives the additive scaling parameter.</param>
    /// <returns>The mean of the squared errors of the scaled solution.</returns>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha, out double meanSE, out double varianceSE, out int count, out double bias, out double variance, out double covariance) {
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);

      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
    }

    /// <summary>
    /// Evaluates the solution after applying the given linear scaling (estimated * beta + alpha).
    /// Scaled estimates that are NaN are replaced by <paramref name="upperEstimationLimit"/>; all other
    /// scaled estimates are clamped to [<paramref name="lowerEstimationLimit"/>, <paramref name="upperEstimationLimit"/>].
    /// </summary>
    /// <param name="beta">Multiplicative scaling parameter.</param>
    /// <param name="alpha">Additive scaling parameter.</param>
    /// <param name="meanSE">Receives the mean of the squared errors.</param>
    /// <param name="varianceSE">Receives the variance of the squared errors.</param>
    /// <param name="count">Receives the number of evaluated samples.</param>
    /// <param name="bias">Receives the squared difference of the means of original and estimated values.</param>
    /// <param name="variance">Receives the squared difference of the standard deviations of original and estimated values.</param>
    /// <param name="covariance">Receives 2 * sO * sE * (1 - r), where sO and sE are the standard deviations and r is the square root of R².</param>
    /// <returns>The mean of the squared errors (same value as <paramref name="meanSE"/>).</returns>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha, out double meanSE, out double varianceSE, out int count, out double bias, out double variance, out double covariance) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      OnlineMeanAndVarianceCalculator seEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator originalMeanEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator estimatedMeanEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        while (MoveNextBoth(originalEnumerator, estimatedEnumerator)) {
          double estimated = estimatedEnumerator.Current * beta + alpha;
          double original = originalEnumerator.Current;
          if (double.IsNaN(estimated))
            estimated = upperEstimationLimit;
          else
            estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
          double error = estimated - original;
          error *= error;
          seEvaluator.Add(error);
          originalMeanEvaluator.Add(original);
          estimatedMeanEvaluator.Add(estimated);
          r2Evaluator.Add(original, estimated);
        }
      }

      meanSE = seEvaluator.Mean;
      varianceSE = seEvaluator.Variance;
      count = seEvaluator.Count;
      bias = (originalMeanEvaluator.Mean - estimatedMeanEvaluator.Mean);
      bias *= bias;

      double sO = Math.Sqrt(originalMeanEvaluator.Variance);
      double sE = Math.Sqrt(estimatedMeanEvaluator.Variance);
      variance = sO - sE;
      variance *= variance;
      // NOTE(review): Math.Sqrt never returns a negative value, so the sign of the correlation is lost here;
      // a negatively correlated solution is treated like a positively correlated one - confirm this is intended.
      double r = Math.Sqrt(r2Evaluator.RSquared);
      covariance = 2 * sO * sE * (1 - r);
      return seEvaluator.Mean;
    }

    /// <summary>
    /// Calculates linear scaling parameters in one pass.
    /// The formulas to calculate the scaling parameters were taken from Scaled Symbolic Regression by Maarten Keijzer.
    /// http://www.springerlink.com/content/x035121165125175/
    /// </summary>
    /// <remarks>
    /// Only pairs for which both values pass <see cref="IsValidValue"/> are considered. If fewer than two
    /// such pairs exist, alpha = 0 and beta = 1 are returned. If the variance of the estimated values is
    /// almost zero, beta = 1 is used.
    /// </remarks>
    /// <param name="original">The original (target) values.</param>
    /// <param name="estimated">The estimated values; must contain as many elements as <paramref name="original"/>.</param>
    /// <param name="beta">Receives the multiplicative scaling parameter.</param>
    /// <param name="alpha">Receives the additive scaling parameter.</param>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      OnlineMeanAndVarianceCalculator yVarianceCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator tMeanCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineCovarianceEvaluator ytCovarianceEvaluator = new OnlineCovarianceEvaluator();
      int cnt = 0;

      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        while (MoveNextBoth(originalEnumerator, estimatedEnumerator)) {
          double y = estimatedEnumerator.Current;
          double t = originalEnumerator.Current;
          if (IsValidValue(t) && IsValidValue(y)) {
            tMeanCalculator.Add(t);
            yVarianceCalculator.Add(y);
            ytCovarianceEvaluator.Add(y, t);

            cnt++;
          }
        }
      }

      if (cnt < 2) {
        alpha = 0;
        beta = 1;
      } else {
        if (yVarianceCalculator.Variance.IsAlmost(0.0))
          beta = 1;
        else
          beta = ytCovarianceEvaluator.Covariance / yVarianceCalculator.Variance;

        alpha = tMeanCalculator.Mean - beta * yVarianceCalculator.Mean;
      }
    }

    /// <summary>
    /// Advances both enumerators by one element.
    /// </summary>
    /// <returns>True if both enumerators moved to a new element, false if both reached their end.</returns>
    /// <exception cref="ArgumentException">Thrown if exactly one of the enumerators reached its end.</exception>
    private static bool MoveNextBoth(IEnumerator<double> originalEnumerator, IEnumerator<double> estimatedEnumerator) {
      // Both results are captured before they are compared. Checking for leftover elements only after
      // the loop would miss a length difference of exactly one element, because the surplus element
      // has already been consumed by the MoveNext call that terminated the loop.
      bool hasOriginal = originalEnumerator.MoveNext();
      bool hasEstimated = estimatedEnumerator.MoveNext();
      if (hasOriginal != hasEstimated)
        throw new ArgumentException(EnumerationLengthMismatchMessage);
      return hasOriginal;
    }

    /// <summary>
    /// Returns true if <paramref name="d"/> is neither NaN nor infinite and lies strictly between -1.0E07 and 1.0E07.
    /// </summary>
    private static bool IsValidValue(double d) {
      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -1.0E07 && d < 1.0E07;  // don't consider very large or very small values for scaling
    }
  }
}
Note: See TracBrowser for help on using the repository browser.