Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs @ 6409

Last change on this file since 6409 was 5445, checked in by swagner, 14 years ago

Updated year of copyrights (#1406)

File size: 13.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Evaluators;
31using HeuristicLab.Problems.DataAnalysis.Symbolic;
32
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Evaluator that calculates the mean squared error of a symbolic regression solution, optionally after
  /// linearly scaling the estimated values. In addition to the mean it reports the variance of the squared
  /// errors, the number of evaluated samples and a bias / variance / covariance decomposition of the error.
  /// </summary>
  [Item("SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator", "Calculates the mean and the variance of the squared errors of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    private const string QualityVarianceParameterName = "QualityVariance";
    private const string QualitySamplesParameterName = "QualitySamples";
    private const string DecompositionBiasParameterName = "QualityDecompositionBias";
    private const string DecompositionVarianceParameterName = "QualityDecompositionVariance";
    private const string DecompositionCovarianceParameterName = "QualityDecompositionCovariance";
    private const string ApplyScalingParameterName = "ApplyScaling";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";

    // Values with an absolute value of 1.0E07 or more are ignored when the scaling parameters are calculated.
    private const double ScalingValueLimit = 1.0E07;

    #region parameter properties
    public IValueLookupParameter<BoolValue> ApplyScalingParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[ApplyScalingParameterName]; }
    }
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }
    }
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BetaParameterName]; }
    }
    public ILookupParameter<DoubleValue> QualityVarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[QualityVarianceParameterName]; }
    }
    public ILookupParameter<IntValue> QualitySamplesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[QualitySamplesParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionBiasParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionBiasParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionVarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionVarianceParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionCovarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionCovarianceParameterName]; }
    }

    #endregion
    #region properties
    public BoolValue ApplyScaling {
      get { return ApplyScalingParameter.ActualValue; }
    }
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    public DoubleValue QualityVariance {
      get { return QualityVarianceParameter.ActualValue; }
      set { QualityVarianceParameter.ActualValue = value; }
    }
    public IntValue QualitySamples {
      get { return QualitySamplesParameter.ActualValue; }
      set { QualitySamplesParameter.ActualValue = value; }
    }
    #endregion
    [StorableConstructor]
    private SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator(bool deserializing) : base(deserializing) { }
    private SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator(SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator original, Cloner cloner) : base(original, cloner) { }
    public SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new ValueLookupParameter<BoolValue>(ApplyScalingParameterName, "Determines if the estimated values should be scaled.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<DoubleValue>(AlphaParameterName, "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(BetaParameterName, "Beta parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(QualityVarianceParameterName, "A parameter which stores the variance of the squared errors."));
      Parameters.Add(new LookupParameter<IntValue>(QualitySamplesParameterName, " The number of evaluated samples."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionBiasParameterName, "A parameter which stores the relative bias of the MSE."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionVarianceParameterName, "A parameter which stores the relative variance of the MSE."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionCovarianceParameterName, "A parameter which stores the relative covariance of the MSE."));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator(this, cloner);
    }

    /// <summary>
    /// Calculates the mean squared error of <paramref name="solution"/> on the given rows.
    /// When an execution context is available the scaling parameters (only if scaling is applied), the variance
    /// of the squared errors, the sample count and the error decomposition (each term divided by the mean
    /// squared error) are additionally written to the corresponding parameters.
    /// </summary>
    /// <returns>The mean of the squared errors.</returns>
    public override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows) {
      bool hasContext = ExecutionContext != null;
      // Without an execution context the actual value cannot be looked up, so only a directly set value is used.
      bool applyScaling = hasContext
        ? ApplyScaling.Value
        : (ApplyScalingParameter.Value != null && ApplyScalingParameter.Value.Value);

      // Identity scaling is used when scaling is switched off.
      double alpha = 0.0;
      double beta = 1.0;
      double meanSE, varianceSE;
      int count;
      double bias, variance, covariance;
      double mse;
      if (applyScaling) {
        mse = Calculate(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, out beta, out alpha, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
      } else {
        mse = CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
      }

      if (hasContext) {
        if (applyScaling) {
          Alpha = new DoubleValue(alpha);
          Beta = new DoubleValue(beta);
        }
        QualityVariance = new DoubleValue(varianceSE);
        QualitySamples = new IntValue(count);
        // NOTE(review): the ratios below are NaN or infinite when meanSE is 0 - confirm that consumers cope with that.
        DecompositionBiasParameter.ActualValue = new DoubleValue(bias / meanSE);
        DecompositionVarianceParameter.ActualValue = new DoubleValue(variance / meanSE);
        DecompositionCovarianceParameter.ActualValue = new DoubleValue(covariance / meanSE);
      }

      return mse;
    }

    /// <summary>
    /// Determines the linear scaling parameters from the target values and the unscaled estimated values and
    /// then calculates the error statistics of the scaled solution via <see cref="CalculateWithScaling"/>.
    /// The tree values are requested from the interpreter twice: once here and once in
    /// <see cref="CalculateWithScaling"/>.
    /// </summary>
    /// <returns>The mean of the squared errors of the scaled solution.</returns>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha, out double meanSE, out double varianceSE, out int count, out double bias, out double variance, out double covariance) {
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);

      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
    }

    /// <summary>
    /// Calculates the error statistics of the solution after transforming every estimated value to
    /// <c>estimated * beta + alpha</c> and limiting it to the estimation limits (NaN is replaced by
    /// <paramref name="upperEstimationLimit"/>).
    /// </summary>
    /// <param name="meanSE">Mean of the squared errors.</param>
    /// <param name="varianceSE">Variance of the squared errors.</param>
    /// <param name="count">Number of evaluated samples.</param>
    /// <param name="bias">Squared difference of the means of original and estimated values.</param>
    /// <param name="variance">Squared difference of the standard deviations of original and estimated values.</param>
    /// <param name="covariance">2 * sO * sE * (1 - r), with r being the square root of the R² of original and estimated values.</param>
    /// <returns>The mean of the squared errors.</returns>
    /// <exception cref="ArgumentException">Thrown when the number of original and estimated values differs.</exception>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha, out double meanSE, out double varianceSE, out int count, out double bias, out double variance, out double covariance) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      OnlineMeanAndVarianceCalculator seEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator originalMeanEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator estimatedMeanEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        while (MoveNextInLockstep(originalEnumerator, estimatedEnumerator)) {
          double estimated = estimatedEnumerator.Current * beta + alpha;
          double original = originalEnumerator.Current;
          if (double.IsNaN(estimated))
            estimated = upperEstimationLimit;
          else
            estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
          double error = estimated - original;
          error *= error;
          seEvaluator.Add(error);
          originalMeanEvaluator.Add(original);
          estimatedMeanEvaluator.Add(estimated);
          r2Evaluator.Add(original, estimated);
        }
      }

      meanSE = seEvaluator.Mean;
      varianceSE = seEvaluator.Variance;
      count = seEvaluator.Count;
      bias = (originalMeanEvaluator.Mean - estimatedMeanEvaluator.Mean);
      bias *= bias;

      double sO = Math.Sqrt(originalMeanEvaluator.Variance);
      double sE = Math.Sqrt(estimatedMeanEvaluator.Variance);
      variance = sO - sE;
      variance *= variance;
      // NOTE(review): r is the non-negative root of R², so the sign of the correlation is lost here - confirm this is intended.
      double r = Math.Sqrt(r2Evaluator.RSquared);
      covariance = 2 * sO * sE * (1 - r);
      return seEvaluator.Mean;
    }

    /// <summary>
    /// Calculates linear scaling parameters in one pass.
    /// The formulas to calculate the scaling parameters were taken from Scaled Symbolic Regression by Maarten Keijzer.
    /// http://www.springerlink.com/content/x035121165125175/
    /// </summary>
    /// <remarks>
    /// Pairs in which either value is rejected by <see cref="IsValidValue"/> are skipped. If fewer than two pairs
    /// remain the identity scaling (beta = 1, alpha = 0) is returned; if the variance of the estimated values is
    /// almost zero beta is set to 1.
    /// </remarks>
    /// <exception cref="ArgumentException">Thrown when the number of original and estimated values differs.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      OnlineMeanAndVarianceCalculator yVarianceCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator tMeanCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineCovarianceEvaluator ytCovarianceEvaluator = new OnlineCovarianceEvaluator();
      int cnt = 0;

      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        while (MoveNextInLockstep(originalEnumerator, estimatedEnumerator)) {
          double y = estimatedEnumerator.Current;
          double t = originalEnumerator.Current;
          if (IsValidValue(t) && IsValidValue(y)) {
            tMeanCalculator.Add(t);
            yVarianceCalculator.Add(y);
            ytCovarianceEvaluator.Add(y, t);

            cnt++;
          }
        }
      }

      if (cnt < 2) {
        alpha = 0;
        beta = 1;
      } else {
        if (yVarianceCalculator.Variance.IsAlmost(0.0))
          beta = 1;
        else
          beta = ytCovarianceEvaluator.Covariance / yVarianceCalculator.Variance;

        alpha = tMeanCalculator.Mean - beta * yVarianceCalculator.Mean;
      }
    }

    /// <summary>
    /// Advances both enumerators by one element and reports whether both have a current element.
    /// Comparing the two results directly also detects sequences whose lengths differ by exactly one element.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when only one of the two enumerators has reached its end.</exception>
    private static bool MoveNextInLockstep(IEnumerator<double> originalEnumerator, IEnumerator<double> estimatedEnumerator) {
      bool hasOriginal = originalEnumerator.MoveNext();
      bool hasEstimated = estimatedEnumerator.MoveNext();
      if (hasOriginal != hasEstimated)
        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
      return hasOriginal;
    }

    /// <summary>
    /// Returns true if <paramref name="d"/> lies strictly between -1.0E07 and 1.0E07.
    /// NaN and the infinities fail both comparisons and are therefore rejected as well.
    /// </summary>
    private static bool IsValidValue(double d) {
      return d > -ScalingValueLimit && d < ScalingValueLimit;  // don't consider very large or very small values for scaling
    }
  }
}
Note: See TracBrowser for help on using the repository browser.