Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs @ 10743

Last change on this file since 10743 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 13.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Evaluators;
31using HeuristicLab.Problems.DataAnalysis.Symbolic;
32
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Evaluator that returns the mean of the squared errors of a symbolic regression solution whose
  /// estimated values are optionally linearly scaled (estimated * beta + alpha).
  /// When an execution context is available it additionally stores the variance of the squared errors,
  /// the number of evaluated samples and the bias / variance / covariance components of the error
  /// (each divided by the mean squared error) in the corresponding lookup parameters.
  /// </summary>
  [Item("SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator", "Calculates the mean and the variance of the squared errors of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    private const string QualityVarianceParameterName = "QualityVariance";
    private const string QualitySamplesParameterName = "QualitySamples";
    private const string DecompositionBiasParameterName = "QualityDecompositionBias";
    private const string DecompositionVarianceParameterName = "QualityDecompositionVariance";
    private const string DecompositionCovarianceParameterName = "QualityDecompositionCovariance";
    private const string ApplyScalingParameterName = "ApplyScaling";

    #region parameter properties
    public IValueLookupParameter<BoolValue> ApplyScalingParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[ApplyScalingParameterName]; }
    }
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters["Alpha"]; }
    }
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters["Beta"]; }
    }
    public ILookupParameter<DoubleValue> QualityVarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[QualityVarianceParameterName]; }
    }
    public ILookupParameter<IntValue> QualitySamplesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[QualitySamplesParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionBiasParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionBiasParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionVarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionVarianceParameterName]; }
    }
    public ILookupParameter<DoubleValue> DecompositionCovarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[DecompositionCovarianceParameterName]; }
    }

    #endregion
    #region properties
    public BoolValue ApplyScaling {
      get { return ApplyScalingParameter.ActualValue; }
    }
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    public DoubleValue QualityVariance {
      get { return QualityVarianceParameter.ActualValue; }
      set { QualityVarianceParameter.ActualValue = value; }
    }
    public IntValue QualitySamples {
      get { return QualitySamplesParameter.ActualValue; }
      set { QualitySamplesParameter.ActualValue = value; }
    }
    #endregion
    [StorableConstructor]
    private SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator(bool deserializing) : base(deserializing) { }
    private SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator(SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator original, Cloner cloner) : base(original, cloner) { }
    /// <summary>
    /// Creates the evaluator and registers its parameters. Scaling is enabled by default.
    /// </summary>
    public SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new ValueLookupParameter<BoolValue>(ApplyScalingParameterName, "Determines if the estimated values should be scaled.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<DoubleValue>("Alpha", "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>("Beta", "Beta parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(QualityVarianceParameterName, "A parameter which stores the variance of the squared errors."));
      Parameters.Add(new LookupParameter<IntValue>(QualitySamplesParameterName, " The number of evaluated samples."));
      // Each decomposition parameter gets its own description (the variance and covariance
      // descriptions used to be copies of the bias description).
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionBiasParameterName, "A parameter which stores the relative bias of the MSE."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionVarianceParameterName, "A parameter which stores the relative variance of the MSE."));
      Parameters.Add(new LookupParameter<DoubleValue>(DecompositionCovarianceParameterName, "A parameter which stores the relative covariance of the MSE."));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator(this, cloner);
    }

    /// <summary>
    /// Evaluates <paramref name="solution"/> on the given rows and returns the mean squared error.
    /// If an execution context is available the scaling flag is read via its actual value and the
    /// additional results (alpha/beta when scaling, variance of the squared errors, sample count and the
    /// decomposition components divided by the mean squared error) are written to the lookup parameters.
    /// Without an execution context only the locally set value of the scaling flag is used and nothing is stored.
    /// </summary>
    /// <returns>The mean of the squared errors.</returns>
    public override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows) {
      double meanSE, varianceSE;
      int count;
      double bias, variance, covariance;
      double mse;

      bool hasContext = ExecutionContext != null;
      // ActualValue can only be resolved with an execution context; otherwise fall back to the local value.
      bool applyScaling = hasContext
        ? ApplyScaling.Value
        : (ApplyScalingParameter.Value != null && ApplyScalingParameter.Value.Value);

      if (applyScaling) {
        double alpha, beta;
        mse = Calculate(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, out beta, out alpha, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
        if (hasContext) {
          Alpha = new DoubleValue(alpha);
          Beta = new DoubleValue(beta);
        }
      } else {
        // beta = 1, alpha = 0 leaves the estimated values unscaled
        mse = CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, 1, 0, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
      }

      if (hasContext) {
        QualityVariance = new DoubleValue(varianceSE);
        QualitySamples = new IntValue(count);
        // NOTE(review): for meanSE == 0 these quotients are not finite numbers (e.g. 0 / 0 = NaN);
        // confirm that consumers of these parameters can deal with that.
        DecompositionBiasParameter.ActualValue = new DoubleValue(bias / meanSE);
        DecompositionVarianceParameter.ActualValue = new DoubleValue(variance / meanSE);
        DecompositionCovarianceParameter.ActualValue = new DoubleValue(covariance / meanSE);
      }

      return mse;
    }

    /// <summary>
    /// Determines the linear scaling parameters for the solution on the given rows
    /// (see <see cref="CalculateScalingParameters"/>) and then evaluates the scaled solution
    /// (see <see cref="CalculateWithScaling"/>). The solution is interpreted twice.
    /// </summary>
    /// <param name="beta">Receives the multiplicative scaling factor.</param>
    /// <param name="alpha">Receives the additive scaling offset.</param>
    /// <returns>The mean of the squared errors of the scaled solution.</returns>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha, out double meanSE, out double varianceSE, out int count, out double bias, out double variance, out double covariance) {
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);

      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha, out meanSE, out varianceSE, out count, out bias, out variance, out covariance);
    }

    /// <summary>
    /// Evaluates the solution on the given rows using fixed scaling parameters.
    /// Every estimated value is transformed to <c>estimated * beta + alpha</c>; NaN results are replaced by
    /// <paramref name="upperEstimationLimit"/>, all other results are clamped to
    /// [<paramref name="lowerEstimationLimit"/>, <paramref name="upperEstimationLimit"/>].
    /// </summary>
    /// <param name="meanSE">Receives the mean of the squared errors (same as the return value).</param>
    /// <param name="varianceSE">Receives the variance of the squared errors.</param>
    /// <param name="count">Receives the number of evaluated samples.</param>
    /// <param name="bias">Receives the squared difference of the means of original and estimated values.</param>
    /// <param name="variance">Receives the squared difference of the standard deviations of original and estimated values.</param>
    /// <param name="covariance">Receives 2 * sO * sE * (1 - r), with r taken as the square root of the R² of original and estimated values.</param>
    /// <returns>The mean of the squared errors.</returns>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha, out double meanSE, out double varianceSE, out int count, out double bias, out double variance, out double covariance) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      OnlineMeanAndVarianceCalculator seEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator originalMeanEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator estimatedMeanEvaluator = new OnlineMeanAndVarianceCalculator();
      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        // Both enumerators are always advanced together and their results are remembered, so that a
        // length mismatch (even by a single element) is reliably detected after the loop.
        bool hasOriginal = originalEnumerator.MoveNext();
        bool hasEstimated = estimatedEnumerator.MoveNext();
        while (hasOriginal && hasEstimated) {
          double estimated = estimatedEnumerator.Current * beta + alpha;
          double original = originalEnumerator.Current;
          if (double.IsNaN(estimated))
            estimated = upperEstimationLimit;
          else
            estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
          double error = estimated - original;
          error *= error;
          seEvaluator.Add(error);
          originalMeanEvaluator.Add(original);
          estimatedMeanEvaluator.Add(estimated);
          r2Evaluator.Add(original, estimated);

          hasOriginal = originalEnumerator.MoveNext();
          hasEstimated = estimatedEnumerator.MoveNext();
        }

        if (hasOriginal != hasEstimated) {
          throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
        }
      }

      meanSE = seEvaluator.Mean;
      varianceSE = seEvaluator.Variance;
      count = seEvaluator.Count;
      bias = (originalMeanEvaluator.Mean - estimatedMeanEvaluator.Mean);
      bias *= bias;

      double sO = Math.Sqrt(originalMeanEvaluator.Variance);
      double sE = Math.Sqrt(estimatedMeanEvaluator.Variance);
      variance = sO - sE;
      variance *= variance;
      // NOTE(review): the square root of R² is never negative, so the sign of the correlation is lost here;
      // confirm that this is intended for negatively correlated estimates.
      double r = Math.Sqrt(r2Evaluator.RSquared);
      covariance = 2 * sO * sE * (1 - r);
      return seEvaluator.Mean;
    }

    /// <summary>
    /// Calculates linear scaling parameters in one pass.
    /// The formulas to calculate the scaling parameters were taken from Scaled Symbolic Regression by Maarten Keijzer.
    /// http://www.springerlink.com/content/x035121165125175/
    /// Pairs in which either value is NaN, infinite or outside of (-1.0E07, 1.0E07) are ignored.
    /// If fewer than two valid pairs remain, beta = 1 and alpha = 0 are returned; if the variance of the
    /// estimated values is almost zero, beta is set to 1.
    /// </summary>
    /// <param name="original">The target values.</param>
    /// <param name="estimated">The unscaled estimated values.</param>
    /// <param name="beta">Receives the multiplicative scaling factor.</param>
    /// <param name="alpha">Receives the additive scaling offset.</param>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      OnlineMeanAndVarianceCalculator yVarianceCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator tMeanCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineCovarianceEvaluator ytCovarianceEvaluator = new OnlineCovarianceEvaluator();
      int cnt = 0;

      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        // Remember the result of each MoveNext so that a length mismatch of exactly one element
        // is not swallowed by the loop condition.
        bool hasOriginal = originalEnumerator.MoveNext();
        bool hasEstimated = estimatedEnumerator.MoveNext();
        while (hasOriginal && hasEstimated) {
          double y = estimatedEnumerator.Current;
          double t = originalEnumerator.Current;
          if (IsValidValue(t) && IsValidValue(y)) {
            tMeanCalculator.Add(t);
            yVarianceCalculator.Add(y);
            ytCovarianceEvaluator.Add(y, t);

            cnt++;
          }

          hasOriginal = originalEnumerator.MoveNext();
          hasEstimated = estimatedEnumerator.MoveNext();
        }

        if (hasOriginal != hasEstimated)
          throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
      }

      if (cnt < 2) {
        alpha = 0;
        beta = 1;
      } else {
        if (yVarianceCalculator.Variance.IsAlmost(0.0))
          beta = 1;
        else
          beta = ytCovarianceEvaluator.Covariance / yVarianceCalculator.Variance;

        alpha = tMeanCalculator.Mean - beta * yVarianceCalculator.Mean;
      }
    }

    /// <summary>
    /// Returns true if <paramref name="d"/> is a finite number strictly between -1.0E07 and 1.0E07.
    /// </summary>
    private static bool IsValidValue(double d) {
      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -1.0E07 && d < 1.0E07;  // don't consider values of very large magnitude for scaling
    }
  }
}
Note: See TracBrowser for help on using the repository browser.