Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs @ 3995

Last change on this file since 3995 was 3995, checked in by mkommend, 14 years ago

improved !SymbolicRegressionScaledMSEEvaluator (ticket #1074)

File size: 8.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis;
35using HeuristicLab.Operators;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37using HeuristicLab.Problems.DataAnalysis.Symbolic;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Mean squared error evaluator that first fits a linear scaling (estimated * beta + alpha) of the
  /// tree output to the target values and then measures the error of the scaled, clamped estimates.
  /// The scaling parameters found during evaluation are written to the "Alpha" and "Beta" lookup parameters.
  /// </summary>
  [Item("SymbolicRegressionScaledMeanSquaredErrorEvaluator", "Calculates the mean squared error of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public class SymbolicRegressionScaledMeanSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    // Values with a magnitude at or above this limit are ignored when the scaling parameters are fitted.
    private const double ScalingValueLimit = 1.0E07;

    #region parameter properties
    /// <summary>Lookup parameter holding the additive scaling term (alpha).</summary>
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters["Alpha"]; }
    }
    /// <summary>Lookup parameter holding the multiplicative scaling term (beta).</summary>
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters["Beta"]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="AlphaParameter"/>.</summary>
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BetaParameter"/>.</summary>
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Creates the evaluator and registers the "Alpha" and "Beta" lookup parameters
    /// in addition to whatever the base constructor registers.
    /// </summary>
    public SymbolicRegressionScaledMeanSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new LookupParameter<DoubleValue>("Alpha", "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>("Beta", "Beta parameter for linear scaling of the estimated values."));
    }

    /// <summary>
    /// Computes the scaled mean squared error of <paramref name="solution"/> on the rows
    /// [samplesStart, samplesEnd) and stores the fitted scaling parameters in
    /// <see cref="AlphaParameter"/> and <see cref="BetaParameter"/>.
    /// </summary>
    /// <returns>The mean squared error of the linearly scaled estimates.</returns>
    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IntValue samplesStart, IntValue samplesEnd) {
      double alpha, beta;
      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value, out beta, out alpha);
      AlphaParameter.ActualValue = new DoubleValue(alpha);
      BetaParameter.ActualValue = new DoubleValue(beta);
      return mse;
    }

    /// <summary>
    /// Fits the linear scaling parameters for <paramref name="solution"/> on the rows
    /// [start, end) and returns the mean squared error of the scaled estimates.
    /// </summary>
    /// <param name="beta">Receives the multiplicative scaling factor.</param>
    /// <param name="alpha">Receives the additive scaling offset.</param>
    /// <returns>The mean squared error as computed by <see cref="CalculateWithScaling"/>.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when the original and estimated sequences differ in length or when no pair of
    /// values is usable for the error calculation.
    /// </exception>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) {
      // NOTE(review): the tree is interpreted here and a second time inside CalculateWithScaling.
      // Caching the estimates would halve the work, provided the interpreter is deterministic - confirm before changing.
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);

      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, start, end, beta, alpha);
    }

    /// <summary>
    /// Calculates the mean squared error between the target values and the estimates
    /// scaled as <c>estimated * beta + alpha</c> on the rows [start, end).
    /// Scaled estimates are clamped to [lowerEstimationLimit, upperEstimationLimit]; an estimate
    /// that is NaN after clamping is replaced by <paramref name="upperEstimationLimit"/>.
    /// Pairs in which either value is still NaN or infinite are skipped.
    /// </summary>
    /// <returns>Sum of squared errors divided by the number of pairs that were used.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when the original and estimated sequences differ in length, or when no pair was usable.
    /// </exception>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, double beta, double alpha) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
      double cnt = 0;
      double sse = 0;

      // The enumerators are disposable; 'using' releases them even when an exception is thrown.
      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        // Both enumerators are always advanced together and the results are remembered, so that a
        // length difference of exactly one element is still visible after the loop has ended.
        bool hasOriginal = originalEnumerator.MoveNext();
        bool hasEstimated = estimatedEnumerator.MoveNext();

        while (hasOriginal && hasEstimated) {
          double estimated = estimatedEnumerator.Current * beta + alpha;
          double original = originalEnumerator.Current;
          estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
          if (double.IsNaN(estimated))
            estimated = upperEstimationLimit;
          if (!double.IsNaN(estimated) && !double.IsInfinity(estimated) &&
              !double.IsNaN(original) && !double.IsInfinity(original)) {
            double error = estimated - original;
            sse += error * error;
            cnt++;
          }

          hasOriginal = originalEnumerator.MoveNext();
          hasEstimated = estimatedEnumerator.MoveNext();
        }

        if (hasOriginal || hasEstimated) {
          throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
        }
      }

      if (cnt == 0) {
        throw new ArgumentException("Mean squared errors is not defined for input vectors of NaN or Inf");
      }

      return sse / cnt;
    }

    /// <summary>
    /// Calculates linear scaling parameters in one pass.
    /// The formulas to calculate the scaling parameters were taken from Scaled Symbolic Regression by Maarten Keijzer.
    /// http://www.springerlink.com/content/x035121165125175/
    /// </summary>
    /// <remarks>
    /// Only pairs in which both values pass <see cref="IsValidValue"/> contribute to the fit.
    /// With fewer than two such pairs the identity scaling (beta = 1, alpha = 0) is returned.
    /// When the variance of the estimated values is almost zero, beta is set to 1.
    /// </remarks>
    /// <param name="original">Target values.</param>
    /// <param name="estimated">Unscaled estimated values; must have as many elements as <paramref name="original"/>.</param>
    /// <param name="beta">Receives the multiplicative scaling factor.</param>
    /// <param name="alpha">Receives the additive scaling offset.</param>
    /// <exception cref="ArgumentException">Thrown when the two sequences differ in length.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      int cnt = 0;
      double tSum = 0;
      double ySum = 0;
      double yySum = 0;
      double ytSum = 0;

      // The enumerators are disposable; 'using' releases them even when an exception is thrown.
      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        // Remember the result of every MoveNext call; calling MoveNext again after the loop would
        // miss a sequence that is longer by exactly one element, because that element was already consumed.
        bool hasOriginal = originalEnumerator.MoveNext();
        bool hasEstimated = estimatedEnumerator.MoveNext();

        while (hasOriginal && hasEstimated) {
          double y = estimatedEnumerator.Current;
          double t = originalEnumerator.Current;
          if (IsValidValue(t) && IsValidValue(y)) {
            cnt++;
            tSum += t;
            ySum += y;
            yySum += y * y;
            ytSum += t * y;
          }

          hasOriginal = originalEnumerator.MoveNext();
          hasEstimated = estimatedEnumerator.MoveNext();
        }

        if (hasOriginal || hasEstimated)
          throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
      }

      if (cnt < 2) {
        // Not enough data to fit a line: fall back to the identity scaling.
        alpha = 0;
        beta = 1;
      } else {
        double tMean = tSum / cnt;
        double yMean = ySum / cnt;
        // Division by cnt is omitted because the variance and covariance are divided afterwards.
        double yVariance = yySum - 2 * yMean * ySum + cnt * yMean * yMean;
        double ytCovariance = ytSum - tMean * ySum - yMean * tSum + cnt * yMean * tMean;

        if (yVariance.IsAlmost(0.0))
          beta = 1;
        else
          beta = ytCovariance / yVariance;

        alpha = tMean - beta * yMean;
      }
    }

    /// <summary>
    /// Returns true when <paramref name="d"/> is neither NaN nor infinite and lies strictly
    /// between -1.0E07 and 1.0E07; very large or very small values are not considered for scaling.
    /// </summary>
    private static bool IsValidValue(double d) {
      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -ScalingValueLimit && d < ScalingValueLimit;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.