Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs @ 4027

Last change on this file since 4027 was 4027, checked in by gkronber, 14 years ago

Moved code for calculation of covariance from the scaled MSE evaluator into a separate online evaluator. #1081

File size: 7.5 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis;
35using HeuristicLab.Operators;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37using HeuristicLab.Problems.DataAnalysis.Symbolic;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Evaluator that applies a linear transformation (estimated * beta + alpha) to the output
  /// of a symbolic regression solution before calculating the mean squared error against the
  /// target variable. The scaling parameters are derived from the data and published through
  /// the "Alpha" and "Beta" lookup parameters.
  /// </summary>
  [Item("SymbolicRegressionScaledMeanSquaredErrorEvaluator", "Calculates the mean squared error of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public class SymbolicRegressionScaledMeanSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string ElementCountMismatchMessage = "Number of elements in original and estimated enumeration doesn't match.";

    // Values with an absolute value of 1.0E07 or more are ignored when the scaling parameters are calculated.
    private const double ScalingValueLimit = 1.0E07;

    #region parameter properties
    /// <summary>Lookup parameter holding the additive scaling term (alpha).</summary>
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }
    }
    /// <summary>Lookup parameter holding the multiplicative scaling term (beta).</summary>
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BetaParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="AlphaParameter"/>.</summary>
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BetaParameter"/>.</summary>
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Creates the evaluator and registers the "Alpha" and "Beta" lookup parameters
    /// in addition to the parameters of the base class.
    /// </summary>
    public SymbolicRegressionScaledMeanSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new LookupParameter<DoubleValue>(AlphaParameterName, "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(BetaParameterName, "Beta parameter for linear scaling of the estimated values."));
    }

    /// <summary>
    /// Calculates the scaled mean squared error of <paramref name="solution"/> and stores the
    /// scaling parameters that were used in <see cref="AlphaParameter"/> and <see cref="BetaParameter"/>.
    /// </summary>
    /// <returns>The mean squared error of the linearly scaled solution.</returns>
    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IntValue samplesStart, IntValue samplesEnd) {
      double alpha, beta;
      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value, out beta, out alpha);
      AlphaParameter.ActualValue = new DoubleValue(alpha);
      BetaParameter.ActualValue = new DoubleValue(beta);
      return mse;
    }

    /// <summary>
    /// Determines the linear scaling parameters for <paramref name="solution"/> on the given rows and
    /// returns the mean squared error of the scaled solution.
    /// The solution is interpreted twice: once to derive the scaling parameters and once to calculate the error.
    /// </summary>
    /// <param name="interpreter">Interpreter used to evaluate the tree on rows start .. end - 1.</param>
    /// <param name="solution">The symbolic expression tree to evaluate.</param>
    /// <param name="lowerEstimationLimit">Lower bound applied to the scaled estimated values.</param>
    /// <param name="upperEstimationLimit">Upper bound applied to the scaled estimated values (also used in place of NaN).</param>
    /// <param name="dataset">Dataset providing the input and target values.</param>
    /// <param name="targetVariable">Name of the target variable in <paramref name="dataset"/>.</param>
    /// <param name="start">First row index.</param>
    /// <param name="end">End row index; the interpreter is given the rows start .. end - 1.</param>
    /// <param name="beta">Receives the multiplicative scaling parameter.</param>
    /// <param name="alpha">Receives the additive scaling parameter.</param>
    /// <returns>The mean squared error of the linearly scaled solution.</returns>
    /// <exception cref="ArgumentException">Thrown when the number of original and estimated values differs.</exception>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) {
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);

      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, start, end, beta, alpha);
    }

    /// <summary>
    /// Calculates the mean squared error between the target values and the estimated values of
    /// <paramref name="solution"/> after applying the given linear scaling (estimated * beta + alpha).
    /// Scaled values are limited to [lowerEstimationLimit, upperEstimationLimit]; NaN is replaced by
    /// <paramref name="upperEstimationLimit"/>.
    /// </summary>
    /// <returns>The mean squared error of the scaled and limited estimated values.</returns>
    /// <exception cref="ArgumentException">Thrown when the number of original and estimated values differs.</exception>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, double beta, double alpha) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();

      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        while (MoveNextInLockstep(originalEnumerator, estimatedEnumerator)) {
          double estimated = estimatedEnumerator.Current * beta + alpha;
          double original = originalEnumerator.Current;
          if (double.IsNaN(estimated))
            estimated = upperEstimationLimit;
          else
            estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
          mseEvaluator.Add(original, estimated);
        }
      }

      return mseEvaluator.MeanSquaredError;
    }

    /// <summary>
    /// Calculates linear scaling parameters in one pass.
    /// The formulas to calculate the scaling parameters were taken from Scaled Symbolic Regression by Maarten Keijzer.
    /// http://www.springerlink.com/content/x035121165125175/
    /// </summary>
    /// <remarks>
    /// Only pairs in which both values pass <see cref="IsValidValue"/> are used.
    /// With fewer than two usable pairs the identity scaling (alpha = 0, beta = 1) is returned.
    /// Otherwise beta = cov(estimated, original) / var(estimated), or 1 if that variance is almost zero,
    /// and alpha = mean(original) - beta * mean(estimated).
    /// </remarks>
    /// <param name="original">The target values.</param>
    /// <param name="estimated">The unscaled estimated values.</param>
    /// <param name="beta">Receives the multiplicative scaling parameter.</param>
    /// <param name="alpha">Receives the additive scaling parameter.</param>
    /// <exception cref="ArgumentException">Thrown when the number of original and estimated values differs.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      OnlineMeanAndVarianceCalculator yVarianceCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator tMeanCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineCovarianceEvaluator ytCovarianceEvaluator = new OnlineCovarianceEvaluator();
      int cnt = 0;

      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        while (MoveNextInLockstep(originalEnumerator, estimatedEnumerator)) {
          double y = estimatedEnumerator.Current;
          double t = originalEnumerator.Current;
          if (IsValidValue(t) && IsValidValue(y)) {
            tMeanCalculator.Add(t);
            yVarianceCalculator.Add(y);
            ytCovarianceEvaluator.Add(y, t);

            cnt++;
          }
        }
      }

      if (cnt < 2) {
        alpha = 0;
        beta = 1;
      } else {
        if (yVarianceCalculator.Variance.IsAlmost(0.0))
          beta = 1;
        else
          beta = ytCovarianceEvaluator.Covariance / yVarianceCalculator.Variance;

        alpha = tMeanCalculator.Mean - beta * yVarianceCalculator.Mean;
      }
    }

    /// <summary>
    /// Advances both enumerators by one element.
    /// </summary>
    /// <remarks>
    /// Both MoveNext results are compared directly. Advancing both in a loop condition and probing
    /// again afterwards misses a length difference of exactly one element, because the surplus
    /// element has already been consumed by the failing loop condition.
    /// </remarks>
    /// <returns>True if both enumerators moved to a new element; false if both are exhausted.</returns>
    /// <exception cref="ArgumentException">Thrown when exactly one of the enumerators is exhausted.</exception>
    private static bool MoveNextInLockstep(IEnumerator<double> originalEnumerator, IEnumerator<double> estimatedEnumerator) {
      bool hasOriginal = originalEnumerator.MoveNext();
      bool hasEstimated = estimatedEnumerator.MoveNext();
      if (hasOriginal != hasEstimated)
        throw new ArgumentException(ElementCountMismatchMessage);
      return hasOriginal;
    }

    /// <summary>
    /// Returns true if <paramref name="d"/> is a finite number strictly between
    /// -<see cref="ScalingValueLimit"/> and <see cref="ScalingValueLimit"/>.
    /// </summary>
    private static bool IsValidValue(double d) {
      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -ScalingValueLimit && d < ScalingValueLimit;  // don't consider very large positive or negative values for scaling
    }
  }
}
Note: See TracBrowser for help on using the repository browser.