Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Evaluators/SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs @ 6347

Last change on this file since 6347 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 7.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Evaluators;
31using HeuristicLab.Problems.DataAnalysis.Symbolic;
32
33namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
34  [Item("SymbolicRegressionScaledMeanSquaredErrorEvaluator", "Calculates the mean squared error of a linearly scaled symbolic regression solution.")]
35  [StorableClass]
36  public sealed class SymbolicRegressionScaledMeanSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
37
38    #region parameter properties
39    public ILookupParameter<DoubleValue> AlphaParameter {
40      get { return (ILookupParameter<DoubleValue>)Parameters["Alpha"]; }
41    }
42    public ILookupParameter<DoubleValue> BetaParameter {
43      get { return (ILookupParameter<DoubleValue>)Parameters["Beta"]; }
44    }
45    #endregion
46    #region properties
47    public DoubleValue Alpha {
48      get { return AlphaParameter.ActualValue; }
49      set { AlphaParameter.ActualValue = value; }
50    }
51    public DoubleValue Beta {
52      get { return BetaParameter.ActualValue; }
53      set { BetaParameter.ActualValue = value; }
54    }
55    #endregion
56    [StorableConstructor]
57    private SymbolicRegressionScaledMeanSquaredErrorEvaluator(bool deserializing) : base(deserializing) { }
58    private SymbolicRegressionScaledMeanSquaredErrorEvaluator(SymbolicRegressionScaledMeanSquaredErrorEvaluator original, Cloner cloner) : base(original, cloner) { }
59    public SymbolicRegressionScaledMeanSquaredErrorEvaluator()
60      : base() {
61      Parameters.Add(new LookupParameter<DoubleValue>("Alpha", "Alpha parameter for linear scaling of the estimated values."));
62      Parameters.Add(new LookupParameter<DoubleValue>("Beta", "Beta parameter for linear scaling of the estimated values."));
63    }
64
65    public override IDeepCloneable Clone(Cloner cloner) {
66      return new SymbolicRegressionScaledMeanSquaredErrorEvaluator(this, cloner);
67    }
68
69    public override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows) {
70      double alpha, beta;
71      double mse = Calculate(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, out beta, out alpha);
72      if (ExecutionContext != null) {
73        AlphaParameter.ActualValue = new DoubleValue(alpha);
74        BetaParameter.ActualValue = new DoubleValue(beta);
75      }
76      return mse;
77    }
78
79    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha) {
80      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
81      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
82      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
83
84      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha);
85    }
86
87    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha) {
88      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
89      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
90      IEnumerator<double> originalEnumerator = originalValues.GetEnumerator();
91      IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator();
92      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
93
94      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
95        double estimated = estimatedEnumerator.Current * beta + alpha;
96        double original = originalEnumerator.Current;
97        if (double.IsNaN(estimated))
98          estimated = upperEstimationLimit;
99        else
100          estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
101        mseEvaluator.Add(original, estimated);
102      }
103
104      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
105        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
106      } else {
107        return mseEvaluator.MeanSquaredError;
108      }
109    }
110
111    /// <summary>
112    /// Calculates linear scaling parameters in one pass.
113    /// The formulas to calculate the scaling parameters were taken from Scaled Symblic Regression by Maarten Keijzer.
114    /// http://www.springerlink.com/content/x035121165125175/
115    /// </summary>
116    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
117      IEnumerator<double> originalEnumerator = original.GetEnumerator();
118      IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator();
119      OnlineMeanAndVarianceCalculator yVarianceCalculator = new OnlineMeanAndVarianceCalculator();
120      OnlineMeanAndVarianceCalculator tMeanCalculator = new OnlineMeanAndVarianceCalculator();
121      OnlineCovarianceEvaluator ytCovarianceEvaluator = new OnlineCovarianceEvaluator();
122      int cnt = 0;
123
124      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
125        double y = estimatedEnumerator.Current;
126        double t = originalEnumerator.Current;
127        if (IsValidValue(t) && IsValidValue(y)) {
128          tMeanCalculator.Add(t);
129          yVarianceCalculator.Add(y);
130          ytCovarianceEvaluator.Add(y, t);
131
132          cnt++;
133        }
134      }
135
136      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext())
137        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
138      if (cnt < 2) {
139        alpha = 0;
140        beta = 1;
141      } else {
142        if (yVarianceCalculator.PopulationVariance.IsAlmost(0.0))
143          beta = 1;
144        else
145          beta = ytCovarianceEvaluator.Covariance / yVarianceCalculator.PopulationVariance;
146
147        alpha = tMeanCalculator.Mean - beta * yVarianceCalculator.Mean;
148      }
149    }
150
151    private static bool IsValidValue(double d) {
152      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -1.0E07 && d < 1.0E07;  // don't consider very large or very small values for scaling
153    }
154  }
155}
Note: See TracBrowser for help on using the repository browser.