Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs @ 4054

Last change on this file since 4054 was 4044, checked in by mkommend, 14 years ago

added statistical comparator operator for SymReg OSGP (ticket #1082)

File size: 9.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis;
35using HeuristicLab.Operators;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37using HeuristicLab.Problems.DataAnalysis.Symbolic;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Evaluator that linearly scales the output of a symbolic regression solution and
  /// returns the mean of the squared errors as quality. In addition it publishes the
  /// scaling parameters (alpha, beta), the variance of the squared errors and the
  /// number of evaluated samples through lookup parameters.
  /// </summary>
  [Item("SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator", "Calculates the mean and the variance of the squared errors of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public class SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string QualityVarianceParameterName = "QualityVariance";
    private const string QualitySamplesParameterName = "QualitySamples";

    private const string LengthMismatchMessage = "Number of elements in original and estimated enumeration doesn't match.";

    // Values outside of (-ScalingValueLimit, ScalingValueLimit) are ignored when the scaling parameters are calculated.
    private const double ScalingValueLimit = 1.0E07;

    #region parameter properties
    /// <summary>Lookup parameter for the additive scaling term (offset).</summary>
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }
    }
    /// <summary>Lookup parameter for the multiplicative scaling term (slope).</summary>
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BetaParameterName]; }
    }
    /// <summary>Lookup parameter for the variance of the squared errors.</summary>
    public ILookupParameter<DoubleValue> QualityVarianceParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[QualityVarianceParameterName]; }
    }
    /// <summary>Lookup parameter for the number of evaluated samples.</summary>
    public ILookupParameter<IntValue> QualitySamplesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[QualitySamplesParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="AlphaParameter"/>.</summary>
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BetaParameter"/>.</summary>
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="QualityVarianceParameter"/>.</summary>
    public DoubleValue QualityVariance {
      get { return QualityVarianceParameter.ActualValue; }
      set { QualityVarianceParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="QualitySamplesParameter"/>.</summary>
    public IntValue QualitySamples {
      get { return QualitySamplesParameter.ActualValue; }
      set { QualitySamplesParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Creates the evaluator and registers the lookup parameters for alpha, beta,
    /// the variance of the squared errors and the number of evaluated samples.
    /// </summary>
    public SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new LookupParameter<DoubleValue>(AlphaParameterName, "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(BetaParameterName, "Beta parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(QualityVarianceParameterName, "A parameter which stores the variance of the squared errors."));
      Parameters.Add(new LookupParameter<IntValue>(QualitySamplesParameterName, " The number of evaluated samples."));
    }

    /// <summary>
    /// Evaluates <paramref name="solution"/> on the given rows, stores alpha, beta, the variance of the
    /// squared errors and the sample count in the corresponding parameters and returns the mean squared error.
    /// </summary>
    /// <returns>The mean of the squared errors of the linearly scaled solution.</returns>
    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IEnumerable<int> rows) {
      double alpha, beta;
      double meanSE, varianceSE;
      int count;
      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, rows, out beta, out alpha, out meanSE, out varianceSE, out count);
      Alpha = new DoubleValue(alpha);
      Beta = new DoubleValue(beta);
      QualityVariance = new DoubleValue(varianceSE);
      QualitySamples = new IntValue(count);
      return mse;
    }

    /// <summary>
    /// Calculates the linear scaling parameters for <paramref name="solution"/> and afterwards the mean and
    /// variance of the squared errors of the scaled estimates.
    /// </summary>
    /// <param name="beta">Receives the multiplicative scaling parameter.</param>
    /// <param name="alpha">Receives the additive scaling parameter.</param>
    /// <param name="meanSE">Receives the mean of the squared errors.</param>
    /// <param name="varianceSE">Receives the variance of the squared errors.</param>
    /// <param name="count">Receives the number of evaluated samples.</param>
    /// <returns>The mean of the squared errors.</returns>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha, out double meanSE, out double varianceSE, out int count) {
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      // The estimated values are needed for two passes (scaling parameters, then errors);
      // materialize them once so that the tree is not interpreted a second time.
      double[] estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows).ToArray();
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);

      return CalculateScaledSquaredErrors(originalValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, beta, alpha, out meanSE, out varianceSE, out count);
    }

    /// <summary>
    /// Calculates the mean and variance of the squared errors of <paramref name="solution"/> after scaling
    /// its estimates with the given <paramref name="beta"/> and <paramref name="alpha"/>.
    /// </summary>
    /// <param name="meanSE">Receives the mean of the squared errors.</param>
    /// <param name="varianceSE">Receives the variance of the squared errors.</param>
    /// <param name="count">Receives the number of evaluated samples.</param>
    /// <returns>The mean of the squared errors.</returns>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha, out double meanSE, out double varianceSE, out int count) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
      return CalculateScaledSquaredErrors(originalValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, beta, alpha, out meanSE, out varianceSE, out count);
    }

    /// <summary>
    /// Calculates linear scaling parameters in one pass.
    /// The formulas to calculate the scaling parameters were taken from Scaled Symbolic Regression by Maarten Keijzer.
    /// http://www.springerlink.com/content/x035121165125175/
    /// </summary>
    /// <param name="original">The target values.</param>
    /// <param name="estimated">The unscaled estimated values.</param>
    /// <param name="beta">Receives the multiplicative scaling parameter (1 if it cannot be determined).</param>
    /// <param name="alpha">Receives the additive scaling parameter (0 if fewer than two valid pairs exist).</param>
    /// <exception cref="ArgumentException">Thrown if the number of original and estimated values differs.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      OnlineMeanAndVarianceCalculator yVarianceCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineMeanAndVarianceCalculator tMeanCalculator = new OnlineMeanAndVarianceCalculator();
      OnlineCovarianceEvaluator ytCovarianceEvaluator = new OnlineCovarianceEvaluator();
      int cnt = 0;

      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        while (MoveNextBoth(originalEnumerator, estimatedEnumerator)) {
          double y = estimatedEnumerator.Current;
          double t = originalEnumerator.Current;
          // pairs with an invalid or extreme value are skipped entirely
          if (IsValidValue(t) && IsValidValue(y)) {
            tMeanCalculator.Add(t);
            yVarianceCalculator.Add(y);
            ytCovarianceEvaluator.Add(y, t);

            cnt++;
          }
        }
      }

      if (cnt < 2) {
        // not enough valid pairs to estimate a scaling => identity scaling
        alpha = 0;
        beta = 1;
      } else {
        // guard against division by (almost) zero for constant estimates
        if (yVarianceCalculator.Variance.IsAlmost(0.0))
          beta = 1;
        else
          beta = ytCovarianceEvaluator.Covariance / yVarianceCalculator.Variance;

        alpha = tMeanCalculator.Mean - beta * yVarianceCalculator.Mean;
      }
    }

    /// <summary>
    /// Scales each estimated value (estimated * beta + alpha), limits it to the estimation limits
    /// (NaN is replaced by the upper limit) and accumulates the squared error to the original value.
    /// </summary>
    private static double CalculateScaledSquaredErrors(IEnumerable<double> original, IEnumerable<double> estimated, double lowerEstimationLimit, double upperEstimationLimit, double beta, double alpha, out double meanSE, out double varianceSE, out int count) {
      OnlineMeanAndVarianceCalculator seEvaluator = new OnlineMeanAndVarianceCalculator();

      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        while (MoveNextBoth(originalEnumerator, estimatedEnumerator)) {
          double estimated_ = estimatedEnumerator.Current * beta + alpha;
          double original_ = originalEnumerator.Current;
          if (double.IsNaN(estimated_))
            estimated_ = upperEstimationLimit;
          else
            estimated_ = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated_));
          double error = estimated_ - original_;
          error *= error;
          seEvaluator.Add(error);
        }
      }

      meanSE = seEvaluator.Mean;
      varianceSE = seEvaluator.Variance;
      count = seEvaluator.Count;
      return seEvaluator.Mean;
    }

    /// <summary>
    /// Advances both enumerators by one element and returns whether both have a current element.
    /// Comparing the two results of the same step detects sequences of different length reliably,
    /// including the case where one sequence is longer by exactly one element.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown if exactly one of the enumerators is exhausted.</exception>
    private static bool MoveNextBoth(IEnumerator<double> originalEnumerator, IEnumerator<double> estimatedEnumerator) {
      bool hasOriginal = originalEnumerator.MoveNext();
      bool hasEstimated = estimatedEnumerator.MoveNext();
      if (hasOriginal != hasEstimated)
        throw new ArgumentException(LengthMismatchMessage);
      return hasOriginal;
    }

    /// <summary>
    /// Returns true if <paramref name="d"/> is neither NaN nor infinite and lies strictly inside
    /// (-1.0E07, 1.0E07).
    /// </summary>
    private static bool IsValidValue(double d) {
      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -ScalingValueLimit && d < ScalingValueLimit;  // don't consider very large or very small values for scaling
    }
  }
}
Note: See TracBrowser for help on using the repository browser.