Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs @ 3714

Last change on this file since 3714 was 3532, checked in by gkronber, 15 years ago

Implemented linear scaling. #938 (Data types and operators for regression problems)

File size: 7.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis;
35using HeuristicLab.Operators;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37using HeuristicLab.Problems.DataAnalysis.Symbolic;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Evaluator that linearly scales the output of a symbolic regression solution
  /// (<c>estimated * beta + alpha</c>) before its error is calculated.
  /// The scaling parameters determined by <see cref="Evaluate"/> are written to the
  /// "Alpha" and "Beta" lookup parameters.
  /// </summary>
  [Item("SymbolicRegressionScaledMeanSquaredErrorEvaluator", "Calculates the mean squared error of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public class SymbolicRegressionScaledMeanSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";

    #region parameter properties
    /// <summary>Lookup parameter holding the additive scaling term (alpha).</summary>
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }
    }
    /// <summary>Lookup parameter holding the multiplicative scaling factor (beta).</summary>
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BetaParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="AlphaParameter"/>.</summary>
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BetaParameter"/>.</summary>
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Creates the evaluator and registers the "Alpha" and "Beta" lookup parameters
    /// in addition to the parameters added by the base class.
    /// </summary>
    public SymbolicRegressionScaledMeanSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new LookupParameter<DoubleValue>(AlphaParameterName, "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>(BetaParameterName, "Beta parameter for linear scaling of the estimated values."));
    }

    /// <summary>
    /// Evaluates <paramref name="solution"/> via <see cref="Calculate"/> and stores the
    /// scaling parameters that were determined in <see cref="Alpha"/> and <see cref="Beta"/>.
    /// </summary>
    /// <returns>The error value returned by <see cref="Calculate"/>.</returns>
    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IntValue samplesStart, IntValue samplesEnd) {
      double alpha, beta;
      // Calculate declares its out parameters in the order (beta, alpha).
      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value, out beta, out alpha);
      Alpha = new DoubleValue(alpha);
      Beta = new DoubleValue(beta);
      return mse;
    }

    /// <summary>
    /// Determines the linear scaling parameters for <paramref name="solution"/> on the rows
    /// <paramref name="start"/> (inclusive) to <paramref name="end"/> (exclusive), applies
    /// them, clamps the scaled estimates to the estimation limits and passes the result
    /// together with the target values to <c>SimpleMSEEvaluator.Calculate</c>.
    /// </summary>
    /// <param name="beta">Receives the multiplicative scaling factor.</param>
    /// <param name="alpha">Receives the additive scaling term.</param>
    /// <returns>The value returned by <c>SimpleMSEEvaluator.Calculate</c>.</returns>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) {
      IEnumerable<double> scaledValues = CalculateScaledEstimatedValues(interpreter, solution, dataset, targetVariable, start, end, out beta, out alpha);
      IEnumerable<double> boundedValues = BoundEstimatedValues(scaledValues, lowerEstimationLimit, upperEstimationLimit);
      var originalValues = dataset.GetVariableValues(targetVariable, start, end);
      return SimpleMSEEvaluator.Calculate(originalValues, boundedValues);
    }

    /// <summary>
    /// Same as <see cref="Calculate"/>, but uses the supplied scaling parameters
    /// instead of determining them from the data.
    /// </summary>
    /// <param name="beta">Multiplicative scaling factor applied to every estimated value.</param>
    /// <param name="alpha">Additive scaling term applied to every estimated value.</param>
    /// <returns>The value returned by <c>SimpleMSEEvaluator.Calculate</c>.</returns>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, double beta, double alpha) {
      IEnumerable<double> rawValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
      IEnumerable<double> scaledValues = rawValues.Select(x => x * beta + alpha);
      IEnumerable<double> boundedValues = BoundEstimatedValues(scaledValues, lowerEstimationLimit, upperEstimationLimit);
      var originalValues = dataset.GetVariableValues(targetVariable, start, end);
      return SimpleMSEEvaluator.Calculate(originalValues, boundedValues);
    }

    /// <summary>
    /// Lazily clamps every value to [<paramref name="lowerEstimationLimit"/>,
    /// <paramref name="upperEstimationLimit"/>]; values that are still NaN after
    /// clamping are replaced by <paramref name="upperEstimationLimit"/>.
    /// </summary>
    private static IEnumerable<double> BoundEstimatedValues(IEnumerable<double> values, double lowerEstimationLimit, double upperEstimationLimit) {
      return values.Select(x => {
        // Math.Max/Math.Min return NaN when an argument is NaN, so a NaN estimate
        // survives the clamping and has to be replaced explicitly.
        double bounded = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x));
        return double.IsNaN(bounded) ? upperEstimationLimit : bounded;
      });
    }

    /// <summary>
    /// Evaluates <paramref name="solution"/> on the rows <paramref name="start"/> (inclusive)
    /// to <paramref name="end"/> (exclusive), determines the scaling parameters against the
    /// target variable and returns the scaled estimates (<c>estimated * beta + alpha</c>).
    /// </summary>
    private static IEnumerable<double> CalculateScaledEstimatedValues(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) {
      // Materialized once: the values are needed for the parameter fit and are then scaled in place.
      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)).ToList();
      var originalValues = dataset.GetVariableValues(targetVariable, start, end);
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
      for (int i = 0; i < estimatedValues.Count; i++) {
        estimatedValues[i] = estimatedValues[i] * beta + alpha;
      }
      return estimatedValues;
    }

    /// <summary>
    /// Calculates the parameters of the linear transformation <c>estimated * beta + alpha</c>
    /// as <c>beta = sum((x - xMean) * (t - tMean)) / sum((x - xMean)^2)</c> and
    /// <c>alpha = tMean - beta * xMean</c>, where x are the estimated and t the original values.
    /// Only rows in which both values are finite (neither NaN nor infinite) contribute to the
    /// means and sums.
    /// </summary>
    /// <param name="original">Target values.</param>
    /// <param name="estimated">Estimated values; must contain as many elements as <paramref name="original"/>.</param>
    /// <param name="beta">Receives the multiplicative factor; 1 if the valid estimated values have no variance or no valid row exists.</param>
    /// <param name="alpha">Receives the additive term; 0 if no valid row exists.</param>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="original"/> or <paramref name="estimated"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown if the two sequences contain a different number of elements.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      if (original == null) throw new ArgumentNullException("original");
      if (estimated == null) throw new ArgumentNullException("estimated");

      // Collect the valid rows in a single pass so that each input is enumerated exactly once.
      List<double> validOriginal = new List<double>();
      List<double> validEstimated = new List<double>();
      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        while (true) {
          // Advance both enumerators and compare the results immediately; otherwise a
          // surplus element of the longer sequence is consumed here and the length
          // mismatch can no longer be detected afterwards.
          bool hasOriginal = originalEnumerator.MoveNext();
          bool hasEstimated = estimatedEnumerator.MoveNext();
          if (hasOriginal != hasEstimated) {
            throw new ArgumentException("Number of elements in estimated and original doesn't match.");
          }
          if (!hasOriginal) break;

          double t = originalEnumerator.Current;
          double x = estimatedEnumerator.Current;
          if (IsValidValue(t) && IsValidValue(x)) {
            validOriginal.Add(t);
            validEstimated.Add(x);
          }
        }
      }

      if (validOriginal.Count == 0) {
        // Nothing to fit against: fall back to the identity transformation.
        beta = 1;
        alpha = 0;
        return;
      }

      // The means must be taken over the same subset of rows as the sums below;
      // including invalid rows would turn the means (and thus beta and alpha) into NaN/infinity.
      double tMean = validOriginal.Average();
      double xMean = validEstimated.Average();
      double sumXT = 0;
      double sumXX = 0;
      for (int i = 0; i < validEstimated.Count; i++) {
        double dx = validEstimated[i] - xMean;
        sumXT += dx * (validOriginal[i] - tMean);
        sumXX += dx * dx;
      }

      // Constant estimated values carry no slope information; keep them unscaled.
      beta = sumXX != 0 ? sumXT / sumXX : 1;
      alpha = tMean - beta * xMean;
    }

    /// <summary>Returns true if <paramref name="d"/> is neither NaN nor positive/negative infinity.</summary>
    private static bool IsValidValue(double d) {
      return !double.IsInfinity(d) && !double.IsNaN(d);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.