Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3.1/sources/HeuristicLab.DataAnalysis/Regression.cs @ 15411

Last change on this file since 15411 was 2, checked in by swagner, 17 years ago

Added HeuristicLab 3.0 sources from former SVN repository at revision 52

File size: 6.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Text;
25
26namespace HeuristicLab.DataAnalysis {
27  public class LinearStatistics {
28    /// <summary>
29    /// Calculates linear regression for the given data. The result is given as regression values, which are returned,
30    /// as well as the characteristic coefficients <paramref name="a"/> and <paramref name="b"/>.
31    /// </summary>
32    /// <param name="Data">The data that is given; linear regression is calculated for these data samples.</param>
33    /// <param name="a">Regression coefficient 'a' (output parameter).</param>
34    /// <param name="b">Regression coefficient 'b' (output parameter).</param>
35    /// <returns>Calculated linear regression values.</returns>
36    public static double[] LinearRegression(double[] data, out double a, out double b) {
37      int n = data.Length;
38      double xMean = n / 2.0;
39      double yMean = Statistics.Mean(data);
40      double[] xMinusMean = new double[n];
41      double[] yMinusMean = new double[n];
42      double[] xMinusMeanSquared = new double[n];
43      double[] xMinusMeanTimesYMinusMean = new double[n];
44
45      double ssxx = 0;
46      double ssxy = 0;
47      for(int i = 0; i < n; i++) {
48        xMinusMean[i] = i - xMean;
49        yMinusMean[i] = data[i] - yMean;
50        xMinusMeanSquared[i] = xMinusMean[i] * xMinusMean[i];
51        xMinusMeanTimesYMinusMean[i] = xMinusMean[i] * yMinusMean[i];
52        ssxx += xMinusMeanSquared[i];
53        ssxy += xMinusMeanTimesYMinusMean[i];
54      }
55      b = ssxy / ssxx;
56      a = yMean - b * xMean;
57      double[] result = new double[n];
58      for(int x = 0; x < n; x++)
59        result[x] = a + x * b;
60      return result;
61    }
62
63    /// <summary>
64    /// Calculates linear regression for the given data. The result is given as regression values, which are returned,
65    /// as well as the characteristic coefficients <paramref name="a"/> and <paramref name="b"/>.
66    /// </summary>
67    /// <param name="Data">The data that is given; linear regression is calculated for these data samples.</param>
68    /// <returns>Calculated linear regression values.</returns>
69    public static double[] LinearRegression(double[] data) {
70      double a, b;
71      return LinearRegression(data, out a, out b);
72    }
73
74    public static double CorrelationCoefficient(double[] xValues, double[] yValues) {
75      if(xValues.Length != yValues.Length)
76        throw new Exception("ERROR in CorrelationCoefficient: The given variables have to be equally long!");
77      int n = xValues.Length;
78      double[] x = new double[n];
79      double[] y = new double[n];
80      for(int i = 0; i < n; i++) {
81        if(double.IsNaN(xValues[i]))
82          throw new NotFiniteNumberException();
83        else
84          x[i] = xValues[i];
85        if(double.IsNaN(yValues[i]))
86          throw new NotFiniteNumberException();
87        else
88          y[i] = yValues[i];
89      }
90      double OneOverN = 1.0 / (n + 1);
91      double xMean = Statistics.Mean(x);
92      double yMean = Statistics.Mean(y);
93      double[] xMinusMean = new double[n];
94      double[] yMinusMean = new double[n];
95      double[] xMinusMeanSquared = new double[n];
96      double xMinusMeanSquaredSum = 0.0;
97      double[] yMinusMeanSquared = new double[n];
98      double yMinusMeanSquaredSum = 0.0;
99      double[] xMinusMeanTimesYMinusMean = new double[n];
100      double xMinusMeanTimesYMinusMeanSum = 0.0;
101      for(int i = 0; i < n; i++) {
102        xMinusMean[i] = x[i] - xMean;
103        yMinusMean[i] = y[i] - yMean;
104        xMinusMeanSquared[i] = xMinusMean[i] * xMinusMean[i];
105        xMinusMeanSquaredSum += xMinusMeanSquared[i];
106        yMinusMeanSquared[i] = yMinusMean[i] * yMinusMean[i];
107        yMinusMeanSquaredSum += yMinusMeanSquared[i];
108        xMinusMeanTimesYMinusMean[i] = xMinusMean[i] * yMinusMean[i];
109        xMinusMeanTimesYMinusMeanSum += xMinusMeanTimesYMinusMean[i];
110      }
111      return (OneOverN * xMinusMeanTimesYMinusMeanSum) /
112        (Math.Sqrt(OneOverN * xMinusMeanSquaredSum) * Math.Sqrt(OneOverN * yMinusMeanSquaredSum));
113    }
114
115    #region Coefficient of Determination (R-squared)
116    /// <summary>
117    /// In statistics, the coefficient of determination (R-squared) is the proportion of a sample variance
118    /// of a response variable that is "explained" by the predictor (explanatory) variables when regression is done.
119    /// </summary>
120    /// <param name="originalValues">The original values for which a model shall be created.</param>
121    /// <param name="residuals">The errors between original and predicted values.</param>
122    /// <returns></returns>
123    public static double CoefficientOfDetermination(double[] originalValues, double[] residuals) {
124      int n = originalValues.Length;
125
126      double originalValuesMean = Statistics.Mean(originalValues);
127
128      double[] originalValuesMinusMeanSquared = new double[n];
129      originalValuesMinusMeanSquared = Array.ConvertAll<double, double>(originalValues, delegate(double v) {
130        double t = v - originalValuesMean;
131        return t * t;
132      });
133
134      double totalSumOfSquares = Statistics.Sum(originalValuesMinusMeanSquared);
135
136      double[] residualsSquared = new double[residuals.Length];
137      residualsSquared = Array.ConvertAll<double, double>(residuals, delegate(double r) {
138        return r * r;
139      });
140
141      double sumOfSquaredResiduals = Statistics.Sum(residualsSquared);
142
143      return (1.0 - sumOfSquaredResiduals / totalSumOfSquares);
144    }
145    #endregion
146
147    #region Adjusted Coefficient of Determination (Adjusted R-squared)
148    public static double AdjustedCoefficientOfDetermination(double[] originalValues, double[] residuals, int numberOfExplanatoryTerms) {
149      double rSquared = CoefficientOfDetermination(originalValues, residuals);
150      double n = originalValues.Length;
151      return (1 - (1 - rSquared) * (n - 1) / (n - numberOfExplanatoryTerms - 1));
152    }
153    #endregion
154  }
155}
Note: See TracBrowser for help on using the repository browser.