1 | #region License Information
|
---|
2 | /* HeuristicLab
|
---|
3 | * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
4 | *
|
---|
5 | * This file is part of HeuristicLab.
|
---|
6 | *
|
---|
7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 | #endregion
|
---|
21 |
|
---|
22 | using System;
|
---|
23 | using System.Collections.Generic;
|
---|
24 | using System.Text;
|
---|
25 |
|
---|
26 | namespace HeuristicLab.DataAnalysis {
|
---|
/// <summary>
/// Static helper methods for simple linear regression, correlation and
/// goodness-of-fit (R-squared) calculations on arrays of doubles.
/// </summary>
public class LinearStatistics {
  /// <summary>
  /// Fits a least-squares line <c>y = a + b * x</c> through the given samples, using the
  /// index of each sample (0, 1, ..., n - 1) as its x value. The fitted values are returned;
  /// the coefficients are delivered through <paramref name="a"/> and <paramref name="b"/>.
  /// </summary>
  /// <param name="data">The y values of the samples; the x value of <c>data[i]</c> is <c>i</c>.</param>
  /// <param name="a">Intercept of the fitted line (output parameter).</param>
  /// <param name="b">Slope of the fitted line (output parameter).</param>
  /// <returns>An array of the same length as <paramref name="data"/> holding <c>a + i * b</c> at index <c>i</c>.</returns>
  public static double[] LinearRegression(double[] data, out double a, out double b) {
    int n = data.Length;
    // The x values are the indices 0..n-1, so their mean is (n - 1) / 2 (not n / 2).
    double xMean = (n - 1) / 2.0;
    double yMean = Statistics.Mean(data);

    double ssxx = 0;  // sum of squared x deviations
    double ssxy = 0;  // sum of products of x and y deviations
    for(int i = 0; i < n; i++) {
      double xDeviation = i - xMean;
      ssxx += xDeviation * xDeviation;
      ssxy += xDeviation * (data[i] - yMean);
    }

    // With fewer than two samples there is no spread in x and the slope would be 0 / 0;
    // fall back to a horizontal line through yMean instead of producing NaN.
    b = ssxx > 0 ? ssxy / ssxx : 0.0;
    a = yMean - b * xMean;

    double[] result = new double[n];
    for(int x = 0; x < n; x++)
      result[x] = a + x * b;
    return result;
  }

  /// <summary>
  /// Convenience overload of <see cref="LinearRegression(double[], out double, out double)"/>
  /// that returns only the fitted values and discards the regression coefficients.
  /// </summary>
  /// <param name="data">The y values of the samples; the x value of <c>data[i]</c> is <c>i</c>.</param>
  /// <returns>Calculated linear regression values.</returns>
  public static double[] LinearRegression(double[] data) {
    double a, b;
    return LinearRegression(data, out a, out b);
  }

  /// <summary>
  /// Calculates the correlation coefficient of two equally long series as the sum of products
  /// of deviations divided by the product of the square roots of the sums of squared deviations.
  /// Deviations are taken from the values returned by <c>Statistics.Mean</c>.
  /// </summary>
  /// <param name="xValues">Values of the first variable.</param>
  /// <param name="yValues">Values of the second variable.</param>
  /// <returns>
  /// The correlation coefficient. The result is NaN when the sum of squared deviations of
  /// either series is zero (0 / 0).
  /// </returns>
  /// <exception cref="ArgumentException">The two arrays differ in length.</exception>
  /// <exception cref="NotFiniteNumberException">One of the arrays contains NaN (infinities are not checked).</exception>
  public static double CorrelationCoefficient(double[] xValues, double[] yValues) {
    if(xValues.Length != yValues.Length)
      throw new ArgumentException("ERROR in CorrelationCoefficient: The given variables have to be equally long!");
    int n = xValues.Length;

    // Reject NaN up front so that it cannot silently propagate into the result.
    for(int i = 0; i < n; i++) {
      if(double.IsNaN(xValues[i]) || double.IsNaN(yValues[i]))
        throw new NotFiniteNumberException();
    }

    double xMean = Statistics.Mean(xValues);
    double yMean = Statistics.Mean(yValues);

    double sumXX = 0.0;
    double sumYY = 0.0;
    double sumXY = 0.0;
    for(int i = 0; i < n; i++) {
      double xDeviation = xValues[i] - xMean;
      double yDeviation = yValues[i] - yMean;
      sumXX += xDeviation * xDeviation;
      sumYY += yDeviation * yDeviation;
      sumXY += xDeviation * yDeviation;
    }

    // Any common normalization factor (1/n, 1/(n-1), ...) appears once in the numerator and
    // once in the denominator and cancels, so the raw sums are used directly.
    return sumXY / (Math.Sqrt(sumXX) * Math.Sqrt(sumYY));
  }

  #region Coefficient of Determination (R-squared)
  /// <summary>
  /// In statistics, the coefficient of determination (R-squared) is the proportion of a sample variance
  /// of a response variable that is "explained" by the predictor (explanatory) variables when regression is done.
  /// It is computed here as <c>1 - (sum of squared residuals) / (total sum of squares)</c>.
  /// </summary>
  /// <param name="originalValues">The original values for which a model shall be created.</param>
  /// <param name="residuals">
  /// The errors between original and predicted values. The length is not checked against
  /// <paramref name="originalValues"/>.
  /// </param>
  /// <returns>
  /// <c>1 - SSres / SStot</c>. No guard is applied for a total sum of squares of zero, in which
  /// case the result is not a finite number.
  /// </returns>
  public static double CoefficientOfDetermination(double[] originalValues, double[] residuals) {
    double originalValuesMean = Statistics.Mean(originalValues);

    // Squared deviations of the original values from their mean.
    double[] originalValuesMinusMeanSquared = Array.ConvertAll<double, double>(originalValues, delegate(double v) {
      double t = v - originalValuesMean;
      return t * t;
    });
    double totalSumOfSquares = Statistics.Sum(originalValuesMinusMeanSquared);

    double[] residualsSquared = Array.ConvertAll<double, double>(residuals, delegate(double r) {
      return r * r;
    });
    double sumOfSquaredResiduals = Statistics.Sum(residualsSquared);

    return (1.0 - sumOfSquaredResiduals / totalSumOfSquares);
  }
  #endregion

  #region Adjusted Coefficient of Determination (Adjusted R-squared)
  /// <summary>
  /// Calculates the adjusted coefficient of determination
  /// <c>1 - (1 - R²) * (n - 1) / (n - p - 1)</c>, where <c>R²</c> is the result of
  /// <see cref="CoefficientOfDetermination"/>, <c>n</c> is the number of original values and
  /// <c>p</c> is <paramref name="numberOfExplanatoryTerms"/>.
  /// </summary>
  /// <param name="originalValues">The original values for which a model shall be created.</param>
  /// <param name="residuals">The errors between original and predicted values.</param>
  /// <param name="numberOfExplanatoryTerms">Number of explanatory terms <c>p</c> used in the adjustment.</param>
  /// <returns>
  /// The adjusted R-squared value. No guard is applied for <c>n - p - 1 == 0</c>, in which case
  /// the floating-point division yields a non-finite result.
  /// </returns>
  public static double AdjustedCoefficientOfDetermination(double[] originalValues, double[] residuals, int numberOfExplanatoryTerms) {
    double rSquared = CoefficientOfDetermination(originalValues, residuals);
    // Held as double so that the divisions below are floating-point divisions.
    double n = originalValues.Length;
    return (1 - (1 - rSquared) * (n - 1) / (n - numberOfExplanatoryTerms - 1));
  }
  #endregion
}
155 | }
|
---|