[2] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
| 24 | using System.Text;
|
---|
| 25 |
|
---|
| 26 | namespace HeuristicLab.DataAnalysis {
|
---|
| 27 | public class LinearStatistics {
|
---|
/// <summary>
/// Fits a least-squares straight line <c>y = a + b * x</c> to the given samples, using the index of each
/// sample (0, 1, ..., n - 1) as its x value. The fitted values are returned and the line coefficients
/// are handed back through <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <param name="data">The y values to fit; the x value of <c>data[i]</c> is its index <c>i</c>.</param>
/// <param name="a">Receives the intercept of the fitted line (output parameter).</param>
/// <param name="b">Receives the slope of the fitted line (output parameter); 0 when the x values have no
/// spread (fewer than two samples) and the slope is therefore undetermined.</param>
/// <returns>An array of the same length as <paramref name="data"/> holding <c>a + i * b</c> at index <c>i</c>.</returns>
public static double[] LinearRegression(double[] data, out double a, out double b) {
  int n = data.Length;
  // The x values are the indices 0..n-1, so their mean is (n - 1) / 2 (not n / 2).
  double xMean = (n - 1) / 2.0;
  double yMean = Statistics.Mean(data);

  double ssxx = 0;  // sum of squared x deviations
  double ssxy = 0;  // sum of products of x and y deviations
  for(int i = 0; i < n; i++) {
    double xDeviation = i - xMean;
    ssxx += xDeviation * xDeviation;
    ssxy += xDeviation * (data[i] - yMean);
  }

  // Without any spread in x the slope cannot be estimated; fall back to a horizontal line
  // through the mean instead of dividing by zero.
  b = ssxx > 0 ? ssxy / ssxx : 0.0;
  a = yMean - b * xMean;

  double[] result = new double[n];
  for(int x = 0; x < n; x++)
    result[x] = a + x * b;
  return result;
}
|
---|
| 62 |
|
---|
/// <summary>
/// Convenience overload of <see cref="LinearRegression(double[], out double, out double)"/> for callers
/// that only need the fitted values and not the line coefficients.
/// </summary>
/// <param name="data">The data samples for which the linear regression is calculated.</param>
/// <returns>The regression values produced by the three-argument overload.</returns>
public static double[] LinearRegression(double[] data) {
  // The coefficients are required by the callee's signature but are discarded here.
  double intercept;
  double slope;
  double[] fitted = LinearRegression(data, out intercept, out slope);
  return fitted;
}
|
---|
| 73 |
|
---|
/// <summary>
/// Calculates the correlation coefficient (Pearson's r) of two equally long series: the sum of the
/// products of the deviations from the respective means, divided by the product of the square roots
/// of the sums of squared deviations.
/// </summary>
/// <param name="xValues">Samples of the first variable.</param>
/// <param name="yValues">Samples of the second variable; must be as long as <paramref name="xValues"/>.</param>
/// <returns>The correlation coefficient. If all deviations of either series are zero the division
/// is 0/0 and the result is <see cref="double.NaN"/>.</returns>
/// <exception cref="ArgumentException">The two arrays differ in length.</exception>
/// <exception cref="NotFiniteNumberException">Either array contains a NaN value.</exception>
public static double CorrelationCoefficient(double[] xValues, double[] yValues) {
  if(xValues.Length != yValues.Length)
    throw new ArgumentException("ERROR in CorrelationCoefficient: The given variables have to be equally long!");
  int n = xValues.Length;

  // Reject NaN samples before any arithmetic so that they cannot silently poison the result.
  for(int i = 0; i < n; i++) {
    if(double.IsNaN(xValues[i]) || double.IsNaN(yValues[i]))
      throw new NotFiniteNumberException();
  }

  double xMean = Statistics.Mean(xValues);
  double yMean = Statistics.Mean(yValues);

  double sumXX = 0.0;  // sum of squared x deviations
  double sumYY = 0.0;  // sum of squared y deviations
  double sumXY = 0.0;  // sum of products of x and y deviations
  for(int i = 0; i < n; i++) {
    double xDeviation = xValues[i] - xMean;
    double yDeviation = yValues[i] - yMean;
    sumXX += xDeviation * xDeviation;
    sumYY += yDeviation * yDeviation;
    sumXY += xDeviation * yDeviation;
  }

  // Any common normalization factor applied to all three sums cancels in this ratio,
  // so the raw sums are used directly.
  return sumXY / (Math.Sqrt(sumXX) * Math.Sqrt(sumYY));
}
|
---|
| 114 |
|
---|
| 115 | #region Coefficient of Determination (R-squared)
|
---|
/// <summary>
/// Computes the coefficient of determination (R-squared): one minus the ratio of the sum of squared
/// residuals to the total sum of squares of the original values around their mean.
/// </summary>
/// <param name="originalValues">The original values for which a model shall be created.</param>
/// <param name="residuals">The errors between original and predicted values; this method only squares
/// and sums them.</param>
/// <returns><c>1 - (sum of squared residuals) / (total sum of squares)</c>.</returns>
public static double CoefficientOfDetermination(double[] originalValues, double[] residuals) {
  int count = originalValues.Length;
  double mean = Statistics.Mean(originalValues);

  // Total sum of squares: squared distance of every original value from the mean.
  double[] squaredDeviations = new double[count];
  for(int i = 0; i < count; i++) {
    double deviation = originalValues[i] - mean;
    squaredDeviations[i] = deviation * deviation;
  }
  double totalSumOfSquares = Statistics.Sum(squaredDeviations);

  // Residual sum of squares.
  double[] squaredResiduals = new double[residuals.Length];
  for(int i = 0; i < squaredResiduals.Length; i++) {
    squaredResiduals[i] = residuals[i] * residuals[i];
  }
  double residualSumOfSquares = Statistics.Sum(squaredResiduals);

  return 1.0 - residualSumOfSquares / totalSumOfSquares;
}
|
---|
| 145 | #endregion
|
---|
| 146 |
|
---|
| 147 | #region Adjusted Coefficient of Determination (Adjusted R-squared)
|
---|
/// <summary>
/// Computes the adjusted coefficient of determination
/// <c>1 - (1 - R²) * (n - 1) / (n - p - 1)</c>, where R² comes from
/// <see cref="CoefficientOfDetermination"/>, n is the number of original values and p is
/// <paramref name="numberOfExplanatoryTerms"/>.
/// </summary>
/// <param name="originalValues">The original values; passed on to <see cref="CoefficientOfDetermination"/>.</param>
/// <param name="residuals">The residuals; passed on to <see cref="CoefficientOfDetermination"/>.</param>
/// <param name="numberOfExplanatoryTerms">The value p in the formula above.</param>
/// <returns>The adjusted R-squared value.</returns>
public static double AdjustedCoefficientOfDetermination(double[] originalValues, double[] residuals, int numberOfExplanatoryTerms) {
  double rSquared = CoefficientOfDetermination(originalValues, residuals);
  // Held as double so that the arithmetic below is carried out in floating point.
  double sampleCount = originalValues.Length;
  double scaledUnexplained = (1 - rSquared) * (sampleCount - 1);
  double denominator = sampleCount - numberOfExplanatoryTerms - 1;
  return 1 - scaledUnexplained / denominator;
}
|
---|
| 153 | #endregion
|
---|
| 154 | }
|
---|
| 155 | }
|
---|