#region License Information /* HeuristicLab * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; namespace HeuristicLab.Problems.DataAnalysis { public class HoeffdingsDependenceCalculator { public static double Calculate(IEnumerable originalValues, IEnumerable estimatedValues, out OnlineCalculatorError errorState) { double d = HoeffD(originalValues, estimatedValues, out errorState); if (errorState != OnlineCalculatorError.None) return double.NaN; return d; } /// /// computes Hoeffding's dependence coefficient. /// Source: hoeffd.r from R package hmisc http://cran.r-project.org/web/packages/Hmisc/index.html /// private static double HoeffD(IEnumerable xs, IEnumerable ys, out OnlineCalculatorError errorState) { double[] rx = TiedRank(xs); double[] ry = TiedRank(ys); if (rx.Length != ry.Length) throw new ArgumentException("The number of elements in xs and ys does not match"); double[] rxy = TiedRank(xs, ys); int n = rx.Length; double q = 0, r = 0, s = 0; double scaling = 1.0 / (n * (n - 1)); for (int i = 0; i < n; i++) { q += (rx[i] - 1) * (rx[i] - 2) * (ry[i] - 1) * (ry[i] - 2) * scaling; r += (rx[i] - 2) * (ry[i] - 2) * rxy[i] * scaling; s += rxy[i] * (rxy[i] - 1) * scaling; } errorState = OnlineCalculatorError.None; // return 30.0 * (q - 2 * (n - 2) * r + (n - 2) * (n - 3) * s) / n / (n - 1) / (n - 2) / (n - 3) / (n - 4); double t0 = q / (n - 2) / (n - 3) / (n - 4); double t1 = 2 * r / (n - 3) / (n - 4); double t2 = s / (n - 4); return 30.0 * (t0 - t1 + t2); } private static double[] TiedRank(IEnumerable xs) { var xsArr = xs.ToArray(); var idx = Enumerable.Range(1, xsArr.Length).ToArray(); Array.Sort(xsArr, idx); CRank(xsArr); Array.Sort(idx, xsArr); return xsArr; } /// /// Calculates the joint rank with midranks for ties. Source: hoeffd.r from R package hmisc http://cran.r-project.org/web/packages/Hmisc/index.html /// /// /// /// private static double[] TiedRank(IEnumerable xs, IEnumerable ys) { var xsArr = xs.ToArray(); var ysArr = ys.ToArray(); var r = new double[xsArr.Length]; int n = r.Length; for (int i = 0; i < n; i++) { var xi = xsArr[i]; var yi = ysArr[i]; double ri = 0.0; for (int j = 0; j < n; j++) { if (i != j) { double cx; if (xsArr[j] < xi) cx = 1.0; else if (xsArr[j] > xi) cx = 0.0; else cx = 0.5; // eq double cy; if (ysArr[j] < yi) cy = 1.0; else if (ysArr[j] > yi) cy = 0.0; else cy = 0.5; // eq ri = ri + cx * cy; } } r[i] = ri; } return r; } /// /// Calculates midranks. Source: Numerical Recipes in C. p 642 /// /// Sorted array of elements, replaces the elements by their rank, including midranking of ties /// private static void CRank(double[] w) { int i = 0; int n = w.Length; while (i < n - 1) { if (w[i + 1] > w[i]) { // w[i+1] must be larger or equal w[i] as w must be sorted // not a tie w[i] = i + 1; i++; } else { int j; for (j = i + 1; j < n && w[j] <= w[i]; j++) ; // how far does it go (<= effectively means == as w must be sorted, side-step equality for double values) double rank = 1 + 0.5 * (i + j - 1); int k; for (k = i; k < j; k++) w[k] = rank; // set the rank for all tied entries i = j; } } if (i == n - 1) w[n - 1] = n; // if the last element was not tied, this is its rank } } }