1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25 


26  namespace HeuristicLab.Problems.DataAnalysis {


27  public class HoeffdingsDependenceCalculator {


28 


29  public static double Calculate(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {


30  double d = HoeffD(originalValues, estimatedValues, out errorState);


31  if (errorState != OnlineCalculatorError.None) return double.NaN;


32  return d;


33  }


34 


35  /// <summary>


36  /// computes Hoeffding's dependence coefficient.


37  /// Source: hoeffd.r from R package hmisc http://cran.rproject.org/web/packages/Hmisc/index.html


38  /// </summary>


39  private static double HoeffD(IEnumerable<double> xs, IEnumerable<double> ys, out OnlineCalculatorError errorState) {


40  double[] rx = TiedRank(xs);


41  double[] ry = TiedRank(ys);


42  if (rx.Length != ry.Length) throw new ArgumentException("The number of elements in xs and ys does not match");


43  double[] rxy = TiedRank(xs, ys);


44 


45  int n = rx.Length;


46  double q = 0, r = 0, s = 0;


47  double scaling = 1.0 / (n * (n  1));


48  for (int i = 0; i < n; i++) {


49  q += (rx[i]  1) * (rx[i]  2) * (ry[i]  1) * (ry[i]  2) * scaling;


50  r += (rx[i]  2) * (ry[i]  2) * rxy[i] * scaling;


51  s += rxy[i] * (rxy[i]  1) * scaling;


52  }


53  errorState = OnlineCalculatorError.None;


54  // return 30.0 * (q  2 * (n  2) * r + (n  2) * (n  3) * s) / n / (n  1) / (n  2) / (n  3) / (n  4);


55  double t0 = q / (n  2) / (n  3) / (n  4);


56  double t1 = 2 * r / (n  3) / (n  4);


57  double t2 = s / (n  4);


58  return 30.0 * (t0  t1 + t2);


59  }


60 


61  private static double[] TiedRank(IEnumerable<double> xs) {


62  var xsArr = xs.ToArray();


63  var idx = Enumerable.Range(1, xsArr.Length).ToArray();


64  Array.Sort(xsArr, idx);


65  CRank(xsArr);


66  Array.Sort(idx, xsArr);


67  return xsArr;


68  }


69 


70  /// <summary>


71  /// Calculates the joint rank with midranks for ties. Source: hoeffd.r from R package hmisc http://cran.rproject.org/web/packages/Hmisc/index.html


72  /// </summary>


73  /// <param name="xs"></param>


74  /// <param name="ys"></param>


75  /// <returns></returns>


76  private static double[] TiedRank(IEnumerable<double> xs, IEnumerable<double> ys) {


77  var xsArr = xs.ToArray();


78  var ysArr = ys.ToArray();


79  var r = new double[xsArr.Length];


80  int n = r.Length;


81  for (int i = 0; i < n; i++) {


82  var xi = xsArr[i];


83  var yi = ysArr[i];


84  double ri = 0.0;


85  for (int j = 0; j < n; j++) {


86  if (i != j) {


87  double cx;


88  if (xsArr[j] < xi) cx = 1.0;


89  else if (xsArr[j] > xi) cx = 0.0;


90  else cx = 0.5; // eq


91  double cy;


92  if (ysArr[j] < yi) cy = 1.0;


93  else if (ysArr[j] > yi) cy = 0.0;


94  else cy = 0.5; // eq


95  ri = ri + cx * cy;


96  }


97  }


98  r[i] = ri;


99  }


100  return r;


101  }


102 


103  /// <summary>


104  /// Calculates midranks. Source: Numerical Recipes in C. p 642


105  /// </summary>


106  /// <param name="w">Sorted array of elements, replaces the elements by their rank, including midranking of ties</param>


107  /// <returns></returns>


108  private static void CRank(double[] w) {


109  int i = 0;


110  int n = w.Length;


111  while (i < n  1) {


112  if (w[i + 1] > w[i]) { // w[i+1] must be larger or equal w[i] as w must be sorted


113  // not a tie


114  w[i] = i + 1;


115  i++;


116  } else {


117  int j;


118  for (j = i + 1; j < n && w[j] <= w[i]; j++) ; // how far does it go (<= effectively means == as w must be sorted, sidestep equality for double values)


119  double rank = 1 + 0.5 * (i + j  1);


120  int k;


121  for (k = i; k < j; k++) w[k] = rank; // set the rank for all tied entries


122  i = j;


123  }


124  }


125 


126  if (i == n  1) w[n  1] = n; // if the last element was not tied, this is its rank


127  }


128  }


129  }

