Free cookie consent management tool by TermsFeed Policy Generator

Changeset 8294


Ignore:
Timestamp:
07/16/12 17:23:16 (12 years ago)
Author:
sforsten
Message:

#1292:

  • SpearmansRankCorrelationCoefficientCalculator now uses the ALGLIB function alglib.basestat.spearmancorr2 instead of its own implementation
  • the calculator and partition name strings in ExtendedHeatMap have been extracted into private constants
Location:
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r8276 r8294  
    9393  </PropertyGroup>
    9494  <ItemGroup>
     95    <Reference Include="ALGLIB-3.5.0, Version=3.5.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     96      <Private>False</Private>
     97    </Reference>
    9598    <Reference Include="HeuristicLab.Analysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    9699      <Private>False</Private>
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/ExtendedHeatMap.cs

    r8276 r8294  
    5757    }
    5858
     59    private const string PrearsonsRSquared = "Pearsons R Squared";
     60    private const string HoeffdingsDependence = "Hoeffdings Dependence";
     61    private const string SpearmansRank = "Spearmans Rank";
    5962    public IEnumerable<string> CorrelationCalculators {
    60       get { return new List<string>() { "OnlinePearsonsRSquaredCalculator", "HoeffdingsDependenceCalculator", "SpearmansRankCorrelationCoefficientCalculator" }; }
     63      get { return new List<string>() { PrearsonsRSquared, HoeffdingsDependence, SpearmansRank }; }
    6164    }
    6265
     66    private const string AllSamples = "All Samples";
     67    private const string TrainingSamples = "Training Samples";
     68    private const string TestSamples = "Test Samples";
    6369    public IEnumerable<string> Partitions {
    64       get { return new List<string>() { "All Samples", "Training Partition", "Test Partition" }; }
     70      get { return new List<string>() { AllSamples, TrainingSamples, TestSamples }; }
    6571    }
    6672
     
    112118          IEnumerable<double> var1 = dataset.GetDoubleValues(doubleVariableNames[i]);
    113119          IEnumerable<double> var2 = dataset.GetDoubleValues(doubleVariableNames[j]);
    114           if (partition.Equals(Partitions.ElementAt(1))) {
     120          if (partition.Equals(TrainingSamples)) {
    115121            var1 = var1.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start);
    116122            var2 = var2.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start);
    117           } else if (partition.Equals(Partitions.ElementAt(2))) {
     123          } else if (partition.Equals(TestSamples)) {
    118124            var1 = var1.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start);
    119125            var2 = var2.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start);
    120126          }
    121127
    122           if (calc.Equals(CorrelationCalculators.ElementAt(1))) {
     128          if (calc.Equals(HoeffdingsDependence)) {
    123129            elements[i, j] = HoeffdingsDependenceCalculator.Calculate(var1, var2, out error);
    124           } else if (calc.Equals(CorrelationCalculators.ElementAt(2))) {
     130          } else if (calc.Equals(SpearmansRank)) {
    125131            elements[i, j] = SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error);
    126132          } else {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/SpearmansRankCorrelationCoefficientCalculator.cs

    r8276 r8294  
    2828
    2929    public static double Calculate(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {
    30       double rs = Math.Abs(Spear(originalValues, estimatedValues, out errorState));
    31       if (errorState != OnlineCalculatorError.None) return double.NaN;
    32       return rs;
    33     }
    34 
    35     /// <summary>
    36     /// Calculates Spearmans Rank Correlation Coefficient. Source: Numerical Recipes in C.
    37     /// </summary>
    38     private static double Spear(IEnumerable<double> xs, IEnumerable<double> ys, out OnlineCalculatorError errorState) {
    39       double[] xsArr = xs.ToArray();
    40       double[] ysArr = ys.ToArray();
    41       if (xsArr.Length != ysArr.Length) throw new ArgumentException("The number of elements in xs and ys does not match");
    42 
    43       int n = xsArr.Length;
    44       Array.Sort(xsArr, ysArr);
    45       double sf = CRank(xsArr);
    46       Array.Sort(ysArr, xsArr);
    47       double sg = CRank(ysArr);
    48 
    49       double d = 0.0;
    50       for (int j = 0; j < n; j++) //Sum the squared difference of ranks.
    51         d += Math.Pow(xsArr[j] - ysArr[j], 2);
    52 
    53       double en, en3n, aved, fac, rs; // vard, zd, t, df, probd, probrs;
    54       en = n;
    55       en3n = en * en * en - en;
    56       aved = en3n / 6.0 - (sf + sg) / 12.0;
    57       fac = (1.0 - sf / en3n) * (1.0 - sg / en3n);
    58       //vard = ((en - 1.0) * en * en * Math.Pow(en + 1.0, 2) / 36.0) * fac;
    59       //zd = (d - aved) / Math.Sqrt(vard);
    60       //probd = erfcc(Math.Abs(zd) / 1.4142136);
    61       rs = (1.0 - (6.0 / en3n) * (d + (sf + sg) / 12.0)) / Math.Sqrt(fac);
    62       //fac = (rs + 1.0) * (1.0 - rs);
    63       //if (fac > 0.0) {
    64       //  t = rs * Math.Sqrt((en - 2.0) / fac);
    65       //  df = en - 2.0;
    66       //  probrs = betai(0.5 * df, 0.5, df / (df + t * t));
    67       //} else {
    68       //  probrs = 0.0;
    69       //}
     30      double rs = Math.Abs(alglib.basestat.spearmancorr2(originalValues.ToArray(), estimatedValues.ToArray(), originalValues.Count()));
    7031      errorState = OnlineCalculatorError.None;
    7132      return rs;
    7233    }
    73 
    74     /// <summary>
    75     /// Calculates midranks. Source: Numerical Recipes in C.
    76     /// </summary>
    77     /// <param name="w">Sorted array of elements, replaces the elements by their rank, including midranking of ties</param>
    78     /// <returns></returns>
    79     private static double CRank(double[] w) {
    80       int i = 0;
    81       int n = w.Length;
    82       double s = 0.0;
    83       double t;
    84       while (i < n - 1) {
    85         if (w[i + 1] > w[i]) {    // w[i+1] must be larger or equal w[i] as w must be sorted
    86           // not a tie
    87           w[i] = i + 1;
    88           i++;
    89         } else {
    90           int j;
    91           for (j = i + 1; j < n && w[j] <= w[i]; j++) ; // how far does it go (<= effectively means == as w must be sorted)
    92           double rank = 0.5 * (i + j - 1);
    93           int k;
    94           for (k = i; k <= j - 1; k++) w[k] = rank; // set the rank for all tied entries
    95           t = j - i;
    96           s += t * t * t - t;
    97           i = j;
    98         }
    99       }
    100 
    101       if (i == n - 1) w[n - 1] = n - 1;   // if the last element was not tied, this is its rank
    102       return s;
    103     }
    10434  }
    10535}
Note: See TracChangeset for help on using the changeset viewer.