Changeset 8294
- Timestamp: 07/16/12 17:23:16 (12 years ago)
- Location: branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj
r8276 r8294 93 93 </PropertyGroup> 94 94 <ItemGroup> 95 <Reference Include="ALGLIB-3.5.0, Version=3.5.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 96 <Private>False</Private> 97 </Reference> 95 98 <Reference Include="HeuristicLab.Analysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 96 99 <Private>False</Private> -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/ExtendedHeatMap.cs
r8276 r8294 57 57 } 58 58 59 private const string PrearsonsRSquared = "Pearsons R Squared"; 60 private const string HoeffdingsDependence = "Hoeffdings Dependence"; 61 private const string SpearmansRank = "Spearmans Rank"; 59 62 public IEnumerable<string> CorrelationCalculators { 60 get { return new List<string>() { "OnlinePearsonsRSquaredCalculator", "HoeffdingsDependenceCalculator", "SpearmansRankCorrelationCoefficientCalculator"}; }63 get { return new List<string>() { PrearsonsRSquared, HoeffdingsDependence, SpearmansRank }; } 61 64 } 62 65 66 private const string AllSamples = "All Samples"; 67 private const string TrainingSamples = "Training Samples"; 68 private const string TestSamples = "Test Samples"; 63 69 public IEnumerable<string> Partitions { 64 get { return new List<string>() { "All Samples", "Training Partition", "Test Partition"}; }70 get { return new List<string>() { AllSamples, TrainingSamples, TestSamples }; } 65 71 } 66 72 … … 112 118 IEnumerable<double> var1 = dataset.GetDoubleValues(doubleVariableNames[i]); 113 119 IEnumerable<double> var2 = dataset.GetDoubleValues(doubleVariableNames[j]); 114 if (partition.Equals( Partitions.ElementAt(1))) {120 if (partition.Equals(TrainingSamples)) { 115 121 var1 = var1.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start); 116 122 var2 = var2.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start); 117 } else if (partition.Equals( Partitions.ElementAt(2))) {123 } else if (partition.Equals(TestSamples)) { 118 124 var1 = var1.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start); 119 125 var2 = var2.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start); 120 126 } 121 127 122 if (calc.Equals( CorrelationCalculators.ElementAt(1))) {128 if (calc.Equals(HoeffdingsDependence)) { 123 
129 elements[i, j] = HoeffdingsDependenceCalculator.Calculate(var1, var2, out error); 124 } else if (calc.Equals( CorrelationCalculators.ElementAt(2))) {130 } else if (calc.Equals(SpearmansRank)) { 125 131 elements[i, j] = SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error); 126 132 } else { -
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/SpearmansRankCorrelationCoefficientCalculator.cs
r8276 r8294 28 28 29 29 public static double Calculate(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) { 30 double rs = Math.Abs(Spear(originalValues, estimatedValues, out errorState)); 31 if (errorState != OnlineCalculatorError.None) return double.NaN; 32 return rs; 33 } 34 35 /// <summary> 36 /// Calculates Spearmans Rank Correlation Coefficient. Source: Numerical Recipes in C. 37 /// </summary> 38 private static double Spear(IEnumerable<double> xs, IEnumerable<double> ys, out OnlineCalculatorError errorState) { 39 double[] xsArr = xs.ToArray(); 40 double[] ysArr = ys.ToArray(); 41 if (xsArr.Length != ysArr.Length) throw new ArgumentException("The number of elements in xs and ys does not match"); 42 43 int n = xsArr.Length; 44 Array.Sort(xsArr, ysArr); 45 double sf = CRank(xsArr); 46 Array.Sort(ysArr, xsArr); 47 double sg = CRank(ysArr); 48 49 double d = 0.0; 50 for (int j = 0; j < n; j++) //Sum the squared difference of ranks. 51 d += Math.Pow(xsArr[j] - ysArr[j], 2); 52 53 double en, en3n, aved, fac, rs; // vard, zd, t, df, probd, probrs; 54 en = n; 55 en3n = en * en * en - en; 56 aved = en3n / 6.0 - (sf + sg) / 12.0; 57 fac = (1.0 - sf / en3n) * (1.0 - sg / en3n); 58 //vard = ((en - 1.0) * en * en * Math.Pow(en + 1.0, 2) / 36.0) * fac; 59 //zd = (d - aved) / Math.Sqrt(vard); 60 //probd = erfcc(Math.Abs(zd) / 1.4142136); 61 rs = (1.0 - (6.0 / en3n) * (d + (sf + sg) / 12.0)) / Math.Sqrt(fac); 62 //fac = (rs + 1.0) * (1.0 - rs); 63 //if (fac > 0.0) { 64 // t = rs * Math.Sqrt((en - 2.0) / fac); 65 // df = en - 2.0; 66 // probrs = betai(0.5 * df, 0.5, df / (df + t * t)); 67 //} else { 68 // probrs = 0.0; 69 //} 30 double rs = Math.Abs(alglib.basestat.spearmancorr2(originalValues.ToArray(), estimatedValues.ToArray(), originalValues.Count())); 70 31 errorState = OnlineCalculatorError.None; 71 32 return rs; 72 33 } 73 74 /// <summary>75 /// Calculates midranks. 
Source: Numerical Recipes in C.76 /// </summary>77 /// <param name="w">Sorted array of elements, replaces the elements by their rank, including midranking of ties</param>78 /// <returns></returns>79 private static double CRank(double[] w) {80 int i = 0;81 int n = w.Length;82 double s = 0.0;83 double t;84 while (i < n - 1) {85 if (w[i + 1] > w[i]) { // w[i+1] must be larger or equal w[i] as w must be sorted86 // not a tie87 w[i] = i + 1;88 i++;89 } else {90 int j;91 for (j = i + 1; j < n && w[j] <= w[i]; j++) ; // how far does it go (<= effectively means == as w must be sorted)92 double rank = 0.5 * (i + j - 1);93 int k;94 for (k = i; k <= j - 1; k++) w[k] = rank; // set the rank for all tied entries95 t = j - i;96 s += t * t * t - t;97 i = j;98 }99 }100 101 if (i == n - 1) w[n - 1] = n - 1; // if the last element was not tied, this is its rank102 return s;103 }104 34 } 105 35 }
Note: See TracChangeset for help on using the changeset viewer.