Changeset 8913 for trunk/sources/HeuristicLab.Problems.DataAnalysis
- Timestamp:
- 11/14/12 18:30:24 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/NormalDistributionCutPointsThresholdCalculator.cs
r8680 r8913 103 103 if (thresholdList.Count == 2) { 104 104 // this happens if there are no thresholds (distributions for all classes are exactly the same) 105 // -> all samples should be classified as the first class 106 classValuesList.Add(originalClasses[0]); 105 // -> all samples should be classified as the class with the most observations 106 // group observations by target class and select the class with largest count 107 classValuesList.Add(targetClassValues.GroupBy(c => c) 108 .OrderBy(g => g.Count()) 109 .Last().Key); 107 110 } else { 108 111 // at least one reasonable threshold ... … … 111 114 112 115 // determine class with maximal density mass between the thresholds 113 double maxDensity = LogNormalDensityMass(thresholdList[i], thresholdList[i + 1], classMean[originalClasses[0]], classStdDev[originalClasses[0]]);116 double maxDensity = DensityMass(thresholdList[i], thresholdList[i + 1], classMean[originalClasses[0]], classStdDev[originalClasses[0]]); 114 117 double maxDensityClassValue = originalClasses[0]; 115 118 foreach (var classValue in originalClasses.Skip(1)) { 116 double density = LogNormalDensityMass(thresholdList[i], thresholdList[i + 1], classMean[classValue], classStdDev[classValue]);119 double density = DensityMass(thresholdList[i], thresholdList[i + 1], classMean[classValue], classStdDev[classValue]); 117 120 if (density > maxDensity) { 118 121 maxDensity = density; … … 149 152 } 150 153 151 private static double LogNormalDensityMass(double lower, double upper, double mu, double sigma) { 154 private static double NormalCDF(double mu, double sigma, double x) { 155 return 0.5 * (1 + alglib.normaldistr.errorfunction((x - mu) / (sigma * Math.Sqrt(2.0)))); 156 } 157 158 // determines the value NormalCDF(mu,sigma, upper) - NormalCDF(mu, sigma, lower) 159 // = the integral of the PDF of N(mu, sigma) in the range [lower, upper] 160 private static double DensityMass(double lower, double upper, double mu, double sigma) { 152 161 if (sigma.IsAlmost(0.0)) { 153 if (lower < mu && mu < upper) return double.PositiveInfinity; // log(1)154 else return double.NegativeInfinity; // log(0)155 } 156 157 Func<double, double> f = (x) =>158 x * -0.5 * Math.Log(2.0 * Math.PI * sigma * sigma) - Math.Pow(x - mu, 3) / (3 * 2.0 * sigma * sigma);159 160 if (double.IsNegativeInfinity(lower)) return f(upper); 161 else return f(upper) - f(lower);162 }163 162 if (lower < mu && mu < upper) return 1.0; // all mass is between lower and upper 163 else return 0; // no mass is between lower and upper 164 } 165 166 if (double.IsNegativeInfinity(lower)) return NormalCDF(mu, sigma, upper); 167 else return NormalCDF(mu, sigma, upper) - NormalCDF(mu, sigma, lower); 168 } 169 170 // Calculates the points x1 and x2 where the distributions N(m1, s1) == N(m2,s2). 171 // In the general case there should be two cut points. If either s1 or s2 is 0 then x1==x2. 172 // If both s1 and s2 are zero than there are no cut points but we should return something reasonable (e.g. (m1 + m2) / 2) then. 164 173 private static void CalculateCutPoints(double m1, double s1, double m2, double s2, out double x1, out double x2) { 165 174 if (s1.IsAlmost(s2)) { … … 168 177 x2 = double.NegativeInfinity; 169 178 } else { 179 // s1==s2 and m1 != m2 180 // return something reasonable. cut point should be half way between m1 and m2 170 181 x1 = (m1 + m2) / 2; 171 182 x2 = double.NegativeInfinity; 172 183 } 173 184 } else if (s1.IsAlmost(0.0)) { 185 // when s1 is 0.0 the cut points are exactly at m1 ... 174 186 x1 = m1; 175 187 x2 = m1; 176 188 } else if (s2.IsAlmost(0.0)) { 189 // ... same for s2 177 190 x1 = m2; 178 191 x2 = m2; … … 182 195 CalculateCutPoints(m2, s2, m1, s1, out x1, out x2); 183 196 } else { 197 // general case 198 // calculate the solutions x1, x2 where N(m1,s1) == N(m2,s2) 184 199 double a = (s1 + s2) * (s1 - s2); 185 200 double g = Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (s1 * s1 + s2 * s2) * Math.Log(s2 / s1))); 186 201 double m1s2 = m1 * s2 * s2; 187 202 double m2s1 = m2 * s1 * s1; 188 x1 = -(-m2s1 + m1s2 +g) / a;203 x1 = (m2s1 - m1s2 - g) / a; 189 204 x2 = (m2s1 - m1s2 + g) / a; 190 205 }
Note: See TracChangeset
for help on using the changeset viewer.