Changeset 8638 for trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification
- Timestamp:
- 09/12/12 17:28:08 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/NormalDistributionCutPointsThresholdCalculator.cs
r8623 r8638 53 53 54 54 public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) { 55 double maxEstimatedValue = estimatedValues.Max();56 double minEstimatedValue = estimatedValues.Min();57 55 var estimatedTargetValues = Enumerable.Zip(estimatedValues, targetClassValues, (e, t) => new { EstimatedValue = e, TargetValue = t }).ToList(); 56 double estimatedValuesRange = estimatedValues.Range(); 58 57 59 58 Dictionary<double, double> classMean = new Dictionary<double, double>(); … … 82 81 // calculate all thresholds 83 82 CalculateCutPoints(classMean[class0], classStdDev[class0], classMean[class1], classStdDev[class1], out x1, out x2); 84 if (!thresholdList.Any(x => x.IsAlmost(x1))) thresholdList.Add(x1); 85 if (!thresholdList.Any(x => x.IsAlmost(x2))) thresholdList.Add(x2); 83 84 // if the two cut points are too close (for instance because the stdDev=0) 85 // then move them by 0.1% of the range of estimated values 86 if (x1.IsAlmost(x2)) { 87 x1 -= 0.001 * estimatedValuesRange; 88 x2 += 0.001 * estimatedValuesRange; 89 } 90 if (!double.IsInfinity(x1) && !thresholdList.Any(x => x.IsAlmost(x1))) thresholdList.Add(x1); 91 if (!double.IsInfinity(x2) && !thresholdList.Any(x => x.IsAlmost(x2))) thresholdList.Add(x2); 86 92 } 87 93 } … … 92 98 // all points in the partition are classified as the class with the maximal density in the parition 93 99 List<double> classValuesList = new List<double>(); 94 for (int i = 0; i < thresholdList.Count; i++) { 95 double m; 96 if (double.IsNegativeInfinity(thresholdList[i])) { 97 m = thresholdList[i + 1] - 1.0; // smaller than the smalles non-infinity threshold 98 } else if (i == thresholdList.Count - 1) { 99 // last threshold 100 m = thresholdList[i] + 1.0; // larger than the last threshold 101 } else { 102 m = thresholdList[i] + (thresholdList[i + 1] - thresholdList[i]) / 2.0; // middle of partition 100 if (thresholdList.Count == 1) { 101 // this happens if there are no thresholds (distributions for all classes are exactly the same) 102 // -> all samples should be classified as the first class 103 classValuesList.Add(originalClasses[0]); 104 } else { 105 // at least one reasonable threshold ... 106 // find the most likely class for the points between thresholds m 107 for (int i = 0; i < thresholdList.Count; i++) { 108 double m; 109 if (double.IsNegativeInfinity(thresholdList[i])) { 110 m = thresholdList[i + 1] - 1.0; // smaller than the smallest non-infinity threshold 111 } else if (i == thresholdList.Count - 1) { 112 // last threshold 113 m = thresholdList[i] + 1.0; // larger than the last threshold 114 } else { 115 m = thresholdList[i] + (thresholdList[i + 1] - thresholdList[i]) / 2.0; // middle of partition 116 } 117 118 // determine class with maximal probability density in m 119 double maxDensity = LogNormalDensity(m, classMean[originalClasses[0]], classStdDev[originalClasses[0]]); 120 double maxDensityClassValue = originalClasses[0]; 121 foreach (var classValue in originalClasses.Skip(1)) { 122 double density = LogNormalDensity(m, classMean[classValue], classStdDev[classValue]); 123 if (density > maxDensity) { 124 maxDensity = density; 125 maxDensityClassValue = classValue; 126 } 127 } 128 classValuesList.Add(maxDensityClassValue); 103 129 } 104 105 // determine class with maximal probability density in m106 double maxDensity = double.MinValue;107 double maxDensityClassValue = -1;108 foreach (var classValue in originalClasses) {109 double density = LogNormalDensity(m, classMean[classValue], classStdDev[classValue]);110 if (density > maxDensity) {111 maxDensity = density;112 maxDensityClassValue = classValue;113 }114 }115 classValuesList.Add(maxDensityClassValue);116 130 } 117 131 … … 130 144 filteredClassValues.Add(classValuesList[0]); 131 145 for (int i = 0; i < classValuesList.Count - 1; i++) { 132 if ( classValuesList[i] != classValuesList[i + 1]) {146 if (!classValuesList[i].IsAlmost(classValuesList[i + 1])) { 133 147 filteredThresholds.Add(thresholdList[i + 1]); 134 148 filteredClassValues.Add(classValuesList[i + 1]); … … 140 154 141 155 private static double LogNormalDensity(double x, double mu, double sigma) { 156 if (sigma.IsAlmost(0.0)) 157 if (mu.IsAlmost(x)) return 0.0; // (log(1)) 158 else return double.NegativeInfinity; 142 159 return -0.5 * Math.Log(2.0 * Math.PI * sigma * sigma) - ((x - mu) * (x - mu)) / (2.0 * sigma * sigma); 143 160 } 144 161 145 162 private static void CalculateCutPoints(double m1, double s1, double m2, double s2, out double x1, out double x2) { 146 double a = (s1 * s1 - s2 * s2); 147 x1 = -(-m2 * s1 * s1 + m1 * s2 * s2 + Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (-s1 * s1 + s2 * s2) * Math.Log(s2 / s1)))) / a; 148 x2 = (m2 * s1 * s1 - m1 * s2 * s2 + Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (-s1 * s1 + s2 * s2) * Math.Log(s2 / s1)))) / a; 163 if (s1.IsAlmost(s2)) { 164 if (m1.IsAlmost(m2)) { 165 x1 = double.NegativeInfinity; 166 x2 = double.NegativeInfinity; 167 } else { 168 x1 = (m1 + m2) / 2; 169 x2 = double.NegativeInfinity; 170 } 171 } else if (s1.IsAlmost(0.0)) { 172 x1 = m1; 173 x2 = m1; 174 } else if (s2.IsAlmost(0.0)) { 175 x1 = m2; 176 x2 = m2; 177 } else { 178 // scale s1 and s2 for numeric stability 179 s2 = s2 / s1; 180 s1 = 1.0; 181 double a = (s1 + s2) * (s1 - s2); 182 double g = Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (s1 * s1 + s2 * s2) * Math.Log(s2 / s1))); 183 double m1s2 = m1 * s2 * s2; 184 double m2s1 = m2 * s1 * s1; 185 x1 = -(-m2s1 + m1s2 + g) / a; 186 x2 = (m2s1 - m1s2 + g) / a; 187 } 149 188 } 150 189 }
Note: See TracChangeset
for help on using the changeset viewer.