Changeset 8921


Ignore:
Timestamp:
11/19/12 15:11:44 (10 years ago)
Author:
gkronber
Message:

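#1925: Improved the robustness of NormalDistributionCutPointsThresholdCalculator. It now always returns at least one threshold/class pair: if the filtered thresholds are empty or do not start at negative infinity, a negative-infinity threshold paired with the most frequent class is inserted at the front.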

Location:
trunk/sources
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicDiscriminantFunctionClassificationModel.cs

    r8883 r8921  
    8888      var classValuesArr = classValues.ToArray();
    8989      var thresholdsArr = thresholds.ToArray();
    90       if (thresholdsArr.Length != classValuesArr.Length) throw new ArgumentException();
     90      if (thresholdsArr.Length != classValuesArr.Length || thresholdsArr.Length < 1)
     91        throw new ArgumentException();
     92      if (!double.IsNegativeInfinity(thresholds.First()))
     93        throw new ArgumentException();
    9194
    9295      this.classValues = classValuesArr;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/NormalDistributionCutPointsThresholdCalculator.cs

    r8917 r8921  
    7171        }
    7272      }
     73
    7374      double[] originalClasses = classMean.Keys.OrderBy(x => x).ToArray();
    7475      int nClasses = originalClasses.Length;
     
    9899      thresholdList.Add(double.PositiveInfinity);
    99100
    100       // determine class values for each partition separated by a threshold by calculating the density of all class distributions
    101       // all points in the partition are classified as the class with the maximal density in the partition
    102       if (thresholdList.Count == 2) {
     101
     102      // find the most likely class for the points between thresholds m
     103      List<double> filteredThresholds = new List<double>();
     104      List<double> filteredClassValues = new List<double>();
     105      for (int i = 0; i < thresholdList.Count - 1; i++) {
     106        // determine class with maximal density mass between the thresholds
     107        double maxDensity = DensityMass(thresholdList[i], thresholdList[i + 1], classMean[originalClasses[0]], classStdDev[originalClasses[0]]);
     108        double maxDensityClassValue = originalClasses[0];
     109        foreach (var classValue in originalClasses.Skip(1)) {
     110          double density = DensityMass(thresholdList[i], thresholdList[i + 1], classMean[classValue], classStdDev[classValue]);
     111          if (density > maxDensity) {
     112            maxDensity = density;
     113            maxDensityClassValue = classValue;
     114          }
     115        }
     116        if (maxDensity > double.NegativeInfinity &&
     117          (filteredClassValues.Count == 0 || !maxDensityClassValue.IsAlmost(filteredClassValues.Last()))) {
     118          filteredThresholds.Add(thresholdList[i]);
     119          filteredClassValues.Add(maxDensityClassValue);
     120        }
     121      }
     122
     123      if (filteredThresholds.Count == 0 || !double.IsNegativeInfinity(filteredThresholds.First())) {
    103124        // this happens if there are no thresholds (distributions for all classes are exactly the same)
     125        // or when the CDF up to the first threshold is zero
    104126        // -> all samples should be classified as the class with the most observations
    105127        // group observations by target class and select the class with largest count
     
    107129                              .OrderBy(g => g.Count())
    108130                              .Last().Key;
    109         thresholds = new double[] { double.NegativeInfinity };
    110         classValues = new double[] { mostFrequentClass };
    111       } else {
    112 
    113         // at least one reasonable threshold ...
    114         // find the most likely class for the points between thresholds m
    115         List<double> filteredThresholds = new List<double>();
    116         List<double> filteredClassValues = new List<double>();
    117         for (int i = 0; i < thresholdList.Count - 1; i++) {
    118           // determine class with maximal density mass between the thresholds
    119           double maxDensity = DensityMass(thresholdList[i], thresholdList[i + 1], classMean[originalClasses[0]], classStdDev[originalClasses[0]]);
    120           double maxDensityClassValue = originalClasses[0];
    121           foreach (var classValue in originalClasses.Skip(1)) {
    122             double density = DensityMass(thresholdList[i], thresholdList[i + 1], classMean[classValue], classStdDev[classValue]);
    123             if (density > maxDensity) {
    124               maxDensity = density;
    125               maxDensityClassValue = classValue;
    126             }
    127           }
    128           if (maxDensity > double.NegativeInfinity &&
    129             (filteredClassValues.Count == 0 || !maxDensityClassValue.IsAlmost(filteredClassValues.Last()))) {
    130             filteredThresholds.Add(thresholdList[i]);
    131             filteredClassValues.Add(maxDensityClassValue);
    132           }
    133         }
    134         thresholds = filteredThresholds.ToArray();
    135         classValues = filteredClassValues.ToArray();
    136       }
     131        filteredThresholds.Insert(0, double.NegativeInfinity);
     132        filteredClassValues.Insert(0, mostFrequentClass);
     133      }
     134
     135      thresholds = filteredThresholds.ToArray();
     136      classValues = filteredClassValues.ToArray();
    137137    }
    138138
     
    208208          double g = Math.Sqrt(2 * s2 * s2 * Math.Log(s2 / s1) - 2 * s1 * s1 * Math.Log(s2 / s1) - 2 * m1 * m2 + m1 * m1 + m2 * m2);
    209209          double s = (s1 * s1 - s2 * s2);
    210           x1 =  (m2 * s1 * s1 - m1 * s2 * s2 + s1 * s2 * g) / s;
     210          x1 = (m2 * s1 * s1 - m1 * s2 * s2 + s1 * s2 * g) / s;
    211211          x2 = -(m1 * s2 * s2 - m2 * s1 * s1 + s1 * s2 * g) / s;
    212212        }
Note: See TracChangeset for help on using the changeset viewer.