source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/NormalDistributionCutPointsThresholdCalculator.cs @ 5942

Last change on this file since 5942 was 5942, checked in by mkommend, 11 years ago

#1453: Renamed IOnlineEvaluator to IOnlineCalculator

File size: 7.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28
29namespace HeuristicLab.Problems.DataAnalysis {
30  /// <summary>
31  /// Represents a threshold calculator that calculates thresholds as the cutting points between the estimated class distributions (assuming normally distributed class values).
32  /// </summary>
33  [StorableClass]
34  [Item("NormalDistributionCutPointsThresholdCalculator", "Represents a threshold calculator that calculates thresholds as the cutting points between the estimated class distributions (assuming normally distributed class values).")]
35  public class NormalDistributionCutPointsThresholdCalculator : ThresholdCalculator {
36
37    [StorableConstructor]
38    protected NormalDistributionCutPointsThresholdCalculator(bool deserializing) : base(deserializing) { }
39    protected NormalDistributionCutPointsThresholdCalculator(NormalDistributionCutPointsThresholdCalculator original, Cloner cloner)
40      : base(original, cloner) {
41    }
42    public NormalDistributionCutPointsThresholdCalculator()
43      : base() {
44    }
45
46    public override IDeepCloneable Clone(Cloner cloner) {
47      return new NormalDistributionCutPointsThresholdCalculator(this, cloner);
48    }
49
50    public override void Calculate(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
51      NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(problemData, estimatedValues, targetClassValues, out classValues, out thresholds);
52    }
53
54    public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
55      double maxEstimatedValue = estimatedValues.Max();
56      double minEstimatedValue = estimatedValues.Min();
57      var estimatedTargetValues = Enumerable.Zip(estimatedValues, targetClassValues, (e, t) => new { EstimatedValue = e, TargetValue = t }).ToList();
58
59      Dictionary<double, double> classMean = new Dictionary<double, double>();
60      Dictionary<double, double> classStdDev = new Dictionary<double, double>();
61      // calculate moments per class
62      foreach (var group in estimatedTargetValues.GroupBy(p => p.TargetValue)) {
63        IEnumerable<double> estimatedClassValues = group.Select(x => x.EstimatedValue);
64        double classValue = group.Key;
65        double mean, variance;
66        OnlineCalculatorError meanErrorState, varianceErrorState;
67        OnlineMeanAndVarianceCalculator.Calculate(estimatedClassValues, out mean, out variance, out meanErrorState, out varianceErrorState);
68
69        if (meanErrorState == OnlineCalculatorError.None && varianceErrorState == OnlineCalculatorError.None) {
70          classMean[classValue] = mean;
71          classStdDev[classValue] = Math.Sqrt(variance);
72        }
73      }
74      double[] originalClasses = classMean.Keys.OrderBy(x => x).ToArray();
75      int nClasses = originalClasses.Length;
76      List<double> thresholdList = new List<double>();
77      for (int i = 0; i < nClasses - 1; i++) {
78        for (int j = i + 1; j < nClasses; j++) {
79          double x1, x2;
80          double class0 = originalClasses[i];
81          double class1 = originalClasses[j];
82          // calculate all thresholds
83          CalculateCutPoints(classMean[class0], classStdDev[class0], classMean[class1], classStdDev[class1], out x1, out x2);
84          if (!thresholdList.Any(x => x.IsAlmost(x1))) thresholdList.Add(x1);
85          if (!thresholdList.Any(x => x.IsAlmost(x2))) thresholdList.Add(x2);
86        }
87      }
88      thresholdList.Sort();
89      thresholdList.Insert(0, double.NegativeInfinity);
90
91      // determine class values for each partition separated by a threshold by calculating the density of all class distributions
92      // all points in the partition are classified as the class with the maximal density in the parition
93      List<double> classValuesList = new List<double>();
94      for (int i = 0; i < thresholdList.Count; i++) {
95        double m;
96        if (double.IsNegativeInfinity(thresholdList[i])) {
97          m = thresholdList[i + 1] - 1.0; // smaller than the smalles non-infinity threshold
98        } else if (i == thresholdList.Count - 1) {
99          // last threshold
100          m = thresholdList[i] + 1.0; // larger than the last threshold
101        } else {
102          m = thresholdList[i] + (thresholdList[i + 1] - thresholdList[i]) / 2.0; // middle of partition
103        }
104
105        // determine class with maximal probability density in m
106        double maxDensity = double.MinValue;
107        double maxDensityClassValue = -1;
108        foreach (var classValue in originalClasses) {
109          double density = NormalDensity(m, classMean[classValue], classStdDev[classValue]);
110          if (density > maxDensity) {
111            maxDensity = density;
112            maxDensityClassValue = classValue;
113          }
114        }
115        classValuesList.Add(maxDensityClassValue);
116      }
117
118      // only keep thresholds at which the class changes
119      // class B overrides threshold s. So only thresholds r and t are relevant and have to be kept
120      //
121      //      A    B  C
122      //       /\  /\/\       
123      //      / r\/ /\t\       
124      //     /   /\/  \ \     
125      //    /   / /\s  \ \     
126      //  -/---/-/ -\---\-\----
127      List<double> filteredThresholds = new List<double>();
128      List<double> filteredClassValues = new List<double>();
129      filteredThresholds.Add(thresholdList[0]);
130      filteredClassValues.Add(classValuesList[0]);
131      for (int i = 0; i < classValuesList.Count - 1; i++) {
132        if (classValuesList[i] != classValuesList[i + 1]) {
133          filteredThresholds.Add(thresholdList[i + 1]);
134          filteredClassValues.Add(classValuesList[i + 1]);
135        }
136      }
137      thresholds = filteredThresholds.ToArray();
138      classValues = filteredClassValues.ToArray();
139    }
140
141    private static double NormalDensity(double x, double mu, double sigma) {
142      if (sigma.IsAlmost(0.0)) {
143        if (x.IsAlmost(mu)) return 1.0; else return 0.0;
144      } else {
145        return (1.0 / Math.Sqrt(2.0 * Math.PI * sigma * sigma)) * Math.Exp(-((x - mu) * (x - mu)) / (2.0 * sigma * sigma));
146      }
147    }
148
149    private static void CalculateCutPoints(double m1, double s1, double m2, double s2, out double x1, out double x2) {
150      double a = (s1 * s1 - s2 * s2);
151      x1 = -(-m2 * s1 * s1 + m1 * s2 * s2 + Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (-s1 * s1 + s2 * s2) * Math.Log(s2 / s1)))) / a;
152      x2 = (m2 * s1 * s1 - m1 * s2 * s2 + Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (-s1 * s1 + s2 * s2) * Math.Log(s2 / s1)))) / a;
153    }
154  }
155}
Note: See TracBrowser for help on using the repository browser.