Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/NormalDistributionCutPointsThresholdCalculator.cs @ 5777

Last change on this file since 5777 was 5777, checked in by mkommend, 13 years ago

#1418: Corrected plugin dependencies.

File size: 7.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28
29namespace HeuristicLab.Problems.DataAnalysis {
30  /// <summary>
31  /// Represents a threshold calculator that calculates thresholds as the cutting points between the estimated class distributions (assuming normally distributed class values).
32  /// </summary>
33  [StorableClass]
34  [Item("NormalDistributionCutPointsThresholdCalculator", "Represents a threshold calculator that calculates thresholds as the cutting points between the estimated class distributions (assuming normally distributed class values).")]
35  public class NormalDistributionCutPointsThresholdCalculator : ThresholdCalculator {
36
37    [StorableConstructor]
38    protected NormalDistributionCutPointsThresholdCalculator(bool deserializing) : base(deserializing) { }
39    protected NormalDistributionCutPointsThresholdCalculator(NormalDistributionCutPointsThresholdCalculator original, Cloner cloner)
40      : base(original, cloner) {
41    }
42    public NormalDistributionCutPointsThresholdCalculator()
43      : base() {
44    }
45
46    public override IDeepCloneable Clone(Cloner cloner) {
47      return new NormalDistributionCutPointsThresholdCalculator(this, cloner);
48    }
49
50    public override void Calculate(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
51      NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(problemData, estimatedValues, targetClassValues, out classValues, out thresholds);
52    }
53
54    public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
55      double maxEstimatedValue = estimatedValues.Max();
56      double minEstimatedValue = estimatedValues.Min();
57      var estimatedTargetValues = Enumerable.Zip(estimatedValues, targetClassValues, (e, t) => new { EstimatedValue = e, TargetValue = t }).ToList();
58
59      Dictionary<double, double> classMean = new Dictionary<double, double>();
60      Dictionary<double, double> classStdDev = new Dictionary<double, double>();
61      // calculate moments per class
62      foreach (var group in estimatedTargetValues.GroupBy(p => p.TargetValue)) {
63        IEnumerable<double> estimatedClassValues = group.Select(x => x.EstimatedValue);
64        double classValue = group.Key;
65        double mean, variance;
66        OnlineMeanAndVarianceCalculator.Calculate(estimatedClassValues, out mean, out variance);
67        classMean[classValue] = mean;
68        classStdDev[classValue] = Math.Sqrt(variance);
69      }
70      double[] originalClasses = classMean.Keys.OrderBy(x => x).ToArray();
71      int nClasses = originalClasses.Length;
72      List<double> thresholdList = new List<double>();
73      for (int i = 0; i < nClasses - 1; i++) {
74        for (int j = i + 1; j < nClasses; j++) {
75          double x1, x2;
76          double class0 = originalClasses[i];
77          double class1 = originalClasses[j];
78          // calculate all thresholds
79          CalculateCutPoints(classMean[class0], classStdDev[class0], classMean[class1], classStdDev[class1], out x1, out x2);
80          if (!thresholdList.Any(x => x.IsAlmost(x1))) thresholdList.Add(x1);
81          if (!thresholdList.Any(x => x.IsAlmost(x2))) thresholdList.Add(x2);
82        }
83      }
84      thresholdList.Sort();
85      thresholdList.Insert(0, double.NegativeInfinity);
86
87      // determine class values for each partition separated by a threshold by calculating the density of all class distributions
88      // all points in the partition are classified as the class with the maximal density in the parition
89      List<double> classValuesList = new List<double>();
90      for (int i = 0; i < thresholdList.Count; i++) {
91        double m;
92        if (double.IsNegativeInfinity(thresholdList[i])) {
93          m = thresholdList[i + 1] - 1.0; // smaller than the smalles non-infinity threshold
94        } else if (i == thresholdList.Count - 1) {
95          // last threshold
96          m = thresholdList[i] + 1.0; // larger than the last threshold
97        } else {
98          m = thresholdList[i] + (thresholdList[i + 1] - thresholdList[i]) / 2.0; // middle of partition
99        }
100
101        // determine class with maximal probability density in m
102        double maxDensity = 0;
103        double maxDensityClassValue = -1;
104        foreach (var classValue in originalClasses) {
105          double density = NormalDensity(m, classMean[classValue], classStdDev[classValue]);
106          if (density > maxDensity) {
107            maxDensity = density;
108            maxDensityClassValue = classValue;
109          }
110        }
111        classValuesList.Add(maxDensityClassValue);
112      }
113
114      // only keep thresholds at which the class changes
115      // class B overrides threshold s. So only thresholds r and t are relevant and have to be kept
116      //
117      //      A    B  C
118      //       /\  /\/\       
119      //      / r\/ /\t\       
120      //     /   /\/  \ \     
121      //    /   / /\s  \ \     
122      //  -/---/-/ -\---\-\----
123      List<double> filteredThresholds = new List<double>();
124      List<double> filteredClassValues = new List<double>();
125      filteredThresholds.Add(thresholdList[0]);
126      filteredClassValues.Add(classValuesList[0]);
127      for (int i = 0; i < classValuesList.Count - 1; i++) {
128        if (classValuesList[i] != classValuesList[i + 1]) {
129          filteredThresholds.Add(thresholdList[i + 1]);
130          filteredClassValues.Add(classValuesList[i + 1]);
131        }
132      }
133      thresholds = filteredThresholds.ToArray();
134      classValues = filteredClassValues.ToArray();
135    }
136
137    private static double NormalDensity(double x, double mu, double sigma) {
138      return (1.0 / Math.Sqrt(2.0 * Math.PI * sigma * sigma)) * Math.Exp(-((x - mu) * (x - mu)) / (2.0 * sigma * sigma));
139    }
140
141    private static void CalculateCutPoints(double m1, double s1, double m2, double s2, out double x1, out double x2) {
142      double a = (s1 * s1 - s2 * s2);
143      x1 = -(-m2 * s1 * s1 + m1 * s2 * s2 + Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (-s1 * s1 + s2 * s2) * Math.Log(s2 / s1)))) / a;
144      x2 = (m2 * s1 * s1 - m1 * s2 * s2 + Math.Sqrt(s1 * s1 * s2 * s2 * ((m1 - m2) * (m1 - m2) + 2.0 * (-s1 * s1 + s2 * s2) * Math.Log(s2 / s1)))) / a;
145    }
146  }
147}
Note: See TracBrowser for help on using the repository browser.