Free cookie consent management tool by TermsFeed Policy Generator

source: branches/CloningRefactoring/HeuristicLab.Problems.DataAnalysis.Classification/3.3/Symbolic/SymbolicClassificationSolution.cs @ 4696

Last change on this file since 4696 was 4678, checked in by gkronber, 14 years ago

Refactored cloning in DataAnalysis plugins. #922

File size: 7.2 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
29
namespace HeuristicLab.Problems.DataAnalysis.Classification {
  /// <summary>
  /// Represents a solution for a symbolic classification problem which can be visualized in the GUI.
  /// </summary>
  /// <remarks>
  /// The continuous estimates of the underlying symbolic regression model are mapped to class values
  /// with a list of thresholds: an estimate is assigned to the first class k (in the order of
  /// <c>ProblemData.SortedClassValues</c>) for which the estimate is not greater than threshold k + 1.
  /// The automatically calculated thresholds start with negative infinity and end with positive infinity.
  /// </remarks>
  [Item("SymbolicClassificationSolution", "Represents a solution for a symbolic classification problem which can be visualized in the GUI.")]
  [StorableClass]
  public class SymbolicClassificationSolution : SymbolicRegressionSolution, IClassificationSolution {
    // Number of candidate thresholds that are evaluated between two neighbouring class values.
    private const int ThresholdSlices = 100;

    /// <summary>
    /// Gets or sets the problem data of the base solution, cast to <see cref="ClassificationProblemData"/>.
    /// </summary>
    public new ClassificationProblemData ProblemData {
      get { return (ClassificationProblemData)base.ProblemData; }
      set { base.ProblemData = value; }
    }

    #region properties
    // Thresholds produced by the most recent run of RecalculateClassIntermediates().
    private List<double> optimalThresholds;
    // Thresholds currently used for classification: the calculated ones or those assigned via Thresholds.
    private List<double> actualThresholds;

    /// <summary>
    /// Gets or sets the thresholds that separate the classes.
    /// </summary>
    /// <remarks>
    /// The getter triggers <see cref="RecalculateEstimatedValues"/> if no thresholds are available yet.
    /// The setter stores a copy of the assigned sequence and raises <see cref="ThresholdsChanged"/>
    /// unless the new values are equal to the current ones.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IEnumerable<double> Thresholds {
      get {
        if (actualThresholds == null) RecalculateEstimatedValues();
        return actualThresholds;
      }
      set {
        if (value == null) throw new ArgumentNullException("value");
        // Materialize once so that a lazily evaluated sequence is not enumerated twice.
        List<double> newThresholds = new List<double>(value);
        if (actualThresholds != null && actualThresholds.SequenceEqual(newThresholds))
          return;
        actualThresholds = newThresholds;
        OnThresholdsChanged();
      }
    }

    /// <summary>
    /// Gets the estimated class values for all rows of the dataset.
    /// </summary>
    public IEnumerable<double> EstimatedClassValues {
      get { return GetEstimatedClassValues(Enumerable.Range(0, ProblemData.Dataset.Rows)); }
    }

    /// <summary>
    /// Gets the estimated class values for the rows given by <c>ProblemData.TrainingIndizes</c>.
    /// </summary>
    public IEnumerable<double> EstimatedTrainingClassValues {
      get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
    }

    /// <summary>
    /// Gets the estimated class values for the rows given by <c>ProblemData.TestIndizes</c>.
    /// </summary>
    public IEnumerable<double> EstimatedTestClassValues {
      get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
    }
    #endregion

    [StorableConstructor]
    protected SymbolicClassificationSolution(bool deserializing) : base(deserializing) { }
    // Cloning constructor. The threshold lists are not copied here; they are recalculated on demand.
    protected SymbolicClassificationSolution(SymbolicClassificationSolution original, Cloner cloner) : base(original, cloner) { }
    private SymbolicClassificationSolution() : base() { }
    /// <summary>
    /// Creates a new solution; all arguments are forwarded unchanged to the base class constructor.
    /// </summary>
    public SymbolicClassificationSolution(ClassificationProblemData problemData, SymbolicRegressionModel model, double lowerEstimationLimit, double upperEstimationLimit)
      : base(problemData, model, lowerEstimationLimit, upperEstimationLimit) {
    }

    /// <summary>
    /// Creates a clone of this solution by invoking the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicClassificationSolution(this, cloner);
    }

    /// <summary>
    /// Evaluates the model on all rows of the dataset, clamps the results to the estimation limits,
    /// recalculates the class thresholds and finally raises the estimated-values-changed notification.
    /// </summary>
    protected override void RecalculateEstimatedValues() {
      estimatedValues =
          (from x in Model.GetEstimatedValues(ProblemData, 0, ProblemData.Dataset.Rows)
           let boundedX = Math.Min(UpperEstimationLimit, Math.Max(LowerEstimationLimit, x))
           // Math.Min/Math.Max propagate NaN, so NaN estimates are replaced by the upper limit.
           select double.IsNaN(boundedX) ? UpperEstimationLimit : boundedX).ToList();
      RecalculateClassIntermediates();
      OnEstimatedValuesChanged();
    }

    /// <summary>
    /// Searches, for every pair of neighbouring class values, the threshold with the lowest
    /// classification score on the training rows and stores the result as the current thresholds.
    /// </summary>
    /// <remarks>
    /// The score of a candidate threshold sums, over all training rows, the matching entry of
    /// <c>ProblemData.MisclassificationMatrix</c> divided by the number of instances of the class.
    /// Any thresholds previously assigned via <see cref="Thresholds"/> are replaced.
    /// </remarks>
    private void RecalculateClassIntermediates() {
      // Number of dataset rows per class value. The groups are ordered by class value because
      // grouping alone yields them in order of first occurrence, while the counts are indexed
      // below like ProblemData.SortedClassValues (presumably ascending; the threshold search
      // relies on originalClasses[i - 1] < originalClasses[i]).
      List<int> classInstances = (from classValue in ProblemData.Dataset.GetVariableValues(ProblemData.TargetVariable.Value)
                                  group classValue by classValue into grouping
                                  orderby grouping.Key
                                  select grouping.Count()).ToList();

      // Key = estimated value, Value = target value of the training row.
      List<KeyValuePair<double, double>> estimatedTargetValues =
         (from row in ProblemData.TrainingIndizes
          select new KeyValuePair<double, double>(
            estimatedValues[row],
            ProblemData.Dataset[ProblemData.TargetVariable.Value, row])).ToList();

      List<double> originalClasses = ProblemData.SortedClassValues.ToList();
      var misclassificationMatrix = ProblemData.MisclassificationMatrix;
      double[] thresholds = new double[ProblemData.NumberOfClasses + 1];
      thresholds[0] = double.NegativeInfinity;
      thresholds[thresholds.Length - 1] = double.PositiveInfinity;

      for (int i = 1; i < thresholds.Length - 1; i++) {
        double lowerClass = originalClasses[i - 1];
        double upperClass = originalClasses[i];
        double lowerThreshold = thresholds[i - 1];
        double actualThreshold = lowerClass;
        double thresholdIncrement = (upperClass - lowerClass) / ThresholdSlices;

        // The per-row contributions do not depend on the candidate threshold; calculate them once.
        double truePositiveScore = misclassificationMatrix[i - 1, i - 1] / classInstances[i - 1];
        double falseNegativeScore = misclassificationMatrix[i, i - 1] / classInstances[i - 1];
        double falsePositiveScore = misclassificationMatrix[i - 1, i] / classInstances[i];
        // true negative, consider only upper class
        double trueNegativeScore = misclassificationMatrix[i, i] / classInstances[i];

        double bestThreshold = double.NaN;
        double bestClassificationScore = double.PositiveInfinity;

        while (actualThreshold < upperClass) {
          double classificationScore = 0.0;

          foreach (KeyValuePair<double, double> estimatedTarget in estimatedTargetValues) {
            bool assignedToLowerClass = estimatedTarget.Key > lowerThreshold && estimatedTarget.Key < actualThreshold;
            if (estimatedTarget.Value.IsAlmost(lowerClass))
              //all positives
              classificationScore += assignedToLowerClass ? truePositiveScore : falseNegativeScore;
            else
              //all negatives
              classificationScore += assignedToLowerClass ? falsePositiveScore : trueNegativeScore;
          }
          // Strict comparison: the lowest candidate wins among equally scored thresholds.
          if (classificationScore < bestClassificationScore) {
            bestClassificationScore = classificationScore;
            bestThreshold = actualThreshold;
          }
          actualThreshold += thresholdIncrement;
        }
        thresholds[i] = bestThreshold;
      }
      this.optimalThresholds = new List<double>(thresholds);
      this.actualThresholds = optimalThresholds;
    }

    /// <summary>
    /// Lazily maps the estimated values of the given rows to class values using the current thresholds.
    /// </summary>
    /// <param name="rows">Indices of the dataset rows to classify.</param>
    /// <returns>One class value from <c>ProblemData.SortedClassValues</c> per row, in the order of <paramref name="rows"/>.</returns>
    /// <remarks>
    /// Estimated values and thresholds are calculated on first enumeration if they are not available yet.
    /// Estimates above the last usable threshold are assigned to the last class that is covered by both
    /// the class values and the thresholds.
    /// </remarks>
    public IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
      // Same lazy initialization as in the Thresholds getter; without it the lists below may still be null.
      if (estimatedValues == null || actualThresholds == null) RecalculateEstimatedValues();

      double[] classValues = ProblemData.SortedClassValues.ToArray();
      // Assigned thresholds need not end with positive infinity or match the number of classes,
      // so the search is bounded explicitly instead of relying on a terminating threshold.
      int lastClassIndex = Math.Min(classValues.Length, actualThresholds.Count - 1) - 1;
      foreach (int row in rows) {
        double value = estimatedValues[row];
        int classIndex = 0;
        while (classIndex < lastClassIndex && value > actualThresholds[classIndex + 1])
          classIndex++;
        yield return classValues[classIndex];
      }
    }

    /// <summary>
    /// Occurs after new thresholds have been assigned via <see cref="Thresholds"/>.
    /// </summary>
    public event EventHandler ThresholdsChanged;
    private void OnThresholdsChanged() {
      // Invoke the local copy; the field may become null if the last handler is removed concurrently.
      var handler = ThresholdsChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.