Free cookie consent management tool by TermsFeed Policy Generator

source: branches/CloningRefactoring/HeuristicLab.Problems.DataAnalysis.Classification/3.3/Symbolic/SymbolicClassificationSolution.cs @ 4657

Last change on this file since 4657 was 4469, checked in by mkommend, 14 years ago

Added logic to remove the test samples from the training samples (ticket #939).

File size: 6.8 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
29
namespace HeuristicLab.Problems.DataAnalysis.Classification {
  /// <summary>
  /// Represents a solution for a symbolic classification problem which can be visualized in the GUI.
  /// </summary>
  /// <remarks>
  /// The solution reuses the estimated values of the underlying symbolic regression model and maps
  /// them to class values by means of a list of thresholds: an estimated value is assigned to the
  /// class whose threshold interval contains it.
  /// </remarks>
  [Item("SymbolicClassificationSolution", "Represents a solution for a symbolic classification problem which can be visualized in the GUI.")]
  [StorableClass]
  public class SymbolicClassificationSolution : SymbolicRegressionSolution, IClassificationSolution {
    private SymbolicClassificationSolution() : base() { }

    /// <summary>
    /// Creates a new classification solution; all arguments are forwarded unchanged to the base class.
    /// </summary>
    /// <param name="problemData">The classification problem data the solution refers to.</param>
    /// <param name="model">The symbolic regression model that produces the estimated values.</param>
    /// <param name="lowerEstimationLimit">Lower limit forwarded to the base class.</param>
    /// <param name="upperEstimationLimit">Upper limit forwarded to the base class.</param>
    public SymbolicClassificationSolution(ClassificationProblemData problemData, SymbolicRegressionModel model, double lowerEstimationLimit, double upperEstimationLimit)
      : base(problemData, model, lowerEstimationLimit, upperEstimationLimit) {
    }

    /// <summary>
    /// Gets or sets the problem data of the base class, typed as <see cref="ClassificationProblemData"/>.
    /// </summary>
    public new ClassificationProblemData ProblemData {
      get { return (ClassificationProblemData)base.ProblemData; }
      set { base.ProblemData = value; }
    }

    /// <summary>
    /// Evaluates the model on all rows of the dataset, clamps every estimated value to
    /// [LowerEstimationLimit, UpperEstimationLimit] (NaN is replaced by UpperEstimationLimit),
    /// recalculates the class thresholds and finally raises the estimated-values-changed notification.
    /// </summary>
    protected override void RecalculateEstimatedValues() {
      estimatedValues =
          (from x in Model.GetEstimatedValues(ProblemData, 0, ProblemData.Dataset.Rows)
           let boundedX = Math.Min(UpperEstimationLimit, Math.Max(LowerEstimationLimit, x))
           select double.IsNaN(boundedX) ? UpperEstimationLimit : boundedX).ToList();
      RecalculateClassIntermediates();
      OnEstimatedValuesChanged();
    }

    /// <summary>
    /// Calculates the thresholds that separate neighbouring classes.
    /// </summary>
    /// <remarks>
    /// The first threshold is negative infinity and the last one positive infinity. Each interior
    /// threshold i is searched between the sorted class values i-1 and i in steps of 1/100 of their
    /// distance; the candidate with the lowest score wins. The score sums, over all training rows,
    /// the matching entry of the misclassification matrix divided by the number of instances of the
    /// respective class. The result is stored as both the optimal and the currently active thresholds.
    /// </remarks>
    private void RecalculateClassIntermediates() {
      int slices = 100;

      // Number of instances per class. GroupBy yields its groups in order of first occurrence in
      // the dataset, so the groups are ordered by class value to line the counts up with the
      // indices of SortedClassValues used below.
      // NOTE(review): assumes SortedClassValues holds the distinct target values in ascending order - confirm.
      List<int> classInstances = (from classValue in ProblemData.Dataset.GetVariableValues(ProblemData.TargetVariable.Value)
                                  group classValue by classValue into grouping
                                  orderby grouping.Key
                                  select grouping.Count()).ToList();

      // Key = estimated value, Value = original target value; training rows only.
      List<KeyValuePair<double, double>> estimatedTargetValues =
         (from row in ProblemData.TrainingIndizes
          select new KeyValuePair<double, double>(
            estimatedValues[row],
            ProblemData.Dataset[ProblemData.TargetVariable.Value, row])).ToList();

      List<double> originalClasses = ProblemData.SortedClassValues.ToList();
      double[] thresholds = new double[ProblemData.NumberOfClasses + 1];
      thresholds[0] = double.NegativeInfinity;
      thresholds[thresholds.Length - 1] = double.PositiveInfinity;

      for (int i = 1; i < thresholds.Length - 1; i++) {
        double lowerThreshold = thresholds[i - 1];
        double actualThreshold = originalClasses[i - 1];
        double thresholdIncrement = (originalClasses[i] - originalClasses[i - 1]) / slices;

        // Stays NaN when no candidate is evaluated or no candidate yields a comparable score.
        double bestThreshold = double.NaN;
        double bestClassificationScore = double.PositiveInfinity;

        while (actualThreshold < originalClasses[i]) {
          double classificationScore = 0.0;

          foreach (KeyValuePair<double, double> estimatedTarget in estimatedTargetValues) {
            bool estimatedAsLowerClass = estimatedTarget.Key > lowerThreshold && estimatedTarget.Key < actualThreshold;

            //all positives
            if (estimatedTarget.Value.IsAlmost(originalClasses[i - 1])) {
              if (estimatedAsLowerClass)
                //true positive
                classificationScore += ProblemData.MisclassificationMatrix[i - 1, i - 1] / classInstances[i - 1];
              else
                //false negative
                classificationScore += ProblemData.MisclassificationMatrix[i, i - 1] / classInstances[i - 1];
            }
            //all negatives
            else {
              if (estimatedAsLowerClass)
                //false positive
                classificationScore += ProblemData.MisclassificationMatrix[i - 1, i] / classInstances[i];
              else
                //true negative, consider only upper class
                classificationScore += ProblemData.MisclassificationMatrix[i, i] / classInstances[i];
            }
          }

          // Strict comparison: on ties the lowest candidate threshold is kept.
          if (classificationScore < bestClassificationScore) {
            bestClassificationScore = classificationScore;
            bestThreshold = actualThreshold;
          }
          actualThreshold += thresholdIncrement;
        }
        thresholds[i] = bestThreshold;
      }
      this.optimalThresholds = new List<double>(thresholds);
      this.actualThresholds = optimalThresholds;
    }

    #region properties
    private List<double> optimalThresholds;
    private List<double> actualThresholds;

    /// <summary>
    /// Gets or sets the thresholds used to map estimated values to class values.
    /// </summary>
    /// <remarks>
    /// The getter triggers a recalculation of the estimated values (and thereby of the thresholds)
    /// when no thresholds are available yet. The setter stores a copy of the supplied sequence and
    /// raises <see cref="ThresholdsChanged"/> unless the sequence equals the current thresholds.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IEnumerable<double> Thresholds {
      get {
        if (actualThresholds == null) RecalculateEstimatedValues();
        return actualThresholds;
      }
      set {
        if (value == null) throw new ArgumentNullException("value");
        // Materialize once so that a lazily evaluated sequence is not enumerated twice.
        List<double> newThresholds = new List<double>(value);
        if (actualThresholds != null && actualThresholds.SequenceEqual(newThresholds))
          return;
        actualThresholds = newThresholds;
        OnThresholdsChanged();
      }
    }

    /// <summary>
    /// Gets the estimated class values for all rows of the dataset.
    /// </summary>
    public IEnumerable<double> EstimatedClassValues {
      get { return GetEstimatedClassValues(Enumerable.Range(0, ProblemData.Dataset.Rows)); }
    }

    /// <summary>
    /// Gets the estimated class values for the training rows.
    /// </summary>
    public IEnumerable<double> EstimatedTrainingClassValues {
      get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
    }

    /// <summary>
    /// Gets the estimated class values for the test rows.
    /// </summary>
    public IEnumerable<double> EstimatedTestClassValues {
      get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
    }

    /// <summary>
    /// Lazily maps the estimated value of each given row to a class value. Starting at the lowest
    /// class, the class index is advanced as long as the estimated value exceeds the upper
    /// threshold of the current class.
    /// </summary>
    /// <param name="rows">The dataset rows for which class values should be returned.</param>
    /// <returns>One class value per row, in the order of <paramref name="rows"/>.</returns>
    public IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
      // Same lazy initialization as the Thresholds getter; without it the first call on a
      // solution whose thresholds have not been calculated yet fails with a NullReferenceException.
      if (actualThresholds == null) RecalculateEstimatedValues();

      List<double> thresholds = actualThresholds;
      double[] classValues = ProblemData.SortedClassValues.ToArray();
      foreach (int row in rows) {
        double value = estimatedValues[row];
        int classIndex = 0;
        // The bounds keep the walk inside both collections when externally supplied thresholds
        // do not end with positive infinity; such values fall into the highest reachable class.
        while (classIndex + 1 < classValues.Length
               && classIndex + 1 < thresholds.Count
               && value > thresholds[classIndex + 1])
          classIndex++;
        yield return classValues[classIndex];
      }
    }
    #endregion

    /// <summary>
    /// Raised by the <see cref="Thresholds"/> setter after a different threshold sequence has been assigned.
    /// </summary>
    public event EventHandler ThresholdsChanged;

    /// <summary>
    /// Raises <see cref="ThresholdsChanged"/>.
    /// </summary>
    private void OnThresholdsChanged() {
      // Invoke the local copy: the event field may become null between the check and the call
      // when the last subscriber detaches concurrently.
      var handler = ThresholdsChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.