Free cookie consent management tool by TermsFeed Policy Generator

source: branches/ParameterBinding/HeuristicLab.Problems.DataAnalysis.Classification/3.3/Symbolic/SymbolicClassificationSolution.cs @ 10473

Last change on this file since 10473 was 4722, checked in by swagner, 14 years ago

Merged cloning refactoring branch back into trunk (#922)

File size: 7.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
29
30namespace HeuristicLab.Problems.DataAnalysis.Classification {
31  /// <summary>
32  /// Represents a solution for a symbolic classification problem which can be visualized in the GUI.
33  /// </summary>
34  [Item("SymbolicClassificationSolution", "Represents a solution for a symbolic classification problem which can be visualized in the GUI.")]
35  [StorableClass]
36  public class SymbolicClassificationSolution : SymbolicRegressionSolution, IClassificationSolution {
37    public new ClassificationProblemData ProblemData {
38      get { return (ClassificationProblemData)base.ProblemData; }
39      set { base.ProblemData = value; }
40    }
41
42    #region properties
43    private List<double> optimalThresholds;
44    private List<double> actualThresholds;
45    public IEnumerable<double> Thresholds {
46      get {
47        if (actualThresholds == null) RecalculateEstimatedValues();
48        return actualThresholds;
49      }
50      set {
51        if (actualThresholds != null && actualThresholds.SequenceEqual(value))
52          return;
53        actualThresholds = new List<double>(value);
54        OnThresholdsChanged();
55      }
56    }
57
58    public IEnumerable<double> EstimatedClassValues {
59      get { return GetEstimatedClassValues(Enumerable.Range(0, ProblemData.Dataset.Rows)); }
60    }
61
62    public IEnumerable<double> EstimatedTrainingClassValues {
63      get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
64    }
65
66    public IEnumerable<double> EstimatedTestClassValues {
67      get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
68    }
69
70    [StorableConstructor]
71    protected SymbolicClassificationSolution(bool deserializing) : base(deserializing) { }
72    protected SymbolicClassificationSolution(SymbolicClassificationSolution original, Cloner cloner) : base(original, cloner) { }
73    public SymbolicClassificationSolution(ClassificationProblemData problemData, SymbolicRegressionModel model, double lowerEstimationLimit, double upperEstimationLimit)
74      : base(problemData, model, lowerEstimationLimit, upperEstimationLimit) {
75    }
76
77    public override IDeepCloneable Clone(Cloner cloner) {
78      return new SymbolicClassificationSolution(this, cloner);
79    }
80
81    protected override void RecalculateEstimatedValues() {
82      estimatedValues =
83          (from x in Model.GetEstimatedValues(ProblemData, 0, ProblemData.Dataset.Rows)
84           let boundedX = Math.Min(UpperEstimationLimit, Math.Max(LowerEstimationLimit, x))
85           select double.IsNaN(boundedX) ? UpperEstimationLimit : boundedX).ToList();
86      RecalculateClassIntermediates();
87      OnEstimatedValuesChanged();
88    }
89
90    private void RecalculateClassIntermediates() {
91      int slices = 100;
92
93      List<int> classInstances = (from classValue in ProblemData.Dataset.GetVariableValues(ProblemData.TargetVariable.Value)
94                                  group classValue by classValue into grouping
95                                  select grouping.Count()).ToList();
96
97      List<KeyValuePair<double, double>> estimatedTargetValues =
98         (from row in ProblemData.TrainingIndizes
99          select new KeyValuePair<double, double>(
100            estimatedValues[row],
101            ProblemData.Dataset[ProblemData.TargetVariable.Value, row])).ToList();
102
103      List<double> originalClasses = ProblemData.SortedClassValues.ToList();
104      double[] thresholds = new double[ProblemData.NumberOfClasses + 1];
105      thresholds[0] = double.NegativeInfinity;
106      thresholds[thresholds.Length - 1] = double.PositiveInfinity;
107
108      for (int i = 1; i < thresholds.Length - 1; i++) {
109        double lowerThreshold = thresholds[i - 1];
110        double actualThreshold = originalClasses[i - 1];
111        double thresholdIncrement = (originalClasses[i] - originalClasses[i - 1]) / slices;
112
113        double bestThreshold = double.NaN;
114        double bestClassificationScore = double.PositiveInfinity;
115
116        while (actualThreshold < originalClasses[i]) {
117          double classificationScore = 0.0;
118
119          foreach (KeyValuePair<double, double> estimatedTarget in estimatedTargetValues) {
120            //all positives
121            if (estimatedTarget.Value.IsAlmost(originalClasses[i - 1])) {
122              if (estimatedTarget.Key > lowerThreshold && estimatedTarget.Key < actualThreshold)
123                //true positive
124                classificationScore += ProblemData.MisclassificationMatrix[i - 1, i - 1] / classInstances[i - 1];
125              else
126                //false negative
127                classificationScore += ProblemData.MisclassificationMatrix[i, i - 1] / classInstances[i - 1];
128            }
129              //all negatives
130            else {
131              if (estimatedTarget.Key > lowerThreshold && estimatedTarget.Key < actualThreshold)
132                classificationScore += ProblemData.MisclassificationMatrix[i - 1, i] / classInstances[i];
133              else
134                //true negative, consider only upper class
135                classificationScore += ProblemData.MisclassificationMatrix[i, i] / classInstances[i];
136            }
137          }
138          if (classificationScore < bestClassificationScore) {
139            bestClassificationScore = classificationScore;
140            bestThreshold = actualThreshold;
141          }
142          actualThreshold += thresholdIncrement;
143        }
144        thresholds[i] = bestThreshold;
145      }
146      this.optimalThresholds = new List<double>(thresholds);
147      this.actualThresholds = optimalThresholds;
148    }
149
150    public IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
151      double[] classValues = ProblemData.SortedClassValues.ToArray();
152      foreach (int row in rows) {
153        double value = estimatedValues[row];
154        int classIndex = 0;
155        while (value > actualThresholds[classIndex + 1])
156          classIndex++;
157        yield return classValues[classIndex];
158      }
159    }
160    #endregion
161
162    public event EventHandler ThresholdsChanged;
163    private void OnThresholdsChanged() {
164      var handler = ThresholdsChanged;
165      if (handler != null)
166        ThresholdsChanged(this, EventArgs.Empty);
167    }
168  }
169}
Note: See TracBrowser for help on using the repository browser.