Context Navigation

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Classification/3.3/Symbolic/SymbolicClassificationSolution.cs @ 4565

Visit:

Last change on this file since 4565 was 4469, checked in by mkommend, 14 years ago
Added logic to remove the test samples from the training samples (ticket #939).
File size: 6.8 KB

Line
1	#region License Information
2	/* HeuristicLab
3	* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4	*
5	* This file is part of HeuristicLab.
6	*
7	* HeuristicLab is free software: you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation, either version 3 of the License, or
10	* (at your option) any later version.
11	*
12	* HeuristicLab is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19	*/
20	#endregion
21
22	using System;
23	using System.Collections.Generic;
24	using System.Linq;
25	using HeuristicLab.Common;
26	using HeuristicLab.Core;
27	using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28	using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
29
namespace HeuristicLab.Problems.DataAnalysis.Classification {
  /// <summary>
  /// Represents a solution for a symbolic classification problem which can be visualized in the GUI.
  /// </summary>
  /// <remarks>
  /// The bounded outputs of the model are mapped to class values with a list of thresholds:
  /// an estimated value is assigned to the first class whose upper threshold it does not exceed.
  /// The thresholds are derived from the training rows whenever the estimated values are recalculated
  /// and can be overridden through <see cref="Thresholds"/>.
  /// </remarks>
  [Item("SymbolicClassificationSolution", "Represents a solution for a symbolic classification problem which can be visualized in the GUI.")]
  [StorableClass]
  public class SymbolicClassificationSolution : SymbolicRegressionSolution, IClassificationSolution {
    // Number of steps used to divide the interval between two neighbouring class values
    // when searching for the best threshold.
    private const int ThresholdSlices = 100;

    private SymbolicClassificationSolution() : base() { }

    /// <summary>
    /// Creates a new solution; all arguments are passed on unchanged to the base class constructor.
    /// </summary>
    public SymbolicClassificationSolution(ClassificationProblemData problemData, SymbolicRegressionModel model, double lowerEstimationLimit, double upperEstimationLimit)
      : base(problemData, model, lowerEstimationLimit, upperEstimationLimit) {
    }

    /// <summary>
    /// Gets or sets the problem data of the base class, typed as <see cref="ClassificationProblemData"/>.
    /// </summary>
    public new ClassificationProblemData ProblemData {
      get { return (ClassificationProblemData)base.ProblemData; }
      set { base.ProblemData = value; }
    }

    /// <summary>
    /// Evaluates the model on all rows of the dataset, clamps the outputs to the estimation limits,
    /// recalculates the class thresholds and finally raises the estimated-values-changed notification.
    /// </summary>
    protected override void RecalculateEstimatedValues() {
      estimatedValues =
        (from x in Model.GetEstimatedValues(ProblemData, 0, ProblemData.Dataset.Rows)
         let boundedX = Math.Min(UpperEstimationLimit, Math.Max(LowerEstimationLimit, x))
         // Math.Min/Math.Max propagate NaN, so NaN outputs are replaced by the upper limit here.
         select double.IsNaN(boundedX) ? UpperEstimationLimit : boundedX).ToList();
      RecalculateClassIntermediates();
      OnEstimatedValuesChanged();
    }

    /// <summary>
    /// Determines one threshold between each pair of neighbouring class values by scanning
    /// candidate thresholds and keeping the one with the lowest accumulated misclassification cost
    /// on the training rows. The result becomes both the optimal and the active thresholds.
    /// </summary>
    private void RecalculateClassIntermediates() {
      var targetVariable = ProblemData.TargetVariable.Value;

      // Number of rows per class over the whole dataset. GroupBy yields its groups in order of first
      // occurrence, so the groups are ordered by class value explicitly; the counts are indexed below
      // with positions in SortedClassValues (assumed to be ascending, as the name suggests).
      List<int> classInstances =
        (from classValue in ProblemData.Dataset.GetVariableValues(targetVariable)
         group classValue by classValue into grouping
         orderby grouping.Key
         select grouping.Count()).ToList();

      // Key = estimated value, Value = target value of each training row.
      List<KeyValuePair<double, double>> estimatedTargetValues =
        (from row in ProblemData.TrainingIndizes
         select new KeyValuePair<double, double>(
           estimatedValues[row],
           ProblemData.Dataset[targetVariable, row])).ToList();

      List<double> originalClasses = ProblemData.SortedClassValues.ToList();
      double[] thresholds = new double[ProblemData.NumberOfClasses + 1];
      thresholds[0] = double.NegativeInfinity;
      thresholds[thresholds.Length - 1] = double.PositiveInfinity;

      for (int i = 1; i < thresholds.Length - 1; i++) {
        double lowerThreshold = thresholds[i - 1];
        double lowerClass = originalClasses[i - 1];
        double upperClass = originalClasses[i];
        double thresholdIncrement = (upperClass - lowerClass) / ThresholdSlices;

        // The costs do not depend on the candidate threshold or the sample, so they are looked up once.
        // Rows that do not belong to the lower class are all weighted with the entries of the upper class.
        double truePositiveCost = ProblemData.MisclassificationMatrix[i - 1, i - 1] / classInstances[i - 1];
        double falseNegativeCost = ProblemData.MisclassificationMatrix[i, i - 1] / classInstances[i - 1];
        double falsePositiveCost = ProblemData.MisclassificationMatrix[i - 1, i] / classInstances[i];
        double trueNegativeCost = ProblemData.MisclassificationMatrix[i, i] / classInstances[i];

        double bestThreshold = double.NaN;
        double bestClassificationScore = double.PositiveInfinity;

        // Scan the candidates from the lower class value upwards; the upper class value itself is excluded.
        double actualThreshold = lowerClass;
        while (actualThreshold < upperClass) {
          double classificationScore = 0.0;

          foreach (KeyValuePair<double, double> estimatedTarget in estimatedTargetValues) {
            bool estimatedAsLowerClass = estimatedTarget.Key > lowerThreshold && estimatedTarget.Key < actualThreshold;
            if (estimatedTarget.Value.IsAlmost(lowerClass)) {
              // all positives: true positive or false negative
              classificationScore += estimatedAsLowerClass ? truePositiveCost : falseNegativeCost;
            } else {
              // all negatives: false positive or true negative, consider only upper class
              classificationScore += estimatedAsLowerClass ? falsePositiveCost : trueNegativeCost;
            }
          }

          // Strict comparison: on ties the lowest candidate threshold is kept.
          if (classificationScore < bestClassificationScore) {
            bestClassificationScore = classificationScore;
            bestThreshold = actualThreshold;
          }
          actualThreshold += thresholdIncrement;
        }
        thresholds[i] = bestThreshold;
      }

      this.optimalThresholds = new List<double>(thresholds);
      this.actualThresholds = optimalThresholds;
    }

    #region properties
    private List<double> optimalThresholds;
    private List<double> actualThresholds;

    /// <summary>
    /// Gets or sets the thresholds that separate the classes. Reading the property triggers a
    /// recalculation if no thresholds exist yet; setting it to a different sequence raises
    /// <see cref="ThresholdsChanged"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IEnumerable<double> Thresholds {
      get {
        if (actualThresholds == null) RecalculateEstimatedValues();
        return actualThresholds;
      }
      set {
        if (value == null) throw new ArgumentNullException("value");
        // Materialize once so that a deferred sequence is not enumerated twice.
        List<double> newThresholds = new List<double>(value);
        if (actualThresholds != null && actualThresholds.SequenceEqual(newThresholds))
          return;
        actualThresholds = newThresholds;
        OnThresholdsChanged();
      }
    }

    /// <summary>
    /// Gets the estimated class values for all rows of the dataset.
    /// </summary>
    public IEnumerable<double> EstimatedClassValues {
      get { return GetEstimatedClassValues(Enumerable.Range(0, ProblemData.Dataset.Rows)); }
    }

    /// <summary>
    /// Gets the estimated class values for the training rows.
    /// </summary>
    public IEnumerable<double> EstimatedTrainingClassValues {
      get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
    }

    /// <summary>
    /// Gets the estimated class values for the test rows.
    /// </summary>
    public IEnumerable<double> EstimatedTestClassValues {
      get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
    }

    /// <summary>
    /// Maps the estimated value of each given row to a class value using the active thresholds.
    /// The sequence is produced lazily while it is enumerated.
    /// </summary>
    /// <param name="rows">The indices of the dataset rows to classify.</param>
    /// <returns>One class value per row, in the order of <paramref name="rows"/>.</returns>
    public IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
      // Same lazy initialization as the Thresholds getter; without it the first enumeration
      // would dereference thresholds that have not been calculated yet.
      if (actualThresholds == null) RecalculateEstimatedValues();

      double[] classValues = ProblemData.SortedClassValues.ToArray();
      List<double> thresholds = actualThresholds;
      // Never step past the last class or the last threshold, even if assigned thresholds
      // lack the terminating positive infinity.
      int lastClassIndex = Math.Min(classValues.Length, thresholds.Count) - 1;

      foreach (int row in rows) {
        double value = estimatedValues[row];
        int classIndex = 0;
        while (classIndex < lastClassIndex && value > thresholds[classIndex + 1])
          classIndex++;
        yield return classValues[classIndex];
      }
    }
    #endregion

    /// <summary>
    /// Occurs when different thresholds are assigned to <see cref="Thresholds"/>.
    /// </summary>
    public event EventHandler ThresholdsChanged;

    private void OnThresholdsChanged() {
      // Invoke the local copy: the event field may become null between the check and the call.
      var handler = ThresholdsChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
  }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences