Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

AccuracyMaximizationThresholdCalculator.cs

Visit:

Last change on this file was 17180, checked in by swagner, 5 years ago
#2875: Removed years in copyrights
File size: 7.1 KB

Rev	Line
[5681]	1	#region License Information
	2	/* HeuristicLab
[17180]	3	* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[5681]	4	*
	5	* This file is part of HeuristicLab.
	6	*
	7	* HeuristicLab is free software: you can redistribute it and/or modify
	8	* it under the terms of the GNU General Public License as published by
	9	* the Free Software Foundation, either version 3 of the License, or
	10	* (at your option) any later version.
	11	*
	12	* HeuristicLab is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	* GNU General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public License
	18	* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
	19	*/
	20	#endregion
	21
[5777]	22	using System;
[5681]	23	using System.Collections.Generic;
	24	using System.Linq;
	25	using HeuristicLab.Common;
	26	using HeuristicLab.Core;
[16565]	27	using HEAL.Attic;
[5681]	28
	29	namespace HeuristicLab.Problems.DataAnalysis {
	/// <summary>
	/// Represents a threshold calculator that maximizes the weighted accuracy of the classification model.
	/// </summary>
	/// <remarks>
	/// The thresholds are found by a grid search over the range of the estimated values; each candidate
	/// threshold is scored by summing <c>problemData.GetClassificationPenalty</c> over all samples and the
	/// candidate with the smallest sum wins.
	/// </remarks>
	[StorableType("30BB9513-542D-4CB8-931B-9767C9CB2EFB")]
	[Item("AccuracyMaximizationThresholdCalculator", "Represents a threshold calculator that maximizes the weighted accuracy of the classifcation model.")]
	public class AccuracyMaximizationThresholdCalculator : ThresholdCalculator {

		/// <summary>
		/// Storable constructor; forwards the flag to the base class.
		/// </summary>
		[StorableConstructor]
		protected AccuracyMaximizationThresholdCalculator(StorableConstructorFlag _) : base(_) { }

		/// <summary>
		/// Cloning constructor; forwards the original instance and the cloner to the base class.
		/// </summary>
		protected AccuracyMaximizationThresholdCalculator(AccuracyMaximizationThresholdCalculator original, Cloner cloner)
			: base(original, cloner) {
		}

		/// <summary>
		/// Creates a new calculator instance.
		/// </summary>
		public AccuracyMaximizationThresholdCalculator()
			: base() {
		}

		/// <summary>
		/// Creates a copy of this calculator via the cloning constructor.
		/// </summary>
		public override IDeepCloneable Clone(Cloner cloner) {
			return new AccuracyMaximizationThresholdCalculator(this, cloner);
		}

		/// <summary>
		/// Instance entry point; delegates to the static <see cref="CalculateThresholds"/>.
		/// </summary>
		public override void Calculate(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
			AccuracyMaximizationThresholdCalculator.CalculateThresholds(problemData, estimatedValues, targetClassValues, out classValues, out thresholds);
		}

		/// <summary>
		/// Determines one threshold per class by scanning candidate thresholds over the range of the
		/// estimated values and keeping the candidate with the smallest summed classification penalty.
		/// </summary>
		/// <param name="problemData">Supplies the misclassification penalties via <c>GetClassificationPenalty</c>.</param>
		/// <param name="estimatedValues">Estimated (continuous) model outputs; enumerated once and copied into a list.</param>
		/// <param name="targetClassValues">Target class values, paired position-wise with <paramref name="estimatedValues"/> (surplus elements of the longer sequence are ignored by <c>Zip</c>).</param>
		/// <param name="classValues">Receives the distinct target class values, ordered by the <c>Median()</c> of the estimated values belonging to each class.</param>
		/// <param name="thresholds">Receives one threshold per class; <c>thresholds[0]</c> is always <see cref="double.NegativeInfinity"/>.</param>
		/// <remarks>
		/// <para>
		/// The step width of the scan is <c>(max - min) / 100</c> of the estimated values, but at least <c>10e-5</c>.
		/// </para>
		/// <para>
		/// If no candidate is evaluated for a class boundary (for example because all estimated values are
		/// equal), or if both penalties used for the final weighting are zero, the resulting threshold is
		/// <see cref="double.NaN"/>.
		/// </para>
		/// <para>
		/// An empty <paramref name="estimatedValues"/> sequence makes <c>Max()</c> throw an
		/// <see cref="InvalidOperationException"/>.
		/// </para>
		/// </remarks>
		public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
			const int slices = 100;
			const double minThresholdInc = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model)
			List<double> estimatedValuesList = estimatedValues.ToList();
			double maxEstimatedValue = estimatedValuesList.Max();
			double minEstimatedValue = estimatedValuesList.Min();
			double thresholdIncrement = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc);
			var estimatedAndTargetValuePairs =
				estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
				.OrderBy(x => x.EstimatedValue).ToList();

			// order the classes by the median of their estimated values, so that thresholds[i]
			// separates classValues[i - 1] (below) from classValues[i] (above)
			classValues = estimatedAndTargetValuePairs.GroupBy(x => x.TargetClassValue)
				.Select(x => new { Median = x.Select(y => y.EstimatedValue).Median(), Class = x.Key })
				.OrderBy(x => x.Median).Select(x => x.Class).ToArray();

			int nClasses = classValues.Length;
			thresholds = new double[nClasses];
			thresholds[0] = double.NegativeInfinity;

			// incrementally calculate accuracy of all possible thresholds
			for (int i = 1; i < thresholds.Length; i++) {
				double lowerThreshold = thresholds[i - 1];
				double actualThreshold = Math.Max(lowerThreshold, minEstimatedValue);
				double lowestBestThreshold = double.NaN;
				double highestBestThreshold = double.NaN;
				double bestClassificationScore = double.PositiveInfinity;
				bool seriesOfEqualClassificationScores = false;

				double currentClass = classValues[i - 1];
				double upperClass = classValues[i];
				// thresholds[0] is negative infinity, so for i == 1 a value can only be "at or below the
				// lower threshold" if it is negative infinity itself. There is no class below classValues[0]
				// in that case; clamp the index instead of reading classValues[-1] (IndexOutOfRangeException).
				double lowerClass = classValues[Math.Max(i - 2, 0)];

				while (actualThreshold < maxEstimatedValue) {
					double classificationScore = 0.0;

					foreach (var pair in estimatedAndTargetValuePairs) {
						bool insideInterval = pair.EstimatedValue > lowerThreshold && pair.EstimatedValue <= actualThreshold;

						//all positives
						if (pair.TargetClassValue.IsAlmost(currentClass)) {
							if (insideInterval) {
								//true positive
								classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
							} else {
								//false negative
								classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, upperClass);
							}
						}
						//all negatives
						else {
							if (insideInterval) {
								//false positive
								classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, currentClass);
							} else if (pair.EstimatedValue <= lowerThreshold) {
								//already assigned below the lower threshold, consider lower class
								classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, lowerClass);
							} else if (pair.EstimatedValue > actualThreshold) {
								// NOTE(review): this compares the class values numerically, whereas classValues
								// is ordered by median of the estimated values - confirm that this is intended.
								if (pair.TargetClassValue < currentClass) {
									//negative in wrong class, consider upper class
									classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, upperClass);
								} else {
									//true negative, must be optimized by the other thresholds
									classificationScore += problemData.GetClassificationPenalty(pair.TargetClassValue, pair.TargetClassValue);
								}
							}
							// a NaN estimated value matches none of the comparisons above and adds no penalty
						}
					}

					//new best classification score found
					if (classificationScore < bestClassificationScore) {
						bestClassificationScore = classificationScore;
						lowestBestThreshold = actualThreshold;
						highestBestThreshold = actualThreshold;
						seriesOfEqualClassificationScores = true;
					}
					//equal classification scores => if seriesOfEqualClassificationScores == true update highest threshold
					else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores) {
						highestBestThreshold = actualThreshold;
					}
					//worse classification score found (or the series was already interrupted) => reset seriesOfEqualClassificationScores
					else {
						seriesOfEqualClassificationScores = false;
					}

					actualThreshold += thresholdIncrement;
				}

				//scale lowest thresholds and highest found optimal threshold according to the misclassification matrix
				double falseNegativePenalty = problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);
				double falsePositivePenalty = problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);
				thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);
			}
		}
	}
	133	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/AccuracyMaximizationThresholdCalculator.cs

Download in other formats: