#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Operators;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Optimization;
using System;

namespace HeuristicLab.Problems.DataAnalysis {

/// <summary>
/// Represents a classification solution that uses a discriminant function and classification thresholds.
/// </summary>

[StorableClass]
[Item("DiscriminantFunctionClassificationSolution", "Represents a classification solution that uses a discriminant function and classification thresholds.")]
public class DiscriminantFunctionClassificationSolution : ClassificationSolution, IDiscriminantFunctionClassificationSolution {
  /// <summary>
  /// Constructor used by the persistence framework. The model is not accessed here;
  /// event handlers are attached later in <see cref="AfterDeserialization"/>.
  /// </summary>
  [StorableConstructor]
  protected DiscriminantFunctionClassificationSolution(bool deserializing) : base(deserializing) { }

  /// <summary>
  /// Cloning constructor. Re-attaches the threshold event handler to the clone's model so that
  /// <see cref="ThresholdsChanged"/> is also raised by cloned solutions.
  /// </summary>
  // NOTE(review): no Clone override is visible in this class; confirm that cloning is wired up elsewhere.
  protected DiscriminantFunctionClassificationSolution(DiscriminantFunctionClassificationSolution original, Cloner cloner)
    : base(original, cloner) {
    RegisterEventHandlers();
  }

  /// <summary>
  /// Creates a solution from a regression model by wrapping it in a
  /// <see cref="DiscriminantFunctionClassificationModel"/> whose thresholds are calculated
  /// from the model output on the training rows of <paramref name="problemData"/>.
  /// </summary>
  public DiscriminantFunctionClassificationSolution(IRegressionModel model, IClassificationProblemData problemData)
    : this(new DiscriminantFunctionClassificationModel(model, problemData.ClassValues, CalculateClassThresholds(model, problemData, problemData.TrainingIndizes)), problemData) {
  }

  /// <summary>
  /// Creates a solution from an existing discriminant function classification model.
  /// </summary>
  public DiscriminantFunctionClassificationSolution(IDiscriminantFunctionClassificationModel model, IClassificationProblemData problemData)
    : base(model, problemData) {
    RegisterEventHandlers();
  }

  /// <summary>
  /// Event subscriptions are not part of the persisted state, so they are restored after deserialization.
  /// </summary>
  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserialization() {
    RegisterEventHandlers();
  }

  /// <summary>
  /// Forwards threshold changes of the model as <see cref="ThresholdsChanged"/> of this solution.
  /// </summary>
  private void RegisterEventHandlers() {
    Model.ThresholdsChanged += new EventHandler(Model_ThresholdsChanged);
  }

  #region IDiscriminantFunctionClassificationSolution Members
  /// <summary>The model of the base class, cast to the discriminant function model interface.</summary>
  public new IDiscriminantFunctionClassificationModel Model {
    get { return (IDiscriminantFunctionClassificationModel)base.Model; }
  }

  /// <summary>Estimated values for all rows of the dataset.</summary>
  public IEnumerable<double> EstimatedValues {
    get { return GetEstimatedValues(Enumerable.Range(0, ProblemData.Dataset.Rows)); }
  }

  /// <summary>Estimated values for the training rows.</summary>
  public IEnumerable<double> EstimatedTrainingValues {
    get { return GetEstimatedValues(ProblemData.TrainingIndizes); }
  }

  /// <summary>Estimated values for the test rows.</summary>
  public IEnumerable<double> EstimatedTestValues {
    get { return GetEstimatedValues(ProblemData.TestIndizes); }
  }

  /// <summary>Estimated values of the model for the given dataset rows.</summary>
  /// <param name="rows">Row indices into the dataset of the problem data.</param>
  public IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows) {
    return Model.GetEstimatedValues(ProblemData.Dataset, rows);
  }

  /// <summary>
  /// The classification thresholds of the model. The setter hands a copy of the given values to the model.
  /// </summary>
  public IEnumerable<double> Thresholds {
    get { return Model.Thresholds; }
    set { Model.Thresholds = new List<double>(value); }
  }

  /// <summary>Raised whenever the model reports that its thresholds changed.</summary>
  public event EventHandler ThresholdsChanged;

  private void Model_ThresholdsChanged(object sender, EventArgs e) {
    OnThresholdsChanged(e);
  }

  /// <summary>Raises <see cref="ThresholdsChanged"/> with this solution as sender.</summary>
  protected virtual void OnThresholdsChanged(EventArgs e) {
    // copy to a local so the null check and the invocation see the same delegate
    var listener = ThresholdsChanged;
    if (listener != null) listener(this, e);
  }
  #endregion

  /// <summary>
  /// Calculates class thresholds for a regression model from its estimated values and the
  /// target variable values on the given rows.
  /// </summary>
  /// <returns>The threshold array produced by the public overload.</returns>
  private static double[] CalculateClassThresholds(IRegressionModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {
    double[] thresholds;
    double[] classValues;
    CalculateClassThresholds(problemData,
      model.GetEstimatedValues(problemData.Dataset, rows),
      problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable, rows),
      out classValues, out thresholds);
    return thresholds;
  }

  /// <summary>
  /// Calculates thresholds that separate neighbouring classes on the estimated values.
  /// For each pair of neighbouring classes, candidate thresholds are scanned in steps of
  /// 1/100 of the estimated value range, and the candidates with the lowest summed
  /// classification penalty are kept. The final threshold is the average of the lowest and
  /// highest best candidate, weighted by the false positive and false negative penalties.
  /// </summary>
  /// <param name="problemData">Supplies the class values and the classification penalties.</param>
  /// <param name="estimatedValues">Estimated values; paired positionally with <paramref name="targetClassValues"/>.</param>
  /// <param name="targetClassValues">Target class values of the same rows.</param>
  /// <param name="classValues">The class values of the problem data in ascending order.</param>
  /// <param name="thresholds">
  /// Array with one element more than <paramref name="classValues"/>; the first element is negative
  /// infinity and the last is positive infinity. An inner threshold is NaN when no candidate could be
  /// evaluated (e.g. all estimated values are equal) or when both penalties used as weights are zero.
  /// </param>
  /// <exception cref="InvalidOperationException">Thrown by Max/Min when <paramref name="estimatedValues"/> is empty.</exception>
  public static void CalculateClassThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
    const int slices = 100;
    List<double> estimatedValuesList = estimatedValues.ToList();
    double maxEstimatedValue = estimatedValuesList.Max();
    double minEstimatedValue = estimatedValuesList.Min();
    double thresholdIncrement = (maxEstimatedValue - minEstimatedValue) / slices;
    var estimatedAndTargetValuePairs =
      estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
      .OrderBy(x => x.EstimatedValue)
      .ToList();

    classValues = problemData.ClassValues.OrderBy(x => x).ToArray();
    int nClasses = classValues.Length;
    thresholds = new double[nClasses + 1];
    thresholds[0] = double.NegativeInfinity;
    thresholds[thresholds.Length - 1] = double.PositiveInfinity;

    // one threshold is always treated as binary separation of the remaining classes
    for (int i = 1; i < thresholds.Length - 1; i++) {
      double lowerClass = classValues[i - 1];
      double upperClass = classValues[i];

      // The penalties depend only on the class pair, so they are looked up once per threshold
      // instead of once per sample and candidate.
      // NOTE(review): assumes GetClassificationPenalty is a side-effect-free lookup - confirm.
      double truePositivePenalty = problemData.GetClassificationPenalty(lowerClass, lowerClass);
      double falseNegativePenalty = problemData.GetClassificationPenalty(upperClass, lowerClass);
      double falsePositivePenalty = problemData.GetClassificationPenalty(lowerClass, upperClass);
      double trueNegativePenalty = problemData.GetClassificationPenalty(upperClass, upperClass);

      double lowerThreshold = thresholds[i - 1];
      double actualThreshold = Math.Max(lowerThreshold, minEstimatedValue);
      double lowestBestThreshold = double.NaN;
      double highestBestThreshold = double.NaN;
      double bestClassificationScore = double.PositiveInfinity;
      bool seriesOfEqualClassificationScores = false;

      while (actualThreshold < maxEstimatedValue) {
        double classificationScore = 0.0;
        foreach (var pair in estimatedAndTargetValuePairs) {
          // a sample is assigned to the lower class if its estimate lies strictly between both thresholds
          bool assignedToLowerClass = pair.EstimatedValue > lowerThreshold && pair.EstimatedValue < actualThreshold;
          if (pair.TargetClassValue.IsAlmost(lowerClass)) {
            // all positives: true positive or false negative
            classificationScore += assignedToLowerClass ? truePositivePenalty : falseNegativePenalty;
          } else {
            // all negatives: false positive or true negative (only the upper class is considered)
            classificationScore += assignedToLowerClass ? falsePositivePenalty : trueNegativePenalty;
          }
        }

        if (classificationScore < bestClassificationScore) {
          // new best classification score found
          bestClassificationScore = classificationScore;
          lowestBestThreshold = actualThreshold;
          highestBestThreshold = actualThreshold;
          seriesOfEqualClassificationScores = true;
        } else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores) {
          // equal score directly following the best one => extend the range of best thresholds
          // (double.Epsilon is the smallest positive double, so this is effectively an exact comparison)
          highestBestThreshold = actualThreshold;
        } else {
          // worse classification score found => the series of equal scores ends
          seriesOfEqualClassificationScores = false;
        }

        // Stop when the increment no longer advances the threshold (increment is zero, NaN or below
        // the floating point resolution at this magnitude); otherwise the scan would never terminate.
        double nextThreshold = actualThreshold + thresholdIncrement;
        if (!(nextThreshold > actualThreshold)) break;
        actualThreshold = nextThreshold;
      }

      // scale lowest and highest found optimal threshold according to the misclassification matrix
      thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);
    }
  }
}
} // closes namespace HeuristicLab.Problems.DataAnalysis