#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Base class for classification solutions. It registers the result entries for accuracy,
  /// normalized Gini coefficient and a nested collection of binary quality measures
  /// (each for the training and the test partition) and provides
  /// <see cref="CalculateClassificationResults"/> to fill them from the estimated class values
  /// supplied by derived classes.
  /// </summary>
  [StorableClass]
  public abstract class ClassificationSolutionBase : DataAnalysisSolution, IClassificationSolution {
    // Names (keys) of the result entries stored directly in this solution.
    private const string TrainingAccuracyResultName = "Accuracy (training)";
    private const string TestAccuracyResultName = "Accuracy (test)";
    private const string TrainingNormalizedGiniCoefficientResultName = "Normalized Gini Coefficient (training)";
    private const string TestNormalizedGiniCoefficientResultName = "Normalized Gini Coefficient (test)";

    // Names (keys) of the result entries stored inside the quality measures collection.
    private const string TrainingTruePositiveRateResultName = "True positive rate (training)";
    private const string TrainingTrueNegativeRateResultName = "True negative rate (training)";
    private const string TrainingPositivePredictiveValueResultName = "Positive predictive value (training)";
    private const string TrainingNegativePredictiveValueResultName = "Negative predictive value (training)";
    private const string TrainingFalsePositiveRateResultName = "False positive rate (training)";
    private const string TrainingFalseDiscoveryRateResultName = "False discovery rate (training)";
    private const string TestTruePositiveRateResultName = "True positive rate (test)";
    private const string TestTrueNegativeRateResultName = "True negative rate (test)";
    private const string TestPositivePredictiveValueResultName = "Positive predictive value (test)";
    private const string TestNegativePredictiveValueResultName = "Negative predictive value (test)";
    private const string TestFalsePositiveRateResultName = "False positive rate (test)";
    private const string TestFalseDiscoveryRateResultName = "False discovery rate (test)";

    // Name (key) of the nested result collection holding the quality measures above.
    private const string QualityMeasuresResultName = "Classification Quality Measures";

    // Descriptions shared by the constructor and the after-deserialization hook so that both
    // code paths create identical result entries.
    private const string TrainingNormalizedGiniCoefficientDescription = "Normalized Gini coefficient of the model on the training partition.";
    private const string TestNormalizedGiniCoefficientDescription = "Normalized Gini coefficient of the model on the test partition.";

    /// <summary>
    /// The model of this solution, typed as <see cref="IClassificationModel"/>.
    /// Hides the base property and casts its value.
    /// </summary>
    public new IClassificationModel Model {
      get { return (IClassificationModel)base.Model; }
      protected set { base.Model = value; }
    }

    /// <summary>
    /// The problem data of this solution, typed as <see cref="IClassificationProblemData"/>.
    /// Hides the base property and casts its value.
    /// </summary>
    public new IClassificationProblemData ProblemData {
      get { return (IClassificationProblemData)base.ProblemData; }
      set { base.ProblemData = value; }
    }

    #region Results
    /// <summary>Accuracy of the model on the training partition (stored in the "Accuracy (training)" result).</summary>
    public double TrainingAccuracy {
      get { return ((DoubleValue)this[TrainingAccuracyResultName].Value).Value; }
      private set { ((DoubleValue)this[TrainingAccuracyResultName].Value).Value = value; }
    }

    /// <summary>Accuracy of the model on the test partition (stored in the "Accuracy (test)" result).</summary>
    public double TestAccuracy {
      get { return ((DoubleValue)this[TestAccuracyResultName].Value).Value; }
      private set { ((DoubleValue)this[TestAccuracyResultName].Value).Value = value; }
    }

    /// <summary>Normalized Gini coefficient of the model on the training partition.</summary>
    public double TrainingNormalizedGiniCoefficient {
      get { return ((DoubleValue)this[TrainingNormalizedGiniCoefficientResultName].Value).Value; }
      protected set { ((DoubleValue)this[TrainingNormalizedGiniCoefficientResultName].Value).Value = value; }
    }

    /// <summary>Normalized Gini coefficient of the model on the test partition.</summary>
    public double TestNormalizedGiniCoefficient {
      get { return ((DoubleValue)this[TestNormalizedGiniCoefficientResultName].Value).Value; }
      protected set { ((DoubleValue)this[TestNormalizedGiniCoefficientResultName].Value).Value = value; }
    }

    #region Quality Measures
    /// <summary>
    /// The nested result collection that holds all true/false positive/negative based
    /// quality measures (stored in the "Classification Quality Measures" result).
    /// </summary>
    public ResultCollection QualityMeasures {
      get { return ((ResultCollection)this[QualityMeasuresResultName].Value); }
      protected set { (this[QualityMeasuresResultName].Value) = value; }
    }

    /// <summary>Sensitivity / true positive rate on the training partition (TP/(TP+FN)).</summary>
    public double TrainingTruePositiveRate {
      get { return ((DoubleValue)QualityMeasures[TrainingTruePositiveRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TrainingTruePositiveRateResultName].Value).Value = value; }
    }

    /// <summary>Specificity / true negative rate on the training partition (TN/(FP+TN)).</summary>
    public double TrainingTrueNegativeRate {
      get { return ((DoubleValue)QualityMeasures[TrainingTrueNegativeRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TrainingTrueNegativeRateResultName].Value).Value = value; }
    }

    /// <summary>Precision / positive predictive value on the training partition (TP/(TP+FP)).</summary>
    public double TrainingPositivePredictiveValue {
      get { return ((DoubleValue)QualityMeasures[TrainingPositivePredictiveValueResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TrainingPositivePredictiveValueResultName].Value).Value = value; }
    }

    /// <summary>Negative predictive value on the training partition (TN/(TN+FN)).</summary>
    public double TrainingNegativePredictiveValue {
      get { return ((DoubleValue)QualityMeasures[TrainingNegativePredictiveValueResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TrainingNegativePredictiveValueResultName].Value).Value = value; }
    }

    /// <summary>False positive rate on the training partition (complement of the true negative rate).</summary>
    public double TrainingFalsePositiveRate {
      get { return ((DoubleValue)QualityMeasures[TrainingFalsePositiveRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TrainingFalsePositiveRateResultName].Value).Value = value; }
    }

    /// <summary>False discovery rate on the training partition (complement of the positive predictive value).</summary>
    public double TrainingFalseDiscoveryRate {
      get { return ((DoubleValue)QualityMeasures[TrainingFalseDiscoveryRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TrainingFalseDiscoveryRateResultName].Value).Value = value; }
    }

    /// <summary>Sensitivity / true positive rate on the test partition (TP/(TP+FN)).</summary>
    public double TestTruePositiveRate {
      get { return ((DoubleValue)QualityMeasures[TestTruePositiveRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TestTruePositiveRateResultName].Value).Value = value; }
    }

    /// <summary>Specificity / true negative rate on the test partition (TN/(FP+TN)).</summary>
    public double TestTrueNegativeRate {
      get { return ((DoubleValue)QualityMeasures[TestTrueNegativeRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TestTrueNegativeRateResultName].Value).Value = value; }
    }

    /// <summary>Precision / positive predictive value on the test partition (TP/(TP+FP)).</summary>
    public double TestPositivePredictiveValue {
      get { return ((DoubleValue)QualityMeasures[TestPositivePredictiveValueResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TestPositivePredictiveValueResultName].Value).Value = value; }
    }

    /// <summary>Negative predictive value on the test partition (TN/(TN+FN)).</summary>
    public double TestNegativePredictiveValue {
      get { return ((DoubleValue)QualityMeasures[TestNegativePredictiveValueResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TestNegativePredictiveValueResultName].Value).Value = value; }
    }

    /// <summary>False positive rate on the test partition (complement of the true negative rate).</summary>
    public double TestFalsePositiveRate {
      get { return ((DoubleValue)QualityMeasures[TestFalsePositiveRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TestFalsePositiveRateResultName].Value).Value = value; }
    }

    /// <summary>False discovery rate on the test partition (complement of the positive predictive value).</summary>
    public double TestFalseDiscoveryRate {
      get { return ((DoubleValue)QualityMeasures[TestFalseDiscoveryRateResultName].Value).Value; }
      protected set { ((DoubleValue)QualityMeasures[TestFalseDiscoveryRateResultName].Value).Value = value; }
    }
    #endregion
    #endregion

    /// <summary>Constructor used by the persistence framework during deserialization.</summary>
    [StorableConstructor]
    protected ClassificationSolutionBase(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor; all state is copied by the base class.</summary>
    protected ClassificationSolutionBase(ClassificationSolutionBase original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Creates a solution for the given model and problem data and registers all result
    /// entries with default values. The results are not calculated here.
    /// </summary>
    /// <param name="model">The classification model of the solution.</param>
    /// <param name="problemData">The classification problem data of the solution.</param>
    protected ClassificationSolutionBase(IClassificationModel model, IClassificationProblemData problemData)
      : base(model, problemData) {
      Add(new Result(TrainingAccuracyResultName, "Accuracy of the model on the training partition (percentage of correctly classified instances).", new PercentValue()));
      Add(new Result(TestAccuracyResultName, "Accuracy of the model on the test partition (percentage of correctly classified instances).", new PercentValue()));
      Add(new Result(TrainingNormalizedGiniCoefficientResultName, TrainingNormalizedGiniCoefficientDescription, new DoubleValue()));
      Add(new Result(TestNormalizedGiniCoefficientResultName, TestNormalizedGiniCoefficientDescription, new DoubleValue()));
      AddQualityMeasuresResultCollection();
    }

    /// <summary>
    /// Adds the normalized Gini coefficient results and the quality measures collection when the
    /// deserialized instance does not contain them.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably a backwards-compatibility step for solutions persisted before
    /// these results existed - confirm.
    /// </remarks>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!this.ContainsKey(TrainingNormalizedGiniCoefficientResultName))
        Add(new Result(TrainingNormalizedGiniCoefficientResultName, TrainingNormalizedGiniCoefficientDescription, new DoubleValue()));
      if (!this.ContainsKey(TestNormalizedGiniCoefficientResultName))
        Add(new Result(TestNormalizedGiniCoefficientResultName, TestNormalizedGiniCoefficientDescription, new DoubleValue()));
      if (!this.ContainsKey(QualityMeasuresResultName))
        AddQualityMeasuresResultCollection();
    }

    /// <summary>
    /// Builds the nested collection with the twelve quality measure entries (six for the
    /// training and six for the test partition) and adds it to this solution under the
    /// "Classification Quality Measures" result name.
    /// </summary>
    protected void AddQualityMeasuresResultCollection() {
      ResultCollection qualityMeasuresResult = new ResultCollection();

      // training partition
      qualityMeasuresResult.Add(new Result(TrainingTruePositiveRateResultName, "Sensitivity/True positive rate of the model on the training partition\n(TP/(TP+FN)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TrainingTrueNegativeRateResultName, "Specificity/True negative rate of the model on the training partition\n(TN/(FP+TN)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TrainingPositivePredictiveValueResultName, "Precision/Positive predictive value of the model on the training partition\n(TP/(TP+FP)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TrainingNegativePredictiveValueResultName, "Negative predictive value of the model on the training partition\n(TN/(TN+FN)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TrainingFalsePositiveRateResultName, "The false positive rate is the complement of the true negative rate of the model on the training partition.", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TrainingFalseDiscoveryRateResultName, "The false discovery rate is the complement of the positive predictive value of the model on the training partition.", new PercentValue()));

      // test partition
      qualityMeasuresResult.Add(new Result(TestTruePositiveRateResultName, "Sensitivity/True positive rate of the model on the test partition\n(TP/(TP+FN)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TestTrueNegativeRateResultName, "Specificity/True negative rate of the model on the test partition\n(TN/(FP+TN)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TestPositivePredictiveValueResultName, "Precision/Positive predictive value of the model on the test partition\n(TP/(TP+FP)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TestNegativePredictiveValueResultName, "Negative predictive value of the model on the test partition\n(TN/(TN+FN)).", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TestFalsePositiveRateResultName, "The false positive rate is the complement of the true negative rate of the model on the test partition.", new PercentValue()));
      qualityMeasuresResult.Add(new Result(TestFalseDiscoveryRateResultName, "The false discovery rate is the complement of the positive predictive value of the model on the test partition.", new PercentValue()));

      Add(new Result(QualityMeasuresResultName, "Classification quality measures.\nIn Multiclass Classification all misclassifications of the negative class will be treated as true negatives.", qualityMeasuresResult));
    }

    /// <summary>
    /// Calculates accuracy, normalized Gini coefficient and the quality measures for the
    /// training and the test partition and writes them to the corresponding results.
    /// Whenever a calculator reports an error state other than
    /// <c>OnlineCalculatorError.None</c>, the affected results are set to <see cref="double.NaN"/>.
    /// </summary>
    protected void CalculateClassificationResults() {
      double[] estimatedTrainingClassValues = EstimatedTrainingClassValues.ToArray(); // cache values
      double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).ToArray();
      double[] estimatedTestClassValues = EstimatedTestClassValues.ToArray(); // cache values
      double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices).ToArray();

      // the quality measures are calculated with respect to the configured positive class
      var positiveClassName = ProblemData.PositiveClassName;
      double positiveClassValue = ProblemData.GetClassValue(positiveClassName);
      QualityCalculator trainingQualityCalculator = new QualityCalculator(positiveClassValue);
      QualityCalculator testQualityCalculator = new QualityCalculator(positiveClassValue);

      OnlineCalculatorError errorState;

      // accuracy
      double trainingAccuracy = OnlineAccuracyCalculator.Calculate(originalTrainingClassValues, estimatedTrainingClassValues, out errorState);
      if (errorState != OnlineCalculatorError.None) trainingAccuracy = double.NaN;
      double testAccuracy = OnlineAccuracyCalculator.Calculate(originalTestClassValues, estimatedTestClassValues, out errorState);
      if (errorState != OnlineCalculatorError.None) testAccuracy = double.NaN;

      TrainingAccuracy = trainingAccuracy;
      TestAccuracy = testAccuracy;

      // normalized Gini coefficient
      double trainingNormalizedGini = NormalizedGiniCalculator.Calculate(originalTrainingClassValues, estimatedTrainingClassValues, out errorState);
      if (errorState != OnlineCalculatorError.None) trainingNormalizedGini = double.NaN;
      double testNormalizedGini = NormalizedGiniCalculator.Calculate(originalTestClassValues, estimatedTestClassValues, out errorState);
      if (errorState != OnlineCalculatorError.None) testNormalizedGini = double.NaN;

      TrainingNormalizedGiniCoefficient = trainingNormalizedGini;
      TestNormalizedGiniCoefficient = testNormalizedGini;

      //quality measures training partition
      trainingQualityCalculator.Calculate(originalTrainingClassValues, estimatedTrainingClassValues, out errorState);
      if (errorState != OnlineCalculatorError.None) {
        TrainingTruePositiveRate = double.NaN;
        TrainingTrueNegativeRate = double.NaN;
        TrainingPositivePredictiveValue = double.NaN;
        TrainingNegativePredictiveValue = double.NaN;
        TrainingFalsePositiveRate = double.NaN;
        TrainingFalseDiscoveryRate = double.NaN;
      } else {
        TrainingTruePositiveRate = trainingQualityCalculator.TruePositiveRate;
        TrainingTrueNegativeRate = trainingQualityCalculator.TrueNegativeRate;
        TrainingPositivePredictiveValue = trainingQualityCalculator.PositivePredictiveValue;
        TrainingNegativePredictiveValue = trainingQualityCalculator.NegativePredictiveValue;
        TrainingFalsePositiveRate = trainingQualityCalculator.FalsePositiveRate;
        TrainingFalseDiscoveryRate = trainingQualityCalculator.FalseDiscoveryRate;
      }

      //quality measures test partition
      testQualityCalculator.Calculate(originalTestClassValues, estimatedTestClassValues, out errorState);
      if (errorState != OnlineCalculatorError.None) {
        TestTruePositiveRate = double.NaN;
        TestTrueNegativeRate = double.NaN;
        TestPositivePredictiveValue = double.NaN;
        TestNegativePredictiveValue = double.NaN;
        TestFalsePositiveRate = double.NaN;
        TestFalseDiscoveryRate = double.NaN;
      } else {
        TestTruePositiveRate = testQualityCalculator.TruePositiveRate;
        TestTrueNegativeRate = testQualityCalculator.TrueNegativeRate;
        TestPositivePredictiveValue = testQualityCalculator.PositivePredictiveValue;
        TestNegativePredictiveValue = testQualityCalculator.NegativePredictiveValue;
        TestFalsePositiveRate = testQualityCalculator.FalsePositiveRate;
        TestFalseDiscoveryRate = testQualityCalculator.FalseDiscoveryRate;
      }
    }

    /// <summary>Estimated class values supplied by the derived class (no partition restriction visible here).</summary>
    public abstract IEnumerable<double> EstimatedClassValues { get; }

    /// <summary>Estimated class values for the training partition; compared against the target values at <c>ProblemData.TrainingIndices</c>.</summary>
    public abstract IEnumerable<double> EstimatedTrainingClassValues { get; }

    /// <summary>Estimated class values for the test partition; compared against the target values at <c>ProblemData.TestIndices</c>.</summary>
    public abstract IEnumerable<double> EstimatedTestClassValues { get; }

    /// <summary>Returns the estimated class values for the given rows.</summary>
    /// <param name="rows">
    /// The rows to estimate. NOTE(review): presumably dataset row indices of the same kind as
    /// <c>ProblemData.TrainingIndices</c> - verify the element type against IClassificationSolution.
    /// </param>
    /// <returns>The estimated class values for <paramref name="rows"/>.</returns>
    public abstract IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows);

    /// <summary>Recalculates all classification results via <see cref="CalculateClassificationResults"/>.</summary>
    protected override void RecalculateResults() {
      CalculateClassificationResults();
    }
  }
}