Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/15/11 08:25:27 (13 years ago)
Author:
gkronber
Message:

#1418 refactored threshold calculators.

Location:
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4
Files:
5 added
5 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4

    • Property svn:ignore
      •  

        old new  
        3 3 HeuristicLabProblemsDataAnalysisPlugin.cs
        4 4 obj
          5 *.vs10x
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/DiscriminantFunctionClassificationModel.cs

    r5678 r5681  
    64 64
    65 65    [StorableConstructor]
    66     protected DiscriminantFunctionClassificationModel() : base() { }
     66    protected DiscriminantFunctionClassificationModel(bool deserializing) : base(deserializing) { }
    67 67    protected DiscriminantFunctionClassificationModel(DiscriminantFunctionClassificationModel original, Cloner cloner)
    68 68      : base(original, cloner) {
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/DiscriminantFunctionClassificationSolution.cs

    r5678 r5681  
    43 43      : base(original, cloner) {
    44 44    }
    45     public DiscriminantFunctionClassificationSolution(IRegressionModel model, IClassificationProblemData problemData)
    46       : this(new DiscriminantFunctionClassificationModel(model, problemData.ClassValues, CalculateClassThresholds(model, problemData, problemData.TrainingIndizes)), problemData) {
     45    public DiscriminantFunctionClassificationSolution(IRegressionModel model, IClassificationProblemData problemData, IEnumerable<double> classValues, IEnumerable<double> thresholds)
     46      : this(new DiscriminantFunctionClassificationModel(model, classValues, thresholds), problemData) {
    47 47    }
    48 48    public DiscriminantFunctionClassificationSolution(IDiscriminantFunctionClassificationModel model, IClassificationProblemData problemData)
     
    91 91    }
    92 92    #endregion
    93 
    94     private static double[] CalculateClassThresholds(IRegressionModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {
    95       double[] thresholds;
    96       double[] classValues;
    97       CalculateClassThresholds(problemData, model.GetEstimatedValues(problemData.Dataset, rows), problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable, rows), out classValues, out thresholds);
    98       return thresholds;
    99     }
    100 
    101     public static void CalculateClassThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
    102       int slices = 100;
    103       List<double> estimatedValuesList = estimatedValues.ToList();
    104       double maxEstimatedValue = estimatedValuesList.Max();
    105       double minEstimatedValue = estimatedValuesList.Min();
    106       double thresholdIncrement = (maxEstimatedValue - minEstimatedValue) / slices;
    107       var estimatedAndTargetValuePairs =
    108         estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
    109         .OrderBy(x => x.EstimatedValue)
    110         .ToList();
    111 
    112       classValues = problemData.ClassValues.OrderBy(x => x).ToArray();
    113       int nClasses = classValues.Length;
    114       thresholds = new double[nClasses + 1];
    115       thresholds[0] = double.NegativeInfinity;
    116       thresholds[thresholds.Length - 1] = double.PositiveInfinity;
    117 
    118       // incrementally calculate accuracy of all possible thresholds
    119       int[,] confusionMatrix = new int[nClasses, nClasses];
    120 
    121       // one threshold is always treated as binary separation of the remaining classes
    122       for (int i = 1; i < thresholds.Length - 1; i++) {
    123         double lowerThreshold = thresholds[i - 1];
    124         double actualThreshold = Math.Max(lowerThreshold, minEstimatedValue);
    125         double lowestBestThreshold = double.NaN;
    126         double highestBestThreshold = double.NaN;
    127         double bestClassificationScore = double.PositiveInfinity;
    128         bool seriesOfEqualClassificationScores = false;
    129 
    130         while (actualThreshold < maxEstimatedValue) {
    131           double classificationScore = 0.0;
    132 
    133           foreach (var pair in estimatedAndTargetValuePairs) {
    134             //all positives
    135             if (pair.TargetClassValue.IsAlmost(classValues[i - 1])) {
    136               if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue < actualThreshold)
    137                 //true positive
    138                 classificationScore += problemData.GetClassificationPenalty(classValues[i - 1], classValues[i - 1]);
    139               else
    140                 //false negative
    141                 classificationScore += problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);
    142             }
    143               //all negatives
    144             else {
    145               if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue < actualThreshold)
    146                 //false positive
    147                 classificationScore += problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);
    148               else
    149                 //true negative, consider only upper class
    150                 classificationScore += problemData.GetClassificationPenalty(classValues[i], classValues[i]);
    151             }
    152           }
    153 
    154           //new best classification score found
    155           if (classificationScore < bestClassificationScore) {
    156             bestClassificationScore = classificationScore;
    157             lowestBestThreshold = actualThreshold;
    158             highestBestThreshold = actualThreshold;
    159             seriesOfEqualClassificationScores = true;
    160           }
    161             //equal classification scores => if seriesOfEqualClassifcationScores == true update highest threshold
    162           else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores)
    163             highestBestThreshold = actualThreshold;
    164           //worse classificatoin score found reset seriesOfEqualClassifcationScores
    165           else seriesOfEqualClassificationScores = false;
    166 
    167           actualThreshold += thresholdIncrement;
    168         }
    169         //scale lowest thresholds and highest found optimal threshold according to the misclassification matrix
    170         double falseNegativePenalty = problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);
    171         double falsePositivePenalty = problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);
    172         thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);
    173       }
    174     }
    175 93  }
    176 94 }
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r5662 r5681  
    115 115    <Compile Include="ClusteringSolution.cs" />
    116 116    <Compile Include="ClassificationEnsembleModel.cs" />
     117    <Compile Include="Interfaces\Classification\IDiscriminantFunctionThresholdCalculator.cs" />
    117 118    <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs" />
    118 119    <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs" />
     
    152 153    <Compile Include="RegressionSolution.cs" />
    153 154    <Compile Include="TableFileParser.cs" />
     155    <Compile Include="ThresholdCalculators\AccuracyMaximizationThresholdCalculator.cs" />
     156    <Compile Include="ThresholdCalculators\NormalDistributionCutPointsThresholdCalculator.cs" />
     157    <Compile Include="ThresholdCalculators\ThresholdCalculator.cs" />
    154 158    <None Include="HeuristicLab.snk" />
    155 159    <None Include="HeuristicLabProblemsDataAnalysisPlugin.cs.frame" />
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/Tests

    • Property svn:ignore
      •  

        old new  
        1 1 bin
        2 2 obj
          3 *.vs10x
Note: See TracChangeset for help on using the changeset viewer.