- Timestamp:
- 03/15/11 08:25:27 (13 years ago)
- Location:
- branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4
- Files:
-
- 5 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4
- Property svn:ignore
-
old new
  3   3   HeuristicLabProblemsDataAnalysisPlugin.cs
  4   4   obj
      5 + *.vs10x
-
- Property svn:ignore
-
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/DiscriminantFunctionClassificationModel.cs
r5678 r5681
64 64
65 65   [StorableConstructor]
66    - protected DiscriminantFunctionClassificationModel( ) : base() { }
   66 + protected DiscriminantFunctionClassificationModel(bool deserializing) : base(deserializing) { }
67 67   protected DiscriminantFunctionClassificationModel(DiscriminantFunctionClassificationModel original, Cloner cloner)
68 68   : base(original, cloner) {
-
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/DiscriminantFunctionClassificationSolution.cs
r5678 r5681 43 43 : base(original, cloner) { 44 44 } 45 public DiscriminantFunctionClassificationSolution(IRegressionModel model, IClassificationProblemData problemData )46 : this(new DiscriminantFunctionClassificationModel(model, problemData.ClassValues, CalculateClassThresholds(model, problemData, problemData.TrainingIndizes)), problemData) {45 public DiscriminantFunctionClassificationSolution(IRegressionModel model, IClassificationProblemData problemData, IEnumerable<double> classValues, IEnumerable<double> thresholds) 46 : this(new DiscriminantFunctionClassificationModel(model, classValues, thresholds), problemData) { 47 47 } 48 48 public DiscriminantFunctionClassificationSolution(IDiscriminantFunctionClassificationModel model, IClassificationProblemData problemData) … … 91 91 } 92 92 #endregion 93 94 private static double[] CalculateClassThresholds(IRegressionModel model, IClassificationProblemData problemData, IEnumerable<int> rows) {95 double[] thresholds;96 double[] classValues;97 CalculateClassThresholds(problemData, model.GetEstimatedValues(problemData.Dataset, rows), problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable, rows), out classValues, out thresholds);98 return thresholds;99 }100 101 public static void CalculateClassThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {102 int slices = 100;103 List<double> estimatedValuesList = estimatedValues.ToList();104 double maxEstimatedValue = estimatedValuesList.Max();105 double minEstimatedValue = estimatedValuesList.Min();106 double thresholdIncrement = (maxEstimatedValue - minEstimatedValue) / slices;107 var estimatedAndTargetValuePairs =108 estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })109 .OrderBy(x => x.EstimatedValue)110 .ToList();111 112 classValues = problemData.ClassValues.OrderBy(x => 
x).ToArray();113 int nClasses = classValues.Length;114 thresholds = new double[nClasses + 1];115 thresholds[0] = double.NegativeInfinity;116 thresholds[thresholds.Length - 1] = double.PositiveInfinity;117 118 // incrementally calculate accuracy of all possible thresholds119 int[,] confusionMatrix = new int[nClasses, nClasses];120 121 // one threshold is always treated as binary separation of the remaining classes122 for (int i = 1; i < thresholds.Length - 1; i++) {123 double lowerThreshold = thresholds[i - 1];124 double actualThreshold = Math.Max(lowerThreshold, minEstimatedValue);125 double lowestBestThreshold = double.NaN;126 double highestBestThreshold = double.NaN;127 double bestClassificationScore = double.PositiveInfinity;128 bool seriesOfEqualClassificationScores = false;129 130 while (actualThreshold < maxEstimatedValue) {131 double classificationScore = 0.0;132 133 foreach (var pair in estimatedAndTargetValuePairs) {134 //all positives135 if (pair.TargetClassValue.IsAlmost(classValues[i - 1])) {136 if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue < actualThreshold)137 //true positive138 classificationScore += problemData.GetClassificationPenalty(classValues[i - 1], classValues[i - 1]);139 else140 //false negative141 classificationScore += problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);142 }143 //all negatives144 else {145 if (pair.EstimatedValue > lowerThreshold && pair.EstimatedValue < actualThreshold)146 //false positive147 classificationScore += problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);148 else149 //true negative, consider only upper class150 classificationScore += problemData.GetClassificationPenalty(classValues[i], classValues[i]);151 }152 }153 154 //new best classification score found155 if (classificationScore < bestClassificationScore) {156 bestClassificationScore = classificationScore;157 lowestBestThreshold = actualThreshold;158 highestBestThreshold = actualThreshold;159 
seriesOfEqualClassificationScores = true;160 }161 //equal classification scores => if seriesOfEqualClassifcationScores == true update highest threshold162 else if (Math.Abs(classificationScore - bestClassificationScore) < double.Epsilon && seriesOfEqualClassificationScores)163 highestBestThreshold = actualThreshold;164 //worse classificatoin score found reset seriesOfEqualClassifcationScores165 else seriesOfEqualClassificationScores = false;166 167 actualThreshold += thresholdIncrement;168 }169 //scale lowest thresholds and highest found optimal threshold according to the misclassification matrix170 double falseNegativePenalty = problemData.GetClassificationPenalty(classValues[i], classValues[i - 1]);171 double falsePositivePenalty = problemData.GetClassificationPenalty(classValues[i - 1], classValues[i]);172 thresholds[i] = (lowestBestThreshold * falsePositivePenalty + highestBestThreshold * falseNegativePenalty) / (falseNegativePenalty + falsePositivePenalty);173 }174 }175 93 } 176 94 } -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj
r5662 r5681 115 115 <Compile Include="ClusteringSolution.cs" /> 116 116 <Compile Include="ClassificationEnsembleModel.cs" /> 117 <Compile Include="Interfaces\Classification\IDiscriminantFunctionThresholdCalculator.cs" /> 117 118 <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs" /> 118 119 <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs" /> … … 152 153 <Compile Include="RegressionSolution.cs" /> 153 154 <Compile Include="TableFileParser.cs" /> 155 <Compile Include="ThresholdCalculators\AccuracyMaximizationThresholdCalculator.cs" /> 156 <Compile Include="ThresholdCalculators\NormalDistributionCutPointsThresholdCalculator.cs" /> 157 <Compile Include="ThresholdCalculators\ThresholdCalculator.cs" /> 154 158 <None Include="HeuristicLab.snk" /> 155 159 <None Include="HeuristicLabProblemsDataAnalysisPlugin.cs.frame" /> -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/Tests
- Property svn:ignore
-
old new
  1   1   bin
  2   2   obj
      3 + *.vs10x
-
- Property svn:ignore
Note: See TracChangeset for help on using the changeset viewer.