Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/25/11 21:29:10 (12 years ago)
Author:
gkronber
Message:

#1685: Changed the simplification view for symbolic classification solutions to use the Gini index to determine the impact of a node. The Gini index describes the degree of separation of the classes, and we do not have to search for the optimal threshold value each time we calculate the impact of one node. Also fixed a problem with the Gini index result of classification solutions using a discriminating function, as for these solutions the Gini index is calculated twice (once for the class values and once for the output values of the discriminating function).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views/3.4/InteractiveSymbolicDiscriminantFunctionClassificationSolutionSimplifierView.cs

    r7233 r7234  
    7878        .LimitToRange(Content.Model.LowerEstimationLimit, Content.Model.UpperEstimationLimit)
    7979        .ToArray();
    80       double[] classValues;
    81       double[] thresholds;
    82       // normal distribution cut points are used as thresholds here because they are a lot faster to calculate than the accuracy maximizing thresholds
    83       AccuracyMaximizationThresholdCalculator.CalculateThresholds(Content.ProblemData, originalOutput, targetClassValues, out classValues, out thresholds);
    84       var classifier = new SymbolicDiscriminantFunctionClassificationModel(tree, interpreter);
    85       classifier.SetThresholdsAndClassValues(thresholds, classValues);
    8680      OnlineCalculatorError errorState;
    87       double originalAccuracy = OnlineAccuracyCalculator.Calculate(targetClassValues, classifier.GetEstimatedClassValues(dataset, rows), out errorState);
    88       if (errorState != OnlineCalculatorError.None) originalAccuracy = 0.0;
     81      double originalGini = NormalizedGiniCalculator.Calculate(targetClassValues, originalOutput, out errorState);
     82      if (errorState != OnlineCalculatorError.None) originalGini = 0.0;
    8983
    9084      foreach (ISymbolicExpressionTreeNode node in nodes) {
     
    9690          .LimitToRange(Content.Model.LowerEstimationLimit, Content.Model.UpperEstimationLimit)
    9791          .ToArray();
    98         AccuracyMaximizationThresholdCalculator.CalculateThresholds(Content.ProblemData, newOutput, targetClassValues, out classValues, out thresholds);
    99         classifier = new SymbolicDiscriminantFunctionClassificationModel(tree, interpreter);
    100         classifier.SetThresholdsAndClassValues(thresholds, classValues);
    101         double newAccuracy = OnlineAccuracyCalculator.Calculate(targetClassValues, classifier.GetEstimatedClassValues(dataset, rows), out errorState);
    102         if (errorState != OnlineCalculatorError.None) newAccuracy = 0.0;
     92        double newGini = NormalizedGiniCalculator.Calculate(targetClassValues, newOutput, out errorState);
     93        if (errorState != OnlineCalculatorError.None) newGini = 0.0;
    10394
    10495        // impact = 0 if no change
    10596        // impact < 0 if new solution is better
    10697        // impact > 0 if new solution is worse
    107         impactValues[node] = originalAccuracy - newAccuracy;
     98        impactValues[node] = originalGini - newGini;
    10899        SwitchNode(parent, replacementNode, node);
    109100      }
Note: See TracChangeset for help on using the changeset viewer.