Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views/3.4/InteractiveSymbolicDiscriminantFunctionClassificationSolutionSimplifierView.cs @ 7234

Last change on this file since 7234 was 7234, checked in by gkronber, 11 years ago

#1685: Changed the simplification view for symbolic classification solutions to use the Gini index to determine the impact of a node. The Gini index describes the degree of separation of the classes, so we do not have to search for the optimal threshold value each time we calculate the impact of one node. Also fixed a problem with the Gini index result of classification solutions using a discriminating function: for these solutions the Gini index is calculated twice (once for the class values and once for the output values of the discriminating function).

File size: 6.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Problems.DataAnalysis.Symbolic.Views;
28
29namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
30  public partial class InteractiveSymbolicDiscriminantFunctionClassificationSolutionSimplifierView : InteractiveSymbolicDataAnalysisSolutionSimplifierView {
31    private readonly ConstantTreeNode constantNode;
32    private readonly SymbolicExpressionTree tempTree;
33
34    public new SymbolicDiscriminantFunctionClassificationSolution Content {
35      get { return (SymbolicDiscriminantFunctionClassificationSolution)base.Content; }
36      set { base.Content = value; }
37    }
38
39    public InteractiveSymbolicDiscriminantFunctionClassificationSolutionSimplifierView()
40      : base() {
41      InitializeComponent();
42      this.Caption = "Interactive Classification Solution Simplifier";
43
44      constantNode = ((ConstantTreeNode)new Constant().CreateTreeNode());
45      ISymbolicExpressionTreeNode root = new ProgramRootSymbol().CreateTreeNode();
46      ISymbolicExpressionTreeNode start = new StartSymbol().CreateTreeNode();
47      root.AddSubtree(start);
48      tempTree = new SymbolicExpressionTree(root);
49    }
50
51    protected override void UpdateModel(ISymbolicExpressionTree tree) {
52      Content.Model = new SymbolicDiscriminantFunctionClassificationModel(tree, Content.Model.Interpreter);
53      // the default policy for setting thresholds in classification models is the accuarcy maximizing policy.
54      // This is rather slow to calculate and can lead to a very laggy UI in the interactive solution simplifier.
55      // However, since we automatically prune sub-trees based on the threshold reaching the maximum accuracy we must
56      // also use maximum accuracy threshold calculation here in order to prevent incoherent behavior of the simplifier.
57      Content.SetAccuracyMaximizingThresholds();
58    }
59
60    protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {
61      Dictionary<ISymbolicExpressionTreeNode, double> replacementValues = new Dictionary<ISymbolicExpressionTreeNode, double>();
62      foreach (ISymbolicExpressionTreeNode node in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
63        replacementValues[node] = CalculateReplacementValue(node, tree);
64      }
65      return replacementValues;
66    }
67
68    protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {
69      var interpreter = Content.Model.Interpreter;
70      var dataset = Content.ProblemData.Dataset;
71      var rows = Content.ProblemData.TrainingIndizes;
72      string targetVariable = Content.ProblemData.TargetVariable;
73      Dictionary<ISymbolicExpressionTreeNode, double> impactValues = new Dictionary<ISymbolicExpressionTreeNode, double>();
74      List<ISymbolicExpressionTreeNode> nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix().ToList();
75
76      var targetClassValues = dataset.GetDoubleValues(targetVariable, rows);
77      var originalOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows)
78        .LimitToRange(Content.Model.LowerEstimationLimit, Content.Model.UpperEstimationLimit)
79        .ToArray();
80      OnlineCalculatorError errorState;
81      double originalGini = NormalizedGiniCalculator.Calculate(targetClassValues, originalOutput, out errorState);
82      if (errorState != OnlineCalculatorError.None) originalGini = 0.0;
83
84      foreach (ISymbolicExpressionTreeNode node in nodes) {
85        var parent = node.Parent;
86        constantNode.Value = CalculateReplacementValue(node, tree);
87        ISymbolicExpressionTreeNode replacementNode = constantNode;
88        SwitchNode(parent, node, replacementNode);
89        var newOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows)
90          .LimitToRange(Content.Model.LowerEstimationLimit, Content.Model.UpperEstimationLimit)
91          .ToArray();
92        double newGini = NormalizedGiniCalculator.Calculate(targetClassValues, newOutput, out errorState);
93        if (errorState != OnlineCalculatorError.None) newGini = 0.0;
94
95        // impact = 0 if no change
96        // impact < 0 if new solution is better
97        // impact > 0 if new solution is worse
98        impactValues[node] = originalGini - newGini;
99        SwitchNode(parent, replacementNode, node);
100      }
101      return impactValues;
102    }
103
104    private double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree) {
105      // remove old ADFs
106      while (tempTree.Root.SubtreeCount > 1) tempTree.Root.RemoveSubtree(1);
107      // clone ADFs of source tree
108      for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) {
109        tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
110      }
111      var start = tempTree.Root.GetSubtree(0);
112      while (start.SubtreeCount > 0) start.RemoveSubtree(0);
113      start.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
114      var interpreter = Content.Model.Interpreter;
115      var rows = Content.ProblemData.TrainingIndizes;
116      return interpreter.GetSymbolicExpressionTreeValues(tempTree, Content.ProblemData.Dataset, rows).Median();
117    }
118
119
120    private void SwitchNode(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode oldBranch, ISymbolicExpressionTreeNode newBranch) {
121      for (int i = 0; i < root.SubtreeCount; i++) {
122        if (root.GetSubtree(i) == oldBranch) {
123          root.RemoveSubtree(i);
124          root.InsertSubtree(i, newBranch);
125          return;
126        }
127      }
128    }
129
130    protected override void btnOptimizeConstants_Click(object sender, EventArgs e) {
131
132    }
133  }
134}
Note: See TracBrowser for help on using the repository browser.