[5717] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
| 24 | using System.Linq;
|
---|
| 25 | using HeuristicLab.Common;
|
---|
[6256] | 26 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
[5717] | 27 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Views;
|
---|
| 28 |
|
---|
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
  // Interactive simplifier view for symbolic discriminant function classification solutions.
  // For every node of the model tree it supplies
  //  - a replacement value: the median output of the subtree rooted at that node, and
  //  - an impact value: the change in training accuracy when the subtree is replaced by that constant.
  public partial class InteractiveSymbolicDiscriminantFunctionClassificationSolutionSimplifierView : InteractiveSymbolicDataAnalysisSolutionSimplifierView {
    // Reusable constant node that temporarily stands in for a subtree while its impact is measured.
    private readonly ConstantTreeNode constantNode;
    // Scratch tree (program root -> start symbol) used to evaluate a single subtree in isolation.
    private readonly SymbolicExpressionTree tempTree;

    // Strongly typed access to the content of the base view.
    public new SymbolicDiscriminantFunctionClassificationSolution Content {
      get { return (SymbolicDiscriminantFunctionClassificationSolution)base.Content; }
      set { base.Content = value; }
    }

    // Sets the caption and creates the reusable constant node and the scratch tree.
    public InteractiveSymbolicDiscriminantFunctionClassificationSolutionSimplifierView()
      : base() {
      InitializeComponent();
      this.Caption = "Interactive Classification Solution Simplifier";

      constantNode = ((ConstantTreeNode)new Constant().CreateTreeNode());
      ISymbolicExpressionTreeNode root = new ProgramRootSymbol().CreateTreeNode();
      ISymbolicExpressionTreeNode start = new StartSymbol().CreateTreeNode();
      root.AddSubtree(start);
      tempTree = new SymbolicExpressionTree(root);
    }

    // Replaces the model of the solution by a new model built from the given tree
    // (the interpreter of the current model is reused) and recalculates the thresholds.
    protected override void UpdateModel(ISymbolicExpressionTree tree) {
      Content.Model = new SymbolicDiscriminantFunctionClassificationModel(tree, Content.Model.Interpreter);
      // the default policy for setting thresholds in classification models is the accuracy maximizing policy
      // however for performance reasons we must use estimations of the normal distribution cut points as the thresholds
      // here and in CalculateImpactValues as they are a lot faster to calculate
      Content.SetClassDistibutionCutPointThresholds();
    }

    // Returns the replacement value for every node below the start symbol of the given tree.
    protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {
      Dictionary<ISymbolicExpressionTreeNode, double> replacementValues = new Dictionary<ISymbolicExpressionTreeNode, double>();
      foreach (ISymbolicExpressionTreeNode node in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
        replacementValues[node] = CalculateReplacementValue(node, tree);
      }
      return replacementValues;
    }

    // Returns the impact value for every node below the start symbol of the given tree.
    // The impact of a node is the accuracy of the original tree minus the accuracy of the tree
    // in which the node has been replaced by a constant (its replacement value).
    // The tree is modified in place while a node is evaluated and is always restored afterwards,
    // even if the evaluation throws.
    protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {
      var interpreter = Content.Model.Interpreter;
      var dataset = Content.ProblemData.Dataset;
      var rows = Content.ProblemData.TrainingIndizes;
      string targetVariable = Content.ProblemData.TargetVariable;
      Dictionary<ISymbolicExpressionTreeNode, double> impactValues = new Dictionary<ISymbolicExpressionTreeNode, double>();
      // materialize the node list up front because the tree is modified while iterating
      List<ISymbolicExpressionTreeNode> nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix().ToList();

      var targetClassValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);

      // Calculates the accuracy of the tree in its current state on the training rows.
      // Yields 0.0 if the accuracy calculator reports an error.
      Func<double> calculateAccuracy = () => {
        var output = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows)
          .LimitToRange(Content.Model.LowerEstimationLimit, Content.Model.UpperEstimationLimit)
          .ToArray();
        double[] classValues;
        double[] thresholds;
        // normal distribution cut points are used as thresholds here because they are a lot faster to calculate than the accuracy maximizing thresholds
        NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(Content.ProblemData, output, targetClassValues, out classValues, out thresholds);
        var classifier = new SymbolicDiscriminantFunctionClassificationModel(tree, interpreter);
        classifier.SetThresholdsAndClassValues(thresholds, classValues);
        OnlineCalculatorError errorState;
        double accuracy = OnlineAccuracyCalculator.Calculate(targetClassValues, classifier.GetEstimatedClassValues(dataset, rows), out errorState);
        return errorState == OnlineCalculatorError.None ? accuracy : 0.0;
      };

      double originalAccuracy = calculateAccuracy();

      foreach (ISymbolicExpressionTreeNode node in nodes) {
        // remember the parent before the node is detached from the tree
        var parent = node.Parent;
        constantNode.Value = CalculateReplacementValue(node, tree);
        SwitchNode(parent, node, constantNode);
        try {
          // impact = 0 if no change
          // impact < 0 if new solution is better
          // impact > 0 if new solution is worse
          impactValues[node] = originalAccuracy - calculateAccuracy();
        }
        finally {
          // always put the original node back so that a failed evaluation does not corrupt the tree
          SwitchNode(parent, constantNode, node);
        }
      }
      return impactValues;
    }

    // Returns the median output, over the training rows, of the subtree rooted at the given node.
    // The subtree is cloned into the scratch tree together with clones of all branches of the
    // source tree except the first one, so the source tree itself is not modified.
    private double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree) {
      // remove old ADFs
      while (tempTree.Root.SubtreesCount > 1) tempTree.Root.RemoveSubtree(1);
      // clone ADFs of source tree
      for (int i = 1; i < sourceTree.Root.SubtreesCount; i++) {
        tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
      }
      // replace whatever is below the start symbol by a clone of the node to evaluate
      var start = tempTree.Root.GetSubtree(0);
      while (start.SubtreesCount > 0) start.RemoveSubtree(0);
      start.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
      var interpreter = Content.Model.Interpreter;
      var rows = Content.ProblemData.TrainingIndizes;
      return interpreter.GetSymbolicExpressionTreeValues(tempTree, Content.ProblemData.Dataset, rows).Median();
    }

    // Replaces the first direct subtree of root that is oldBranch by newBranch at the same position.
    // Does nothing if oldBranch is not a direct subtree of root.
    private void SwitchNode(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode oldBranch, ISymbolicExpressionTreeNode newBranch) {
      for (int i = 0; i < root.SubtreesCount; i++) {
        if (root.GetSubtree(i) == oldBranch) {
          root.RemoveSubtree(i);
          root.InsertSubtree(i, newBranch);
          return;
        }
      }
    }

    // Intentionally empty: the handler of the base view is overridden and not called,
    // so clicking the button has no effect in this view.
    // NOTE(review): presumably constant optimization is not supported for classification solutions - confirm.
    protected override void btnOptimizeConstants_Click(object sender, EventArgs e) {
    }
  }
}
|
---|