Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3.2/sources/HeuristicLab.GP.StructureIdentification/3.3/Evaluators/NodeBasedVariableImpactCalculator.cs @ 5955

Last change on this file since 5955 was 2578, checked in by gkronber, 15 years ago

Implemented #824 (Refactor: ITreeEvaluator interface to provide a method that evaluates a tree on a range of samples.)

File size: 8.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Text;
25using System.Xml;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.DataAnalysis;
29using System.Linq;
30using HeuristicLab.GP.Interfaces;
31using HeuristicLab.Modeling;
32
33namespace HeuristicLab.GP.StructureIdentification {
34  public class NodeBasedVariableImpactCalculator : OperatorBase {
35
36    public NodeBasedVariableImpactCalculator()
37      : base() {
38      AddVariableInfo(new VariableInfo("FunctionTree", "The GP model", typeof(IGeneticProgrammingModel), VariableKind.In));
39      AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In));
40      AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(StringData), VariableKind.In));
41      AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList<StringData>), VariableKind.In));
42      AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In));
43      AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In));
44      AddVariableInfo(new VariableInfo("TreeEvaluator", "Evaluator that should be used for impact calculation", typeof(ITreeEvaluator), VariableKind.In));
45      AddVariableInfo(new VariableInfo(ModelingResult.VariableNodeImpact.ToString(), "Variable impacts", typeof(ItemList), VariableKind.New | VariableKind.Out));
46    }
47
48    public override string Description {
49      get { return @"Calculates the impact of all allowed input variables on the quality of the model based on node impacts."; }
50    }
51
52    public override IOperation Apply(IScope scope) {
53      IGeneticProgrammingModel gpModel = GetVariableValue<IGeneticProgrammingModel>("FunctionTree", scope, true);
54      Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
55      string targetVariableName = GetVariableValue<StringData>("TargetVariable", scope, true).Data;
56      int targetVariable = dataset.GetVariableIndex(targetVariableName);
57      ItemList<StringData> inputVariableNames = GetVariableValue<ItemList<StringData>>("InputVariableNames", scope, true, false);
58      ITreeEvaluator evaluator = GetVariableValue<ITreeEvaluator>("TreeEvaluator", scope, true);
59      int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data;
60      int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data;
61
62      Dictionary<string, double> qualityImpacts;
63      if (inputVariableNames == null)
64        qualityImpacts = Calculate(dataset, evaluator, gpModel.FunctionTree, targetVariableName, start, end);
65      else
66        qualityImpacts = Calculate(dataset, evaluator, gpModel.FunctionTree, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end);
67
68      ItemList varImpacts = GetVariableValue<ItemList>(ModelingResult.VariableNodeImpact.ToString(), scope, true, false);
69      if (varImpacts == null) {
70        varImpacts = new ItemList();
71        scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(ModelingResult.VariableNodeImpact.ToString()), varImpacts));
72      }
73
74      varImpacts.Clear();
75      foreach (KeyValuePair<string, double> p in qualityImpacts) {
76        if (p.Key != targetVariableName) {
77          ItemList row = new ItemList();
78          row.Add(new StringData(p.Key));
79          row.Add(new DoubleData(p.Value));
80          varImpacts.Add(row);
81        }
82      }
83
84      return null;
85    }
86
87    public static Dictionary<string, double> Calculate(Dataset dataset, ITreeEvaluator evaluator,
88      IFunctionTree tree, string targetVariableName, int start, int end) {
89      return Calculate(dataset, evaluator, tree, targetVariableName, null, start, end);
90    }
91
92    public static Dictionary<string, double> Calculate(Dataset dataset, ITreeEvaluator evaluator, IFunctionTree tree, string targetVariableName, IEnumerable<string> inputVariableNames, int start, int end) {
93      Dictionary<string, double> impacts = new Dictionary<string, double>();
94      Dictionary<IFunctionTree, double> nodeImpacts = new Dictionary<IFunctionTree, double>();
95      Dictionary<IFunctionTree, double> nodeReplacementValues = new Dictionary<IFunctionTree, double>();
96      Dictionary<IFunctionTree, IFunctionTree> parent = new Dictionary<IFunctionTree, IFunctionTree>();
97      int targetVariable = dataset.GetVariableIndex(targetVariableName);
98      IEnumerable<string> variables;
99      if (inputVariableNames != null)
100        variables = inputVariableNames;
101      else
102        variables = dataset.VariableNames;
103
104      parent[tree] = null;
105      foreach (var node in FunctionTreeIterator.IteratePostfix(tree)) {
106        foreach (var subTree in node.SubTrees) {
107          parent[subTree] = node;
108        }
109        nodeReplacementValues[node] = CalculateReplacementValue(dataset, evaluator, node, targetVariable, start, end);
110      }
111
112      double originalMse = CalculateMSE(dataset, evaluator, tree, targetVariable, start, end);
113      foreach (var node in FunctionTreeIterator.IteratePostfix(tree)) {
114        IFunctionTree newTree = ReplaceBranchInTree(tree, node, nodeReplacementValues[node]);
115        double newMse = CalculateMSE(dataset, evaluator, newTree, targetVariable, start, end);
116        nodeImpacts[node] = newMse / originalMse;
117      }
118
119
120      foreach (string variableName in variables) {
121        var matchingNodes = from node in nodeImpacts.Keys
122                            where node is VariableFunctionTree && ((VariableFunctionTree)node).VariableName == variableName
123                            select node;
124        double maxImpact;
125        if (matchingNodes.Count() > 0) {
126          maxImpact = (from matchingNode in matchingNodes
127                       select (from n in AncestorList(matchingNode, parent)
128                               select nodeImpacts[n]).Min()).Max();
129        } else {
130          maxImpact = 1.0;
131        }
132
133        impacts[variableName] = maxImpact;
134      }
135
136      return impacts;
137    }
138
139    private static double CalculateMSE(Dataset dataset, ITreeEvaluator evaluator, IFunctionTree tree, int targetVariable, int start, int end) {
140
141      double[,] values = Matrix<double>.Create(
142        dataset.GetVariableValues(targetVariable, start, end),
143        evaluator.Evaluate(dataset, tree, Enumerable.Range(start, end - start)).ToArray());
144      return SimpleMSEEvaluator.Calculate(values);
145    }
146
147    private static IEnumerable<IFunctionTree> AncestorList(IFunctionTree node, Dictionary<IFunctionTree, IFunctionTree> parent) {
148      while (node != null) {
149        yield return node;
150        node = parent[node];
151      }
152    }
153
154    private static double CalculateReplacementValue(Dataset dataset, ITreeEvaluator evaluator, IFunctionTree tree, int targetVariable, int start, int end) {
155      return Statistics.Median(evaluator.Evaluate(dataset, tree, Enumerable.Range(start, end - start)).ToArray());
156    }
157
158    private static IFunctionTree ReplaceBranchInTree(IFunctionTree tree, IFunctionTree node, double p) {
159      if (tree == node) return CreateConstantNode(p);
160      List<IFunctionTree> originalSubTrees = new List<IFunctionTree>(tree.SubTrees);
161      while (tree.SubTrees.Count > 0) tree.RemoveSubTree(0);
162      IFunctionTree clonedNode = (IFunctionTree)tree.Clone();
163      for (int i = 0; i < originalSubTrees.Count; i++) {
164        tree.AddSubTree(originalSubTrees[i]);
165        clonedNode.AddSubTree(ReplaceBranchInTree(originalSubTrees[i], node, p));
166      }
167      return clonedNode;
168    }
169
170    private static IFunctionTree CreateConstantNode(double value) {
171      ConstantFunctionTree constantTree = (ConstantFunctionTree)(new Constant().GetTreeNode());
172      constantTree.Value = value;
173      return (IFunctionTree)constantTree;
174    }
175  }
176}
Note: See TracBrowser for help on using the repository browser.