Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Views/3.3/RunCollectionWinklerVariableImpactView.cs @ 6377

Last change on this file since 6377 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 8.8 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;
using HeuristicLab.Common;
using HeuristicLab.Data;
using HeuristicLab.MainForm;
using HeuristicLab.MainForm.WindowsForms;
using HeuristicLab.Optimization;
using HeuristicLab.Problems.DataAnalysis.Evaluators;
using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;

namespace HeuristicLab.Problems.DataAnalysis.Views {
  /// <summary>
  /// View for a <see cref="RunCollection"/> that displays a matrix of variable impacts.
  /// Rows are the variables referenced by the models of the runs, columns are the runs
  /// that contain a result named "Best solution (on validation set)", followed by four
  /// statistics columns ("Median Impact", "Mean Impact", "StdDev", "pValue").
  /// </summary>
  [Content(typeof(RunCollection), false)]
  [View("RunCollection Winkler Variable Impact View")]
  public partial class RunCollectionWinklerVariableImpactView : AsynchronousContentView {
    // Name of the run result that is expected to hold the symbolic regression solution.
    private const string validationBestModelResultName = "Best solution (on validation set)";

    public RunCollectionWinklerVariableImpactView() {
      InitializeComponent();
    }

    /// <summary>
    /// The run collection shown by this view (typed accessor for the base content).
    /// </summary>
    public new RunCollection Content {
      get { return (RunCollection)base.Content; }
      set { base.Content = value; }
    }

    /// <summary>
    /// Subscribes to the collection-changed events of the content so that the matrix
    /// is recalculated whenever runs are added, removed or the collection is reset.
    /// </summary>
    protected override void RegisterContentEvents() {
      base.RegisterContentEvents();
      this.Content.ItemsAdded += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
      this.Content.ItemsRemoved += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
      this.Content.CollectionReset += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
    }

    /// <summary>
    /// Removes the subscriptions made in <see cref="RegisterContentEvents"/>.
    /// </summary>
    protected override void DeregisterContentEvents() {
      this.Content.ItemsAdded -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
      this.Content.ItemsRemoved -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
      this.Content.CollectionReset -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
      // Must call the base *de*registration; calling base.RegisterContentEvents() here
      // would re-register base handlers every time the content is detached.
      base.DeregisterContentEvents();
    }

    protected override void OnContentChanged() {
      base.OnContentChanged();
      this.UpdateData();
    }
    private void Content_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
      this.UpdateData();
    }
    private void Content_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
      this.UpdateData();
    }
    private void Content_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
      this.UpdateData();
    }

    // Recalculates the impact matrix and hands it to the embedded matrix view.
    private void UpdateData() {
      matrixView.Content = CalculateVariableImpactMatrix();
    }

    /// <summary>
    /// Builds the variable impact matrix for the current content.
    /// </summary>
    /// <remarks>
    /// Only runs whose "Best solution (on validation set)" result is a
    /// <see cref="SymbolicRegressionSolution"/> are considered; other runs are skipped.
    /// Each run column holds the impact of every variable referenced by that run's model
    /// (see <see cref="CalculateMeanImpact"/>); cells of variables the model does not
    /// reference keep the matrix's initial value. The statistics columns are computed
    /// over the run columns only. "pValue" is the both-tails result of
    /// alglib.wilcoxonsignedranktest applied to the per-run differences between a row
    /// and the reference row (the row with the lowest average impact).
    /// </remarks>
    /// <returns>
    /// The impact matrix, or <c>null</c> if no content is set. If no variables are
    /// found, the matrix has zero rows.
    /// </returns>
    public DoubleMatrix CalculateVariableImpactMatrix() {
      if (Content == null) return null;

      // Collect runs and their solutions side by side so that column i of the matrix
      // always belongs to runs[i] / solutions[i].
      List<IRun> runs = new List<IRun>();
      List<SymbolicRegressionSolution> solutions = new List<SymbolicRegressionSolution>();
      foreach (IRun run in Content) {
        if (!run.Results.ContainsKey(validationBestModelResultName)) continue;
        SymbolicRegressionSolution solution = run.Results[validationBestModelResultName] as SymbolicRegressionSolution;
        if (solution == null) continue;  // result of a different type: cannot be analyzed here
        runs.Add(run);
        solutions.Add(solution);
      }

      // Distinct variable names referenced by each solution (materialized once).
      List<List<string>> variableReferences = new List<List<string>>(solutions.Count);
      foreach (SymbolicRegressionSolution solution in solutions) {
        variableReferences.Add(GetVariableReferences(solution).Distinct().ToList());
      }

      List<string> variableNames = variableReferences
        .SelectMany(references => references)
        .Distinct()
        .ToList();

      List<string> statistics = new List<string> { "Median Impact", "Mean Impact", "StdDev", "pValue" };
      List<string> columnNames = runs.Select(run => run.Name).ToList();
      columnNames.AddRange(statistics);

      DoubleMatrix matrix = new DoubleMatrix(variableNames.Count, columnNames.Count);
      matrix.SortableView = true;
      matrix.RowNames = variableNames;
      matrix.ColumnNames = columnNames;

      // Without variables there is nothing to fill in and no reference row to compare with.
      if (variableNames.Count == 0) return matrix;

      Dictionary<string, int> rowIndexByName = new Dictionary<string, int>(variableNames.Count);
      for (int row = 0; row < variableNames.Count; row++) {
        rowIndexByName[variableNames[row]] = row;
      }

      int runCount = runs.Count;
      for (int column = 0; column < runCount; column++) {
        foreach (string variableName in variableReferences[column]) {
          matrix[rowIndexByName[variableName], column] = CalculateMeanImpact(variableName, solutions[column]);
        }
      }

      // Impact values per variable, restricted to the run columns; the statistics
      // columns are still unset at this point and must not enter the statistics.
      List<List<double>> variableImpactValues = Enumerable.Range(0, variableNames.Count)
        .Select(row => GetRowValues(matrix, row, runCount).ToList())
        .ToList();

      // Reference row for the significance test: the variable with the lowest average impact.
      List<double> referenceValues = variableImpactValues
        .OrderBy(impacts => impacts.Average())
        .First();

      int statisticsOffset = runCount;
      for (int row = 0; row < variableNames.Count; row++) {
        List<double> rowValues = variableImpactValues[row];
        matrix[row, statisticsOffset] = rowValues.Median();
        matrix[row, statisticsOffset + 1] = rowValues.Average();
        matrix[row, statisticsOffset + 2] = rowValues.StandardDeviation();

        // Paired differences between this variable and the reference variable.
        double[] z = new double[rowValues.Count];
        for (int i = 0; i < z.Length; i++) {
          z[i] = rowValues[i] - referenceValues[i];
        }

        double bothTails, leftTail, rightTail;
        bothTails = leftTail = rightTail = 0.0;
        alglib.wilcoxonsignedranktest(z, z.Length, 0.0, out bothTails, out leftTail, out rightTail);
        matrix[row, statisticsOffset + 3] = bothTails;
      }
      return matrix;
    }

    // Returns the values of the first columnCount columns of the given matrix row.
    private IEnumerable<double> GetRowValues(DoubleMatrix matrix, int row, int columnCount) {
      return from col in Enumerable.Range(0, columnCount)
             select matrix[row, col];
    }

    // Returns the variable name of every variable node in the solution's expression
    // tree (names may repeat).
    private IEnumerable<string> GetVariableReferences(SymbolicRegressionSolution solution) {
      return from node in solution.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<VariableTreeNode>()
             select node.VariableName;
    }

    /// <summary>
    /// Calculates the impact of a variable on the output of a solution by replacing all
    /// values of that variable with the variable's mean and comparing the resulting
    /// estimated values with the original estimated values via
    /// SimpleMSEEvaluator.Calculate.
    /// </summary>
    /// <remarks>
    /// The solution's dataset is temporarily replaced by the manipulated copy and is
    /// restored afterwards, even if reading the estimated values fails.
    /// </remarks>
    private double CalculateMeanImpact(string variableName, SymbolicRegressionSolution solution) {
      Dataset originalDataset = solution.ProblemData.Dataset;
      int variableIndex = originalDataset.GetVariableIndex(variableName);
      double meanVal = originalDataset.GetVariableValues(variableName).Average();
      List<double> originalOutput = new List<double>(solution.EstimatedValues);

      int rows = originalDataset.Rows;
      int columns = originalDataset.Columns;

      // Copy of the data in which the column of the examined variable is constant (its mean).
      double[,] manipulatedData = new double[rows, columns];
      for (int row = 0; row < rows; row++) {
        for (int column = 0; column < columns; column++) {
          if (column != variableIndex) {
            manipulatedData[row, column] = originalDataset[row, column];
          } else {
            manipulatedData[row, column] = meanVal;
          }
        }
      }

      Dataset manipulatedDataset = new Dataset(originalDataset.VariableNames, manipulatedData);
      List<double> newOutput;
      solution.ProblemData.Dataset = manipulatedDataset;
      try {
        newOutput = new List<double>(solution.EstimatedValues);
      }
      finally {
        // Always hand the original dataset back to the (shared) solution object.
        solution.ProblemData.Dataset = originalDataset;
      }

      return SimpleMSEEvaluator.Calculate(originalOutput, newOutput);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.