Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks.Views/3.3/VariableInteractionNetworkView.cs @ 12263

Last change on this file since 12263 was 12263, checked in by arapeanu, 9 years ago

#2288: Added adjacency matrix update by threshold and target variable functionality + node importance calculation

File size: 11.0 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.ComponentModel;
25using System.Drawing;
26using System.Linq;
27using System.Windows.Forms;
28using HeuristicLab.Common;
29using HeuristicLab.Data;
30using HeuristicLab.MainForm;
31using HeuristicLab.MainForm.WindowsForms;
32using HeuristicLab.Optimization;
33using HeuristicLab.Problems.DataAnalysis;
34using HeuristicLab.Problems.DataAnalysis.Symbolic;
35using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
36using System.Collections;
37
namespace HeuristicLab.VariableInteractionNetworks.Views {
  /// <summary>
  /// View for a <see cref="RunCollection"/> that groups the runs by the target variable of their
  /// regression problem data and shows three matrices derived from the runs' variable impacts:
  /// the adjacency matrix (viewHost1), a node importance ranking (viewHost2) and a thresholded
  /// copy of the adjacency matrix (viewHost3).
  /// </summary>
  [View("Variable Interaction Network")]
  [Content(typeof(RunCollection), IsDefaultView = false)]
  public sealed partial class VariableInteractionNetworkView : AsynchronousContentView {
    private const string variableImpactResultName = "Variable impacts";
    private const string TrainingBestSolutionParameterName = "Best training solution";
    private const string ProblemDataParameterName = "ProblemData";

    // Arguments used for the thresholded matrix shown in viewHost3.
    private const double DefaultImpactThreshold = 0.2;
    private const string DefaultThresholdTargetVariable = "x1";

    /// <summary>The run collection displayed by this view.</summary>
    public new RunCollection Content {
      get { return (RunCollection)base.Content; }
      set { base.Content = value; }
    }

    public VariableInteractionNetworkView() {
      InitializeComponent();
    }

    #region Event Handlers (Content)
    /// <summary>
    /// Recalculates all matrices for the new content, or clears the view hosts when the
    /// content was removed so that no matrices of the previous content remain visible.
    /// </summary>
    protected override void OnContentChanged() {
      base.OnContentChanged();
      if (Content == null) {
        ClearViewHosts();
      } else {
        CalculateAdjacencyMatrix();
      }
    }
    #endregion

    protected override void SetEnabledStateOfControls() {
      base.SetEnabledStateOfControls();
      // The view has no editable controls of its own; nothing to enable or disable.
    }

    /// <summary>Removes the matrices from all three view hosts.</summary>
    private void ClearViewHosts() {
      viewHost1.Content = null;
      viewHost2.Content = null;
      viewHost3.Content = null;
    }

    /// <summary>
    /// Returns true if the run has regression problem data with a target variable and a
    /// variable impact matrix, i.e. everything that is read when building the adjacency matrix.
    /// </summary>
    private static bool HasRequiredData(IRun run) {
      if (!run.Parameters.ContainsKey(ProblemDataParameterName)) return false;
      var problemData = run.Parameters[ProblemDataParameterName] as IRegressionProblemData;
      if (problemData == null || problemData.TargetVariable == null) return false;

      return run.Results.ContainsKey(variableImpactResultName)
          && run.Results[variableImpactResultName] is DoubleMatrix;
    }

    /// <summary>
    /// Groups the runs of the content by the target variable of their problem data.
    /// Runs that do not pass <see cref="HasRequiredData"/> are skipped instead of raising
    /// a KeyNotFoundException or InvalidCastException inside the view.
    /// </summary>
    private List<IGrouping<string, IRun>> GroupRunsByTargetVariable() {
      return Content
        .Where(HasRequiredData)
        .GroupBy(run => ((IRegressionProblemData)run.Parameters[ProblemDataParameterName]).TargetVariable)
        .ToList();
    }

    /// <summary>
    /// Builds the square adjacency matrix and updates the three view hosts.
    /// Rows and columns are both labelled with the ordinally sorted union of all target
    /// variables and all variables found in the impact matrices. The row of a target holds
    /// the mean impacts calculated by <see cref="CalculateAdjacencyRows"/>; every cell is
    /// addressed by variable name, so a value always ends up under its own label.
    /// </summary>
    private void CalculateAdjacencyMatrix() {
      var runsByTarget = GroupRunsByTargetVariable();
      if (runsByTarget.Count == 0) {
        ClearViewHosts();
        return;
      }

      var impactRows = runsByTarget.Select(group => CalculateAdjacencyRows(group)).ToList();

      string[] variableNames = runsByTarget.Select(group => group.Key)
        .Concat(impactRows.SelectMany(impactRow => impactRow.ColumnNames))
        .Distinct()
        .OrderBy(name => name, StringComparer.Ordinal)
        .ToArray();

      var indexOfVariable = new Dictionary<string, int>(variableNames.Length);
      for (int i = 0; i < variableNames.Length; i++) {
        indexOfVariable[variableNames[i]] = i;
      }

      var adjMatrix = new DoubleMatrix(variableNames.Length, variableNames.Length);
      adjMatrix.RowNames = variableNames;
      adjMatrix.ColumnNames = variableNames;

      for (int g = 0; g < runsByTarget.Count; g++) {
        string target = runsByTarget[g].Key;
        int targetRow = indexOfVariable[target];
        DoubleMatrix impactRow = impactRows[g];

        int column = 0;
        foreach (string inputName in impactRow.ColumnNames) {
          // the impact of a variable on itself stays 0 (diagonal of the matrix)
          if (inputName != target) {
            adjMatrix[targetRow, indexOfVariable[inputName]] = impactRow[0, column];
          }
          column++;
        }
      }

      viewHost2.Content = CalculateNodeImportance(adjMatrix);
      viewHost3.Content = UpdateAdjacencyMatrixByThreshold(DefaultImpactThreshold, DefaultThresholdTargetVariable, adjMatrix);
      viewHost1.Content = adjMatrix;
    }

    /// <summary>
    /// Calculates the mean variable impacts over the given runs.
    /// </summary>
    /// <param name="runs">Runs that all contain a variable impact matrix.</param>
    /// <returns>
    /// A 1 x n matrix with one column per distinct variable name of the runs' impact matrices.
    /// Each cell holds the "Mean" value of <see cref="CalculateVariableImpactMatrix"/> for that
    /// variable; variables filtered out there (all impacts almost zero) keep the value 0.
    /// </returns>
    private DoubleMatrix CalculateAdjacencyRows(IEnumerable<IRun> runs) {
      IRun[] runsArray = runs.ToArray();
      string[] runNames = runsArray.Select(run => run.Name).ToArray();

      string[] variableNames = runsArray
        .Select(run => (DoubleMatrix)run.Results[variableImpactResultName])
        .SelectMany(impacts => impacts.RowNames)
        .Distinct()
        .ToArray();

      DoubleMatrix varImpactMatrix = CalculateVariableImpactMatrix(runsArray, runNames);
      int meanColumn = runNames.Length; // the "Mean" column follows the per-run columns

      // The impact matrix contains only the filtered variables, so the means have to be
      // looked up by name and must not be copied by row position.
      var meanImpactOfVariable = new Dictionary<string, double>();
      int row = 0;
      foreach (string name in varImpactMatrix.RowNames) {
        meanImpactOfVariable[name] = varImpactMatrix[row, meanColumn];
        row++;
      }

      var targetMatrix = new DoubleMatrix(1, variableNames.Length);
      for (int i = 0; i < variableNames.Length; i++) {
        double meanImpact;
        if (meanImpactOfVariable.TryGetValue(variableNames[i], out meanImpact)) {
          targetMatrix[0, i] = meanImpact;
        }
      }

      targetMatrix.RowNames = new[] { "Target" };
      targetMatrix.ColumnNames = variableNames;
      return targetMatrix;
    }

    /// <summary>
    /// Returns a copy of the adjacency matrix in which all values of the row of
    /// <paramref name="targetVariable"/> that are smaller than <paramref name="threshold"/>
    /// are set to 0.
    /// </summary>
    /// <param name="threshold">Values below this limit are replaced by 0.</param>
    /// <param name="targetVariable">Row name of the row to update.</param>
    /// <param name="adjMatrix">The matrix to copy; it is not modified.</param>
    /// <returns>The updated copy; an unchanged copy if no row is named like the target variable.</returns>
    private DoubleMatrix UpdateAdjacencyMatrixByThreshold(double threshold, string targetVariable, DoubleMatrix adjMatrix) {
      var updatedMatrix = (DoubleMatrix)adjMatrix.Clone();

      int targetIndex = Array.IndexOf(adjMatrix.RowNames.ToArray(), targetVariable);
      if (targetIndex < 0) return updatedMatrix; // unknown target variable: nothing to update

      for (int column = 0; column < updatedMatrix.Columns; column++) {
        if (updatedMatrix[targetIndex, column] < threshold) {
          updatedMatrix[targetIndex, column] = 0;
        }
      }
      return updatedMatrix;
    }

    /// <summary>
    /// Calculates one importance value per column of the adjacency matrix:
    /// (sum of the column) * (mean TrainingRSquared of the runs that have the column's variable
    /// as target) / (number of rows - 1). The result is sorted by descending importance.
    /// </summary>
    /// <param name="adjMatrix">Adjacency matrix whose column names are variable names.</param>
    /// <returns>
    /// An n x 1 matrix named "Node Importance" with the variable names as row names.
    /// Variables without any run providing a regression solution get a mean quality of 0
    /// and therefore an importance of 0.
    /// </returns>
    private DoubleMatrix CalculateNodeImportance(DoubleMatrix adjMatrix) {
      // mean training quality of the models of each target variable, addressed by name
      var meanQualityOfTarget = new Dictionary<string, double>();
      foreach (var group in GroupRunsByTargetVariable()) {
        List<double> qualities = group
          .Where(run => run.Results.ContainsKey(TrainingBestSolutionParameterName))
          .Select(run => run.Results[TrainingBestSolutionParameterName])
          .OfType<IRegressionSolution>()
          .Select(solution => solution.TrainingRSquared)
          .ToList();
        meanQualityOfTarget[group.Key] = qualities.Count > 0 ? qualities.Average() : 0.0;
      }

      List<string> columnNames = adjMatrix.ColumnNames.ToList();
      // a matrix with a single row would otherwise cause a division by zero
      double divisor = Math.Max(1, adjMatrix.Rows - 1);

      var importances = new List<Tuple<string, double>>(adjMatrix.Columns);
      for (int column = 0; column < adjMatrix.Columns; column++) {
        double impactSum = 0.0;
        for (int row = 0; row < adjMatrix.Rows; row++) {
          impactSum += adjMatrix[row, column];
        }

        string variableName = columnNames[column];
        double meanQuality;
        if (!meanQualityOfTarget.TryGetValue(variableName, out meanQuality)) {
          meanQuality = 0.0;
        }
        importances.Add(Tuple.Create(variableName, impactSum * meanQuality / divisor));
      }

      List<Tuple<string, double>> ranking = importances.OrderByDescending(entry => entry.Item2).ToList();

      var nodeImportance = new DoubleMatrix(ranking.Count, 1);
      for (int i = 0; i < ranking.Count; i++) {
        nodeImportance[i, 0] = ranking[i].Item2;
      }
      nodeImportance.RowNames = ranking.Select(entry => entry.Item1).ToList();
      nodeImportance.ColumnNames = new[] { "Node Importance" };
      return nodeImportance;
    }

    /// <summary>
    /// Builds the variable impact matrix of the given runs (adapted from RunCollectionVariableImpactView).
    /// </summary>
    /// <param name="runs">Runs that all contain a variable impact matrix.</param>
    /// <param name="runNames">Column names for the runs, in the same order as <paramref name="runs"/>.</param>
    /// <returns>
    /// A matrix with one row per variable that has at least one impact that is not almost zero
    /// (row names are the variable names), one column per run and a last column "Mean".
    /// All values are rounded to three decimals.
    /// </returns>
    private DoubleMatrix CalculateVariableImpactMatrix(IRun[] runs, string[] runNames) {
      // materialized once, the impacts are enumerated several times below
      List<DoubleMatrix> allVariableImpacts = runs
        .Select(run => (DoubleMatrix)run.Results[variableImpactResultName])
        .ToList();

      // filter variable names: only include names that have at least one non-zero value in a run
      List<string> variableNamesList = allVariableImpacts
        .SelectMany(impacts => impacts.RowNames)
        .Distinct()
        .Where(name => GetVariableImpacts(name, allVariableImpacts).Any(x => !x.IsAlmost(0.0)))
        .ToList();

      var rowIndexOfVariable = new Dictionary<string, int>(variableNamesList.Count);
      for (int row = 0; row < variableNamesList.Count; row++) {
        rowIndexOfVariable[variableNamesList[row]] = row;
      }

      var columnNames = new List<string>(runNames);
      columnNames.Add("Mean");

      int numberOfRuns = runs.Length;
      var matrix = new DoubleMatrix(variableNamesList.Count, numberOfRuns + 1);
      matrix.SortableView = true;
      matrix.RowNames = variableNamesList;
      matrix.ColumnNames = columnNames;

      // last column: mean impact of the variable over all runs in which it occurs
      for (int row = 0; row < variableNamesList.Count; row++) {
        double mean = GetVariableImpacts(variableNamesList[row], allVariableImpacts).Average();
        matrix[row, numberOfRuns] = Math.Round(mean, 3);
      }

      // fill matrix with impacts from runs
      for (int i = 0; i < numberOfRuns; i++) {
        DoubleMatrix runVariableImpacts = allVariableImpacts[i];
        int impactRow = 0;
        foreach (string rowName in runVariableImpacts.RowNames) {
          int rowIndex;
          if (rowIndexOfVariable.TryGetValue(rowName, out rowIndex)) {
            matrix[rowIndex, i] = Math.Round(runVariableImpacts[impactRow, 0], 3);
          }
          impactRow++;
        }
      }
      return matrix;
    }

    /// <summary>
    /// Yields the impact (first column) of the given variable from every impact matrix that
    /// has a row named like the variable (taken from RunCollectionVariableImpactView).
    /// </summary>
    private static IEnumerable<double> GetVariableImpacts(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
        int row = 0;
        foreach (string rowName in runVariableImpacts.RowNames) {
          if (rowName == variableName) {
            yield return runVariableImpacts[row, 0];
          }
          row++;
        }
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.