Changeset 6760 for branches/PersistenceSpeedUp/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.cs
- Timestamp: 09/14/11 13:59:25 (13 years ago)
- Location: branches/PersistenceSpeedUp
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/PersistenceSpeedUp
- Property svn:ignore
-
old new
12 12 *.psess
13 13 *.vsp
14 *.docstates
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/PersistenceSpeedUp/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.cs
r5975 r6760 35 35 public sealed partial class RunCollectionVariableImpactView : AsynchronousContentView { 36 36 private const string variableImpactResultName = "Variable impacts"; 37 private const string crossValidationFoldsResultName = "CrossValidation Folds"; 38 private const string numberOfFoldsParameterName = "Folds"; 37 39 public RunCollectionVariableImpactView() { 38 40 InitializeComponent(); … … 95 97 } 96 98 99 private void comboBox_SelectedValueChanged(object sender, EventArgs e) { 100 if (comboBox.SelectedItem != null) { 101 var cvRuns = from r in Content 102 where r.Visible 103 where r.Parameters.ContainsKey(numberOfFoldsParameterName) 104 select r; 105 var selectedFolds = from r in cvRuns 106 let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName] 107 select (IRun)foldCollection.ElementAt((int)comboBox.SelectedItem).Clone(); 108 matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray(), cvRuns.Select(r => r.Name).ToArray()); 109 } 110 } 111 112 97 113 private void UpdateData() { 98 matrixView.Content = CalculateVariableImpactMatrix(); 99 } 100 101 private DoubleMatrix CalculateVariableImpactMatrix() { 114 if (Content != null) { 115 comboBox.Items.Clear(); 116 comboBox.Enabled = false; 117 var visibleRuns = Content.Where(r => r.Visible).ToArray(); 118 var representativeCvRun = 119 visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName)).FirstOrDefault(); 120 if (representativeCvRun != null) { 121 // make sure all runs have the same number of folds 122 int nFolds = ((IntValue)representativeCvRun.Parameters[numberOfFoldsParameterName]).Value; 123 var cvRuns = visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName)); 124 if (cvRuns.All(r => ((IntValue)r.Parameters[numberOfFoldsParameterName]).Value == nFolds)) { 125 // populate combobox 126 for (int foldIndex = 0; foldIndex < nFolds; foldIndex++) { 127 comboBox.Items.Add(foldIndex); 128 } 129 comboBox.Enabled = true; 130 var 
selectedFolds = from r in cvRuns 131 let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName] 132 select foldCollection.First(); 133 matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray(), cvRuns.Select(f => f.Name).ToArray()); 134 } else { 135 matrixView.Content = null; 136 } 137 } else { 138 var runsWithVariables = visibleRuns.Where(r => r.Results.ContainsKey(variableImpactResultName)).ToArray(); 139 matrixView.Content = CalculateVariableImpactMatrix(runsWithVariables); 140 } 141 } 142 } 143 144 private IStringConvertibleMatrix CalculateVariableImpactMatrix(IRun[] runs) { 145 return CalculateVariableImpactMatrix(runs, runs.Select(r => r.Name).ToArray()); 146 } 147 148 private DoubleMatrix CalculateVariableImpactMatrix(IRun[] runs, string[] runNames) { 102 149 DoubleMatrix matrix = null; 103 if (Content != null) { 104 List<IRun> runsWithVariables = Content.Where(r => r.Visible && r.Results.ContainsKey(variableImpactResultName)).ToList(); 105 IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runsWithVariables 106 select run.Results[variableImpactResultName]).Cast<DoubleMatrix>(); 107 IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts 108 from variableName in variableImpact.RowNames 109 select variableName) 110 .Distinct(); 111 // filter variableNames: only include names that have at least one non-zero value in a run 112 List<string> variableNamesList = (from variableName in variableNames 113 where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0)) 114 select variableName) 115 .ToList(); 116 117 List<string> statictics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" }; 118 List<string> columnNames = runsWithVariables.Select(r => r.Name).ToList(); 119 columnNames.AddRange(statictics); 120 int runs = runsWithVariables.Count(); 121 122 matrix = new DoubleMatrix(variableNamesList.Count, runs + statictics.Count); 123 matrix.SortableView = true; 124 
matrix.RowNames = variableNamesList; 125 matrix.ColumnNames = columnNames; 126 127 for (int i = 0; i < runsWithVariables.Count; i++) { 128 IRun run = runsWithVariables[i]; 129 DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName]; 130 for (int j = 0; j < runVariableImpacts.Rows; j++) { 131 int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j)); 132 if (rowIndex > -1) { 133 matrix[rowIndex, i] = runVariableImpacts[j, 0]; 134 } 135 } 136 } 137 138 List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList 139 select GetVariableImpacts(variableName, allVariableImpacts).ToList()) 140 .ToList(); 141 List<List<double>> variableRanks = (from variableName in variableNamesList 142 select GetVariableImpactRanks(variableName, allVariableImpacts).ToList()) 143 .ToList(); 144 if (variableImpactsOverRuns.Count() > 0) { 145 // the variable with the worst median impact value is chosen as the reference variable 146 // this is problematic if all variables are relevant, however works often in practice 147 List<double> referenceImpacts = (from impacts in variableImpactsOverRuns 148 let avg = impacts.Median() 149 orderby avg 150 select impacts) 151 .First(); 152 // for all variables 153 for (int row = 0; row < variableImpactsOverRuns.Count; row++) { 154 // median rank 155 matrix[row, runs] = variableRanks[row].Median(); 156 // also show mean and std.dev. 
of relative variable impacts to indicate the relative difference in impacts of variables 157 matrix[row, runs + 1] = variableImpactsOverRuns[row].Average(); 158 matrix[row, runs + 2] = variableImpactsOverRuns[row].StandardDeviation(); 159 160 double leftTail = 0; double rightTail = 0; double bothTails = 0; 161 // calc differences of impacts for current variable and reference variable 162 double[] z = new double[referenceImpacts.Count]; 163 for (int i = 0; i < z.Length; i++) { 164 z[i] = variableImpactsOverRuns[row][i] - referenceImpacts[i]; 165 } 166 // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent 167 alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail); 168 matrix[row, runs + 3] = bothTails; 169 } 170 } 171 } 172 return matrix; 150 IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runs 151 select run.Results[variableImpactResultName]).Cast<DoubleMatrix>(); 152 IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts 153 from variableName in variableImpact.RowNames 154 select variableName) 155 .Distinct(); 156 // filter variableNames: only include names that have at least one non-zero value in a run 157 List<string> variableNamesList = (from variableName in variableNames 158 where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0)) 159 select variableName) 160 .ToList(); 161 162 List<string> statictics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" }; 163 List<string> columnNames = new List<string>(runNames); 164 columnNames.AddRange(statictics); 165 int numberOfRuns = runs.Length; 166 167 matrix = new DoubleMatrix(variableNamesList.Count, numberOfRuns + statictics.Count); 168 matrix.SortableView = true; 169 matrix.RowNames = variableNamesList; 170 matrix.ColumnNames = columnNames; 171 172 // calculate statistics 173 List<List<double>> variableImpactsOverRuns = (from variableName in 
variableNamesList 174 select GetVariableImpacts(variableName, allVariableImpacts).ToList()) 175 .ToList(); 176 List<List<double>> variableRanks = (from variableName in variableNamesList 177 select GetVariableImpactRanks(variableName, allVariableImpacts).ToList()) 178 .ToList(); 179 if (variableImpactsOverRuns.Count() > 0) { 180 // the variable with the worst median impact value is chosen as the reference variable 181 // this is problematic if all variables are relevant, however works often in practice 182 List<double> referenceImpacts = (from impacts in variableImpactsOverRuns 183 let avg = impacts.Median() 184 orderby avg 185 select impacts) 186 .First(); 187 // for all variables 188 for (int row = 0; row < variableImpactsOverRuns.Count; row++) { 189 // median rank 190 matrix[row, numberOfRuns] = variableRanks[row].Median(); 191 // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables 192 matrix[row, numberOfRuns + 1] = Math.Round(variableImpactsOverRuns[row].Average(), 3); 193 matrix[row, numberOfRuns + 2] = Math.Round(variableImpactsOverRuns[row].StandardDeviation(), 3); 194 195 double leftTail = 0; double rightTail = 0; double bothTails = 0; 196 // calc differences of impacts for current variable and reference variable 197 double[] z = new double[referenceImpacts.Count]; 198 for (int i = 0; i < z.Length; i++) { 199 z[i] = variableImpactsOverRuns[row][i] - referenceImpacts[i]; 200 } 201 // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent 202 alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail); 203 matrix[row, numberOfRuns + 3] = Math.Round(bothTails, 4); 204 } 205 } 206 207 // fill matrix with impacts from runs 208 for (int i = 0; i < runs.Length; i++) { 209 IRun run = runs[i]; 210 DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName]; 211 for (int j = 0; j < 
runVariableImpacts.Rows; j++) { 212 int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j)); 213 if (rowIndex > -1) { 214 matrix[rowIndex, i] = Math.Round(runVariableImpacts[j, 0], 3); 215 } 216 } 217 } 218 // sort by median 219 var sortedMatrix = (DoubleMatrix)matrix.Clone(); 220 var sortedIndexes = from i in Enumerable.Range(0, sortedMatrix.Rows) 221 orderby matrix[i, numberOfRuns] 222 select i; 223 224 int targetIndex = 0; 225 foreach (var sourceIndex in sortedIndexes) { 226 for (int c = 0; c < matrix.Columns; c++) 227 sortedMatrix[targetIndex, c] = matrix[sourceIndex, c]; 228 targetIndex++; 229 } 230 return sortedMatrix; 173 231 } 174 232 … … 213 271 } 214 272 } 273 215 274 } 216 275 }
Note: See TracChangeset for help on using the changeset viewer.