- Timestamp:
- 03/18/17 12:47:30 (8 years ago)
- Location:
- branches/symbreg-factors-2650
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.Designer.cs
r14351 r14762 47 47 this.dataPartitionComboBox = new System.Windows.Forms.ComboBox(); 48 48 this.dataPartitionLabel = new System.Windows.Forms.Label(); 49 this. replacementLabel = new System.Windows.Forms.Label();49 this.numericVarReplacementLabel = new System.Windows.Forms.Label(); 50 50 this.replacementComboBox = new System.Windows.Forms.ComboBox(); 51 this.factorVarReplacementLabel = new System.Windows.Forms.Label(); 52 this.factorVarReplComboBox = new System.Windows.Forms.ComboBox(); 51 53 this.SuspendLayout(); 52 54 // … … 58 60 this.variableImactsArrayView.Caption = "StringConvertibleArray View"; 59 61 this.variableImactsArrayView.Content = null; 60 this.variableImactsArrayView.Location = new System.Drawing.Point(3, 59);62 this.variableImactsArrayView.Location = new System.Drawing.Point(3, 84); 61 63 this.variableImactsArrayView.Name = "variableImactsArrayView"; 62 64 this.variableImactsArrayView.ReadOnly = true; 63 this.variableImactsArrayView.Size = new System.Drawing.Size(3 04, 223);64 this.variableImactsArrayView.TabIndex = 4;65 this.variableImactsArrayView.Size = new System.Drawing.Size(363, 278); 66 this.variableImactsArrayView.TabIndex = 2; 65 67 // 66 68 // dataPartitionComboBox … … 71 73 HeuristicLab.Problems.DataAnalysis.RegressionSolutionVariableImpactsCalculator.DataPartitionEnum.Test, 72 74 HeuristicLab.Problems.DataAnalysis.RegressionSolutionVariableImpactsCalculator.DataPartitionEnum.All}); 73 this.dataPartitionComboBox.Location = new System.Drawing.Point( 82, 3);75 this.dataPartitionComboBox.Location = new System.Drawing.Point(197, 3); 74 76 this.dataPartitionComboBox.Name = "dataPartitionComboBox"; 75 77 this.dataPartitionComboBox.Size = new System.Drawing.Size(121, 21); … … 86 88 this.dataPartitionLabel.Text = "Data partition:"; 87 89 // 88 // replacementLabel90 // numericVarReplacementLabel 89 91 // 90 this. replacementLabel.AutoSize = true;91 this. replacementLabel.Location = new System.Drawing.Point(3, 35);92 this. replacementLabel.Name = "replacementLabel";93 this. replacementLabel.Size = new System.Drawing.Size(73, 13);94 this. replacementLabel.TabIndex = 2;95 this. replacementLabel.Text = "Replacement:";92 this.numericVarReplacementLabel.AutoSize = true; 93 this.numericVarReplacementLabel.Location = new System.Drawing.Point(3, 33); 94 this.numericVarReplacementLabel.Name = "numericVarReplacementLabel"; 95 this.numericVarReplacementLabel.Size = new System.Drawing.Size(173, 13); 96 this.numericVarReplacementLabel.TabIndex = 2; 97 this.numericVarReplacementLabel.Text = "Replacement for numeric variables:"; 96 98 // 97 99 // replacementComboBox … … 103 105 HeuristicLab.Problems.DataAnalysis.RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum.Noise, 104 106 HeuristicLab.Problems.DataAnalysis.RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum.Shuffle}); 105 this.replacementComboBox.Location = new System.Drawing.Point( 82, 32);107 this.replacementComboBox.Location = new System.Drawing.Point(197, 30); 106 108 this.replacementComboBox.Name = "replacementComboBox"; 107 109 this.replacementComboBox.Size = new System.Drawing.Size(121, 21); … … 109 111 this.replacementComboBox.SelectedIndexChanged += new System.EventHandler(this.replacementComboBox_SelectedIndexChanged); 110 112 // 113 // factorVarReplacementLabel 114 // 115 this.factorVarReplacementLabel.AutoSize = true; 116 this.factorVarReplacementLabel.Location = new System.Drawing.Point(3, 60); 117 this.factorVarReplacementLabel.Name = "factorVarReplacementLabel"; 118 this.factorVarReplacementLabel.Size = new System.Drawing.Size(188, 13); 119 this.factorVarReplacementLabel.TabIndex = 0; 120 this.factorVarReplacementLabel.Text = "Replacement for categorical variables:"; 121 // 122 // factorVarReplComboBox 123 // 124 this.factorVarReplComboBox.FormattingEnabled = true; 125 this.factorVarReplComboBox.Items.AddRange(new object[] { 126 HeuristicLab.Problems.DataAnalysis.RegressionSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Best, 127 HeuristicLab.Problems.DataAnalysis.RegressionSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Mode, 128 HeuristicLab.Problems.DataAnalysis.RegressionSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Shuffle}); 129 this.factorVarReplComboBox.Location = new System.Drawing.Point(197, 57); 130 this.factorVarReplComboBox.Name = "factorVarReplComboBox"; 131 this.factorVarReplComboBox.Size = new System.Drawing.Size(121, 21); 132 this.factorVarReplComboBox.TabIndex = 1; 133 this.factorVarReplComboBox.SelectedIndexChanged += new System.EventHandler(this.replacementComboBox_SelectedIndexChanged); 134 // 111 135 // RegressionSolutionVariableImpactsView 112 136 // 113 137 this.AllowDrop = true; 114 138 this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Inherit; 139 this.Controls.Add(this.factorVarReplComboBox); 140 this.Controls.Add(this.factorVarReplacementLabel); 115 141 this.Controls.Add(this.replacementComboBox); 116 this.Controls.Add(this. replacementLabel);142 this.Controls.Add(this.numericVarReplacementLabel); 117 143 this.Controls.Add(this.dataPartitionLabel); 118 144 this.Controls.Add(this.dataPartitionComboBox); 119 145 this.Controls.Add(this.variableImactsArrayView); 120 146 this.Name = "RegressionSolutionVariableImpactsView"; 121 this.Size = new System.Drawing.Size(3 10, 285);147 this.Size = new System.Drawing.Size(369, 365); 122 148 this.ResumeLayout(false); 123 149 this.PerformLayout(); … … 130 156 private System.Windows.Forms.ComboBox dataPartitionComboBox; 131 157 private System.Windows.Forms.Label dataPartitionLabel; 132 private System.Windows.Forms.Label replacementLabel;158 private System.Windows.Forms.Label numericVarReplacementLabel; 133 159 private System.Windows.Forms.ComboBox replacementComboBox; 160 private System.Windows.Forms.Label factorVarReplacementLabel; 161 private System.Windows.Forms.ComboBox factorVarReplComboBox; 134 162 } 135 163 } -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.cs
r14351 r14762 43 43 this.dataPartitionComboBox.SelectedIndex = 0; 44 44 this.replacementComboBox.SelectedIndex = 0; 45 } 45 this.factorVarReplComboBox.SelectedIndex = 0; 46 } 46 47 47 48 #region events … … 68 69 protected override void OnContentChanged() { 69 70 base.OnContentChanged(); 70 if 71 if(Content == null) { 71 72 variableImactsArrayView.Content = null; 72 73 } else { … … 76 77 77 78 private void UpdateVariableImpacts() { 78 if (Content == null || replacementComboBox.SelectedIndex < 0 || dataPartitionComboBox.SelectedIndex < 0) return; 79 if(Content == null || replacementComboBox.SelectedIndex < 0 80 || factorVarReplComboBox.SelectedIndex < 0 81 || dataPartitionComboBox.SelectedIndex < 0) return; 79 82 var mainForm = (MainForm.WindowsForms.MainForm)MainFormManager.MainForm; 80 83 variableImactsArrayView.Caption = Content.Name + " Variable Impacts"; 81 84 var replMethod = 82 (RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum)replacementComboBox.Items[replacementComboBox.SelectedIndex]; 85 (RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum) 86 replacementComboBox.Items[replacementComboBox.SelectedIndex]; 87 var factorReplMethod = 88 (RegressionSolutionVariableImpactsCalculator.FactorReplacementMethodEnum) 89 factorVarReplComboBox.Items[factorVarReplComboBox.SelectedIndex]; 83 90 var dataPartition = 84 91 (RegressionSolutionVariableImpactsCalculator.DataPartitionEnum)dataPartitionComboBox.SelectedItem; … … 88 95 mainForm.AddOperationProgressToView(this, "Calculating variable impacts for " + Content.Name); 89 96 90 var impacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(Content, dataPartition, replMethod );97 var impacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(Content, dataPartition, replMethod, factorReplMethod); 91 98 var impactArray = new DoubleArray(impacts.Select(i => i.Item2).ToArray()); 92 99 impactArray.ElementNames = impacts.Select(i => i.Item1); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs
r14498 r14762 42 42 Noise 43 43 } 44 44 public enum FactorReplacementMethodEnum { 45 Best, 46 Mode, 47 Shuffle 48 } 45 49 public enum DataPartitionEnum { 46 50 Training, … … 88 92 } 89 93 90 public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution, 94 public static IEnumerable<Tuple<string, double>> CalculateImpacts( 95 IRegressionSolution solution, 91 96 DataPartitionEnum data = DataPartitionEnum.Training, 92 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median) { 97 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median, 98 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) { 93 99 94 100 var problemData = solution.ProblemData; … … 101 107 OnlineCalculatorError error; 102 108 103 switch 109 switch(data) { 104 110 case DataPartitionEnum.All: 105 111 rows = solution.ProblemData.AllIndices; 106 112 targetValues = problemData.TargetVariableValues.ToList(); 107 113 originalR2 = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out error); 108 if 114 if(error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation."); 109 115 originalR2 = originalR2 * originalR2; 110 116 break; … … 129 135 130 136 // calculate impacts for double variables 131 foreach 137 foreach(var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) { 132 138 var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod); 133 139 var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error); 134 if 140 if(error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs."); 135 141 136 142 newR2 = newR2 * newR2; … … 138 144 impacts[inputVariable] = impact; 139 145 } 140 // calculate impacts for factor variables 141 foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) { 142 var smallestImpact = double.PositiveInfinity; 143 foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) { 144 var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, Enumerable.Repeat(repl, dataset.Rows)); 146 147 // calculate impacts for string variables 148 foreach(var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) { 149 if(factorReplacementMethod == FactorReplacementMethodEnum.Best) { 150 // try replacing with all possible values and find the best replacement value 151 var smallestImpact = double.PositiveInfinity; 152 foreach(var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) { 153 var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, 154 Enumerable.Repeat(repl, dataset.Rows)); 155 var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error); 156 if(error != OnlineCalculatorError.None) 157 throw new InvalidOperationException("Error during R² calculation with replaced inputs."); 158 159 newR2 = newR2 * newR2; 160 var impact = originalR2 - newR2; 161 if(impact < smallestImpact) smallestImpact = impact; 162 } 163 impacts[inputVariable] = smallestImpact; 164 } else { 165 // for replacement methods shuffle and mode 166 // calculate impacts for factor variables 167 168 var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, 169 factorReplacementMethod); 145 170 var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error); 146 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs."); 171 if(error != OnlineCalculatorError.None) 172 throw new InvalidOperationException("Error during R² calculation with replaced inputs."); 147 173 148 174 newR2 = newR2 * newR2; 149 175 var impact = originalR2 - newR2; 150 i f (impact < smallestImpact) smallestImpact= impact;176 impacts[inputVariable] = impact; 151 177 } 152 impacts[inputVariable] = smallestImpact; 153 } 178 } // foreach 154 179 return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value)); 155 180 } … … 161 186 IRandom rand; 162 187 163 switch 188 switch(replacement) { 164 189 case ReplacementMethodEnum.Median: 165 190 replacementValue = rows.Select(r => originalValues[r]).Median(); … … 179 204 int i = 0; 180 205 // update column values 181 foreach 206 foreach(var r in rows) { 182 207 replacementValues[r] = shuffledValues[i++]; 183 208 } … … 190 215 replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList(); 191 216 // update column values 192 foreach 217 foreach(var r in rows) { 193 218 replacementValues[r] = NormalDistributedRandom.NextDouble(rand, avg, stdDev); 194 219 } … … 202 227 } 203 228 204 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, 229 private static IEnumerable<double> EvaluateModelWithReplacedVariable( 230 IRegressionModel model, string variable, ModifiableDataset dataset, 231 IEnumerable<int> rows, 232 FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Shuffle) { 233 var originalValues = dataset.GetReadOnlyStringValues(variable).ToList(); 234 List<string> replacementValues; 235 IRandom rand; 236 237 switch(replacement) { 238 case FactorReplacementMethodEnum.Mode: 239 var mostCommonValue = rows.Select(r => originalValues[r]) 240 .GroupBy(v => v) 241 .OrderByDescending(g => g.Count()) 242 .First().Key; 243 replacementValues = Enumerable.Repeat(mostCommonValue, dataset.Rows).ToList(); 244 break; 245 case FactorReplacementMethodEnum.Shuffle: 246 // new var has same empirical distribution but the relation to y is broken 247 rand = new FastRandom(31415); 248 // prepare a complete column for the dataset 249 replacementValues = Enumerable.Repeat(string.Empty, dataset.Rows).ToList(); 250 // shuffle only the selected rows 251 var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList(); 252 int i = 0; 253 // update column values 254 foreach(var r in rows) { 255 replacementValues[r] = shuffledValues[i++]; 256 } 257 break; 258 default: 259 throw new ArgumentException(string.Format("FactorReplacementMethod {0} cannot be handled.", replacement)); 260 } 261 262 return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues); 263 } 264 265 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, 205 266 ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues) { 206 267 var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList(); … … 212 273 return estimates; 213 274 } 214 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, 275 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, 215 276 ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues) { 216 277 var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
Note: See TracChangeset
for help on using the changeset viewer.