Changeset 16035
- Timestamp:
- 07/31/18 13:26:33 (6 years ago)
- Location:
- branches/2904_CalculateImpacts
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2904_CalculateImpacts/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs
r16034 r16035 37 37 [Item("RegressionSolution Impacts Calculator", "Calculation of the impacts of input variables for any regression solution")] 38 38 public sealed class RegressionSolutionVariableImpactsCalculator : ParameterizedNamedItem { 39 #region Parameters/Properties 39 40 public enum ReplacementMethodEnum { 40 41 Median, … … 80 81 set { DataPartitionParameter.Value.Value = value; } 81 82 } 82 83 83 #endregion 84 85 #region Ctor/Cloner 84 86 [StorableConstructor] 85 87 private RegressionSolutionVariableImpactsCalculator(bool deserializing) : base(deserializing) { } 86 88 private RegressionSolutionVariableImpactsCalculator(RegressionSolutionVariableImpactsCalculator original, Cloner cloner) 87 89 : base(original, cloner) { } 88 public override IDeepCloneable Clone(Cloner cloner) {89 return new RegressionSolutionVariableImpactsCalculator(this, cloner);90 }91 92 90 public RegressionSolutionVariableImpactsCalculator() 93 91 : base() { … … 97 95 } 98 96 97 public override IDeepCloneable Clone(Cloner cloner) { 98 return new RegressionSolutionVariableImpactsCalculator(this, cloner); 99 } 100 #endregion 101 102 #region Public Methods/Wrappers 99 103 //mkommend: annoying name clash with static method, open to better naming suggestions 100 104 public IEnumerable<Tuple<string, double>> Calculate(IRegressionSolution solution) { … … 106 110 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle, 107 111 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 108 DataPartitionEnum data = DataPartitionEnum.Training) {109 return CalculateImpacts(solution.Model, solution.ProblemData, solution.EstimatedValues, replacementMethod, factorReplacementMethod, data );112 DataPartitionEnum dataPartition = DataPartitionEnum.Training) { 113 return CalculateImpacts(solution.Model, solution.ProblemData, solution.EstimatedValues, replacementMethod, factorReplacementMethod, dataPartition); 110 114 } 111 115 … … 116 120 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle, 117 121 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 118 DataPartitionEnum data = DataPartitionEnum.Training) { 119 IEnumerable<int> rows; 120 121 switch (data) { 122 case DataPartitionEnum.All: 123 rows = problemData.AllIndices; 124 break; 125 case DataPartitionEnum.Test: 126 rows = problemData.TestIndices; 127 break; 128 case DataPartitionEnum.Training: 129 rows = problemData.TrainingIndices; 130 break; 131 default: 132 throw new NotSupportedException("DataPartition not supported"); 133 } 134 122 DataPartitionEnum dataPartition = DataPartitionEnum.Training) { 123 IEnumerable<int> rows = GetPartitionRows(dataPartition, problemData); 135 124 return CalculateImpacts(model, problemData, estimatedValues, rows, replacementMethod, factorReplacementMethod); 136 }137 138 public static double CalculateImpact(string variableName, IRegressionModel model, IRegressionProblemData problemData, IEnumerable<double> estimatedValues, DataPartitionEnum dataPartition, ReplacementMethodEnum replMethod, FactorReplacementMethodEnum factorReplMethod) {139 double impact = 0;140 141 IEnumerable<int> rows;142 switch (dataPartition) {143 case DataPartitionEnum.All:144 rows = problemData.AllIndices;145 break;146 case DataPartitionEnum.Test:147 rows = problemData.TestIndices;148 break;149 case DataPartitionEnum.Training:150 rows = problemData.TrainingIndices;151 break;152 default:153 throw new NotSupportedException("DataPartition not supported");154 }155 156 OnlineCalculatorError error;157 IEnumerable<double> targetValuesPartition = rows.Select(v => problemData.TargetVariableValues.ElementAt(v));158 IEnumerable<double> estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v));159 var originalCalculatorValue = CalculateVariableImpact(targetValuesPartition, estimatedValuesPartition, out error);160 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation.");161 162 163 var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();164 165 // calculate impacts for double variables166 if (problemData.Dataset.VariableHasType<double>(variableName)) {167 impact = CalculateImpactForNumericalVariables(variableName, model, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, replMethod);168 } else if (problemData.Dataset.VariableHasType<string>(variableName)) {169 impact = CalculateImpactForFactorVariables(variableName, model, problemData.Dataset, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, factorReplMethod);170 } else {171 throw new NotSupportedException("Variable not supported");172 }173 return impact;174 125 } 175 126 … … 191 142 var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(model.VariablesUsedForPrediction)); 192 143 var allowedInputVariables = problemData.Dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList(); 144 var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable(); 193 145 194 146 foreach (var inputVariable in allowedInputVariables) { 195 impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValuesPartition, originalCalculatorValue, replacementMethod, factorReplacementMethod);147 impacts[inputVariable] = CalculateImpact(inputVariable, model, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, replacementMethod, factorReplacementMethod); 196 148 } 197 149 … … 200 152 201 153 public static double CalculateImpact(string variableName, 202 IRegressionSolution solution, 203 IEnumerable<int> rows, 204 IEnumerable<double> targetValues, 205 double originalValue, 206 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle, 207 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 208 DataPartitionEnum data = DataPartitionEnum.Training) { 209 return CalculateImpact(variableName, solution.Model, solution.ProblemData.Dataset, rows, targetValues, originalValue, replacementMethod, factorReplacementMethod); 210 } 211 212 public static double CalculateImpact(string variableName, 213 IRegressionModel model, 214 IDataset dataset, 154 IRegressionModel model, 155 ModifiableDataset modifiableDataset, 215 156 IEnumerable<int> rows, 216 157 IEnumerable<double> targetValues, … … 220 161 221 162 double impact = 0; 222 var modifiableDataset = ((Dataset)(dataset).Clone()).ToModifiable();223 163 224 164 // calculate impacts for double variables 225 if ( dataset.VariableHasType<double>(variableName)) {165 if (modifiableDataset.VariableHasType<double>(variableName)) { 226 166 impact = CalculateImpactForNumericalVariables(variableName, model, modifiableDataset, rows, targetValues, originalValue, replacementMethod); 227 } else if ( dataset.VariableHasType<string>(variableName)) {228 impact = CalculateImpactForFactorVariables(variableName, model, dataset,modifiableDataset, rows, targetValues, originalValue, factorReplacementMethod);167 } else if (modifiableDataset.VariableHasType<string>(variableName)) { 168 impact = CalculateImpactForFactorVariables(variableName, model, modifiableDataset, rows, targetValues, originalValue, factorReplacementMethod); 229 169 } else { 230 170 throw new NotSupportedException("Variable not supported"); … … 232 172 return impact; 233 173 } 174 #endregion 234 175 235 176 private static double CalculateImpactForNumericalVariables(string variableName, … … 249 190 private static double CalculateImpactForFactorVariables(string variableName, 250 191 IRegressionModel model, 251 IDataset problemData,252 192 ModifiableDataset modifiableDataset, 253 193 IEnumerable<int> rows, … … 260 200 // try replacing with all possible values and find the best replacement value 261 201 var smallestImpact = double.PositiveInfinity; 262 foreach (var repl in problemData.GetStringValues(variableName, rows).Distinct()) {202 foreach (var repl in modifiableDataset.GetStringValues(variableName, rows).Distinct()) { 263 203 var originalValues = modifiableDataset.GetReadOnlyStringValues(variableName).ToList(); 264 var newEstimates = GetReplaced Values(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, problemData.Rows).ToList());204 var newEstimates = GetReplacedEstimates(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, modifiableDataset.Rows).ToList()); 265 205 var newValue = CalculateVariableImpact(targetValues, newEstimates, out error); 266 206 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation with replaced inputs."); … … 330 270 } 331 271 332 return GetReplaced Values(originalValues, model, variable, dataset, rows, replacementValues);272 return GetReplacedEstimates(originalValues, model, variable, dataset, rows, replacementValues); 333 273 } 334 274 … … 368 308 } 369 309 370 return GetReplaced Values(originalValues, model, variable, dataset, rows, replacementValues);371 } 372 373 private static IEnumerable<double> GetReplaced Values(310 return GetReplacedEstimates(originalValues, model, variable, dataset, rows, replacementValues); 311 } 312 313 private static IEnumerable<double> GetReplacedEstimates( 374 314 IList originalValues, 375 315 IRegressionModel model, … … 386 326 } 387 327 388 p rivatestatic double CalculateVariableImpact(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {328 public static double CalculateVariableImpact(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) { 389 329 IEnumerator<double> firstEnumerator = originalValues.GetEnumerator(); 390 330 IEnumerator<double> secondEnumerator = estimatedValues.GetEnumerator(); … … 408 348 } 409 349 } 350 351 public static IEnumerable<int> GetPartitionRows(DataPartitionEnum dataPartition, IRegressionProblemData problemData) { 352 IEnumerable<int> rows; 353 354 switch (dataPartition) { 355 case DataPartitionEnum.All: 356 rows = problemData.AllIndices; 357 break; 358 case DataPartitionEnum.Test: 359 rows = problemData.TestIndices; 360 break; 361 case DataPartitionEnum.Training: 362 rows = problemData.TrainingIndices; 363 break; 364 default: 365 throw new NotSupportedException("DataPartition not supported"); 366 } 367 368 return rows; 369 } 410 370 } 411 371 } -
branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.cs
r16034 r16035 160 160 int count = originalVariableOrdering.Count; 161 161 int i = 0; 162 163 foreach (var variable in originalVariableOrdering) { 162 var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable(); 163 IEnumerable<int> rows = RegressionSolutionVariableImpactsCalculator.GetPartitionRows(dataPartition, problemData); 164 165 //Calculate original quality-values (via calculator, default is R²) 166 OnlineCalculatorError error; 167 IEnumerable<double> targetValuesPartition = rows.Select(v => problemData.TargetVariableValues.ElementAt(v)); 168 IEnumerable<double> estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v)); 169 var originalCalculatorValue = RegressionSolutionVariableImpactsCalculator.CalculateVariableImpact(targetValuesPartition, estimatedValuesPartition, out error); 170 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation."); 171 172 foreach (var variableName in originalVariableOrdering) { 164 173 if (cancellationToken.Token.IsCancellationRequested) { return null; } 165 174 progress.ProgressValue = (double)++i / count; 166 progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variable , i, count);167 168 double impact = RegressionSolutionVariableImpactsCalculator.CalculateImpact(variable , model, problemData, Content.EstimatedValues, dataPartition, replMethod, factorReplMethod);169 impacts.Add(new Tuple<string, double>(variable , impact));175 progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variableName, i, count); 176 177 double impact = RegressionSolutionVariableImpactsCalculator.CalculateImpact(variableName, model, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, replMethod, factorReplMethod); 178 impacts.Add(new Tuple<string, double>(variableName, impact)); 170 179 } 171 180
Note: See TracChangeset
for help on using the changeset viewer.