Changeset 16031
- Timestamp: 07/30/18 13:07:55 (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2904_CalculateImpacts/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs
r16020 r16031 58 58 private const string DataPartitionParameterName = "DataPartition"; 59 59 60 public IFixedValueParameter<EnumValue<ReplacementMethodEnum>> ReplacementParameter 61 { 60 public IFixedValueParameter<EnumValue<ReplacementMethodEnum>> ReplacementParameter { 62 61 get { return (IFixedValueParameter<EnumValue<ReplacementMethodEnum>>)Parameters[ReplacementParameterName]; } 63 62 } 64 public IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>> FactorReplacementParameter 65 { 63 public IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>> FactorReplacementParameter { 66 64 get { return (IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>>)Parameters[FactorReplacementParameterName]; } 67 65 } 68 public IFixedValueParameter<EnumValue<DataPartitionEnum>> DataPartitionParameter 69 { 66 public IFixedValueParameter<EnumValue<DataPartitionEnum>> DataPartitionParameter { 70 67 get { return (IFixedValueParameter<EnumValue<DataPartitionEnum>>)Parameters[DataPartitionParameterName]; } 71 68 } 72 69 73 public ReplacementMethodEnum ReplacementMethod 74 { 70 public ReplacementMethodEnum ReplacementMethod { 75 71 get { return ReplacementParameter.Value.Value; } 76 72 set { ReplacementParameter.Value.Value = value; } 77 73 } 78 public FactorReplacementMethodEnum FactorReplacementMethod 79 { 74 public FactorReplacementMethodEnum FactorReplacementMethod { 80 75 get { return FactorReplacementParameter.Value.Value; } 81 76 set { FactorReplacementParameter.Value.Value = value; } 82 77 } 83 public DataPartitionEnum DataPartition 84 { 78 public DataPartitionEnum DataPartition { 85 79 get { return DataPartitionParameter.Value.Value; } 86 80 set { DataPartitionParameter.Value.Value = value; } … … 152 146 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 153 147 Func<double, string, bool> progressCallback = null) { 154 155 IEnumerable<double> targetValues; 156 double originalCalculatorValue = -1; 157 158 PrepareData(rows, 
problemData, estimatedValues, out targetValues, out originalCalculatorValue); 148 //Calculate original quality-values (via calculator, default is R²) 149 OnlineCalculatorError error; 150 IEnumerable<double> targetValuesPartition = rows.Select(v => problemData.TargetVariableValues.ElementAt(v)); 151 IEnumerable<double> estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v)); 152 var originalCalculatorValue = CalculateVariableImpact(targetValuesPartition, estimatedValuesPartition, out error); 153 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation."); 159 154 160 155 var impacts = new Dictionary<string, double>(); … … 171 166 if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; } 172 167 } 173 impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValues , originalCalculatorValue, replacementMethod, factorReplacementMethod);168 impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValuesPartition, originalCalculatorValue, replacementMethod, factorReplacementMethod); 174 169 } 175 170 … … 202 197 // calculate impacts for double variables 203 198 if (dataset.VariableHasType<double>(variableName)) { 204 impact = CalculateImpactFor Double(variableName, model, modifiableDataset, rows, targetValues, originalValue, replacementMethod);199 impact = CalculateImpactForNumericalVariables(variableName, model, modifiableDataset, rows, targetValues, originalValue, replacementMethod); 205 200 } else if (dataset.VariableHasType<string>(variableName)) { 206 impact = CalculateImpactFor String(variableName, model, dataset, modifiableDataset, rows, targetValues, originalValue, factorReplacementMethod);201 impact = CalculateImpactForFactorVariables(variableName, model, dataset, modifiableDataset, rows, targetValues, originalValue, 
factorReplacementMethod); 207 202 } else { 208 203 throw new NotSupportedException("Variable not supported"); … … 221 216 targetValues = rows.Select(v => targetVariableValueList.ElementAt(v)); 222 217 var estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v)); 223 originalValue = CalculateVa lue(targetValues, estimatedValuesPartition, out error);218 originalValue = CalculateVariableImpact(targetValues, estimatedValuesPartition, out error); 224 219 225 220 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation."); 226 221 } 227 222 228 private static double CalculateImpactFor Double(string variableName,223 private static double CalculateImpactForNumericalVariables(string variableName, 229 224 IRegressionModel model, 230 225 ModifiableDataset modifiableDataset, … … 234 229 ReplacementMethodEnum replacementMethod) { 235 230 OnlineCalculatorError error; 236 var newEstimates = EvaluateModelWithReplacedVariable(model, variableName, modifiableDataset, rows, replacementMethod);237 var newValue = CalculateVa lue(targetValues, newEstimates, out error);231 var newEstimates = GetReplacedValuesForNumericalVariables(model, variableName, modifiableDataset, rows, replacementMethod); 232 var newValue = CalculateVariableImpact(targetValues, newEstimates, out error); 238 233 if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during calculation with replaced inputs."); } 239 234 return originalValue - newValue; 240 235 } 241 236 242 private static double CalculateImpactFor String(string variableName,237 private static double CalculateImpactForFactorVariables(string variableName, 243 238 IRegressionModel model, 244 239 IDataset problemData, … … 255 250 foreach (var repl in problemData.GetStringValues(variableName, rows).Distinct()) { 256 251 var originalValues = modifiableDataset.GetReadOnlyStringValues(variableName).ToList(); 257 var newEstimates = 
EvaluateModelWithReplacedVariable(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, problemData.Rows).ToList());258 var newValue = CalculateVa lue(targetValues, newEstimates, out error);252 var newEstimates = GetReplacedValues(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, problemData.Rows).ToList()); 253 var newValue = CalculateVariableImpact(targetValues, newEstimates, out error); 259 254 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation with replaced inputs."); 260 255 … … 266 261 // for replacement methods shuffle and mode 267 262 // calculate impacts for factor variables 268 var newEstimates = EvaluateModelWithReplacedVariable(model, variableName, modifiableDataset, rows, factorReplacementMethod);269 var newValue = CalculateVa lue(targetValues, newEstimates, out error);263 var newEstimates = GetReplacedValuesForFactorVariables(model, variableName, modifiableDataset, rows, factorReplacementMethod); 264 var newValue = CalculateVariableImpact(targetValues, newEstimates, out error); 270 265 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation with replaced inputs."); 271 266 … … 274 269 } 275 270 276 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Shuffle) { 271 private static IEnumerable<double> GetReplacedValuesForNumericalVariables( 272 IRegressionModel model, 273 string variable, 274 ModifiableDataset dataset, 275 IEnumerable<int> rows, 276 ReplacementMethodEnum replacement = ReplacementMethodEnum.Shuffle) { 277 277 var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList(); 278 278 double replacementValue; … … 318 318 } 319 319 320 return EvaluateModelWithReplacedVariable(originalValues, model, variable, 
dataset, rows, replacementValues); 321 } 322 323 private static IEnumerable<double> EvaluateModelWithReplacedVariable( 324 IRegressionModel model, string variable, ModifiableDataset dataset, 325 IEnumerable<int> rows, 326 FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Best) { 320 return GetReplacedValues(originalValues, model, variable, dataset, rows, replacementValues); 321 } 322 323 private static IEnumerable<double> GetReplacedValuesForFactorVariables( 324 IRegressionModel model, 325 string variable, 326 ModifiableDataset dataset, 327 IEnumerable<int> rows, 328 FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Shuffle) { 327 329 var originalValues = dataset.GetReadOnlyStringValues(variable).ToList(); 328 330 List<string> replacementValues; … … 354 356 } 355 357 356 return EvaluateModelWithReplacedVariable(originalValues, model, variable, dataset, rows, replacementValues); 357 } 358 359 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IList originalValues, IRegressionModel model, string variable, 360 ModifiableDataset dataset, IEnumerable<int> rows, IList replacementValues) { 358 return GetReplacedValues(originalValues, model, variable, dataset, rows, replacementValues); 359 } 360 361 private static IEnumerable<double> GetReplacedValues( 362 IList originalValues, 363 IRegressionModel model, 364 string variable, 365 ModifiableDataset dataset, 366 IEnumerable<int> rows, 367 IList replacementValues) { 361 368 dataset.ReplaceVariable(variable, replacementValues); 362 369 //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements … … 367 374 } 368 375 369 private static double CalculateVa lue(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {376 private static double CalculateVariableImpact(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out 
OnlineCalculatorError errorState) { 370 377 IEnumerator<double> firstEnumerator = originalValues.GetEnumerator(); 371 378 IEnumerator<double> secondEnumerator = estimatedValues.GetEnumerator();
Note: See TracChangeset for help on using the changeset viewer.