- Timestamp: 08/05/16 17:34:16 (8 years ago)
- Location: branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs
r13986 r14238  (columns: old line, new line; "-" = removed, "+" = added, unmarked = unmodified)
  90   90      public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution,
  91   91        DataPartitionEnum data = DataPartitionEnum.Training,
  92        -    ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
       92   +    ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median) {
  93   93
  94   94        var problemData = solution.ProblemData;
   …    …
 126  126        var modifiableDataset = ((Dataset)dataset).ToModifiable();
 127  127
 128        -    foreach (var inputVariable in problemData.AllowedInputVariables) {
 129        -      var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacement);
      128   +    // calculate impacts for double variables
      129   +    foreach (var inputVariable in problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
      130   +      var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
 130  131          var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
 131  132          if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
   …    …
 134  135          var impact = originalR2 - newR2;
 135  136          impacts[inputVariable] = impact;
      137   +    }
      138   +    // calculate impacts for factor variables
      139   +    foreach (var inputVariable in problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) {
      140   +      var smallestImpact = double.PositiveInfinity;
      141   +      foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
      142   +        var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, Enumerable.Repeat(repl, dataset.Rows));
      143   +        var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
      144   +        if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
      145   +
      146   +        newR2 = newR2 * newR2;
      147   +        var impact = originalR2 - newR2;
      148   +        if (impact < smallestImpact) smallestImpact = impact;
      149   +      }
      150   +      impacts[inputVariable] = smallestImpact;
 136  151        }
 137  152        return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value));
   …    …
 169  184        }
 170  185
 171        -    dataset.ReplaceVariable(variable, replacementValues);
      186   +    return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
      187   +  }
      188   +
      189   +  private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues) {
      190   +    var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
      191   +    dataset.ReplaceVariable(variable, replacementValues.ToList());
 172  192        //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
 173  193        var estimates = model.GetEstimatedValues(dataset, rows).ToList();
   …    …
 176  196        return estimates;
 177  197      }
      198   +  private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues) {
      199   +    var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
      200   +    dataset.ReplaceVariable(variable, replacementValues.ToList());
      201   +    //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
      202   +    var estimates = model.GetEstimatedValues(dataset, rows).ToList();
      203   +    dataset.ReplaceVariable(variable, originalValues);
      204   +
      205   +    return estimates;
      206   +  }
 178  207    }
 179  208  }
-
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs
r13761 r14238  (columns: old line, new line; "-" = removed, "+" = added, unmarked = unmodified)
  77   77      public void ReplaceVariable(string variableName, IList values) {
  78   78        if (!variableValues.ContainsKey(variableName))
  79        -      throw new ArgumentException(string.Format("Variable {0} is not present in the dataset." ), variableName);
       79   +      throw new ArgumentException(string.Format("Variable {0} is not present in the dataset.", variableName));
  80   80        if (values.Count != variableValues[variableName].Count)
  81   81          throw new ArgumentException("The number of values must coincide with the number of dataset rows.");
Note: See TracChangeset for help on using the changeset viewer.