Changeset 16141 for branches/2817-BinPackingSpeedup/HeuristicLab.Problems.DataAnalysis/3.4/Implementation
- Timestamp: 09/14/18 11:47:37 (6 years ago)
- Location: branches/2817-BinPackingSpeedup
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2817-BinPackingSpeedup
- Property svn:mergeinfo changed
-
branches/2817-BinPackingSpeedup/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
-
branches/2817-BinPackingSpeedup/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionVariableImpactsCalculator.cs
r16140 r16141 100 100 var problemData = solution.ProblemData; 101 101 var dataset = problemData.Dataset; 102 var model = (IClassificationModel)solution.Model.Clone(); //mkommend: clone of model is necessary, because the thresholds for IDiscriminantClassificationModels are updated 102 103 103 104 IEnumerable<int> rows; … … 137 138 // calculate impacts for double variables 138 139 foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) { 139 var newEstimates = EvaluateModelWithReplacedVariable( solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);140 var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, replacementMethod); 140 141 var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error); 141 142 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs."); … … 150 151 var smallestImpact = double.PositiveInfinity; 151 152 foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) { 152 var newEstimates = EvaluateModelWithReplacedVariable( solution.Model, inputVariable, modifiableDataset, rows,153 var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, 153 154 Enumerable.Repeat(repl, dataset.Rows)); 154 155 var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error); … … 164 165 // calculate impacts for factor variables 165 166 166 var newEstimates = EvaluateModelWithReplacedVariable( solution.Model, inputVariable, modifiableDataset, rows,167 var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, 167 168 factorReplacementMethod); 168 169 var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error); … … 263 264 var originalValues = 
dataset.GetReadOnlyDoubleValues(variable).ToList(); 264 265 dataset.ReplaceVariable(variable, replacementValues.ToList()); 266 267 var discModel = model as IDiscriminantFunctionClassificationModel; 268 if (discModel != null) { 269 var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable); 270 discModel.RecalculateModelParameters(problemData, rows); 271 } 272 265 273 //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements 266 274 var estimates = model.GetEstimatedClassValues(dataset, rows).ToList(); … … 273 281 var originalValues = dataset.GetReadOnlyStringValues(variable).ToList(); 274 282 dataset.ReplaceVariable(variable, replacementValues.ToList()); 283 284 285 var discModel = model as IDiscriminantFunctionClassificationModel; 286 if (discModel != null) { 287 var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable); 288 discModel.RecalculateModelParameters(problemData, rows); 289 } 290 275 291 //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements 276 292 var estimates = model.GetEstimatedClassValues(dataset, rows).ToList(); -
branches/2817-BinPackingSpeedup/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r16140 r16141 163 163 164 164 var variables = dataset.VariableNames.Where(variable => dataset.VariableHasType<double>(variable) || dataset.VariableHasType<string>(variable)); 165 var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x) ));165 var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x).AsReadOnly())); 166 166 foreach (StringValue x in inputVariables) 167 167 inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value)); -
branches/2817-BinPackingSpeedup/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs
r16140 r16141 52 52 All 53 53 } 54 54 55 55 private const string ReplacementParameterName = "Replacement Method"; 56 56 private const string DataPartitionParameterName = "DataPartition"; … … 96 96 DataPartitionEnum data = DataPartitionEnum.Training, 97 97 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median, 98 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) { 98 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 99 Func<double, string, bool> progressCallback = null) { 99 100 100 101 var problemData = solution.ProblemData; … … 134 135 var allowedInputVariables = dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList(); 135 136 137 int curIdx = 0; 138 int count = allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>).Count(); 136 139 // calculate impacts for double variables 137 140 foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) { 141 //Report the current progress in percent. If the callback returns true, it means the execution shall be stopped 142 if (progressCallback != null) { 143 curIdx++; 144 if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; } 145 } 138 146 var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod); 139 147 var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error); … … 180 188 } 181 189 190 182 191 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) { 183 192 var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
Note: See TracChangeset for help on using the changeset viewer.