Changeset 16034
- Timestamp: 07/30/18 16:59:47 (6 years ago)
- Location: branches/2904_CalculateImpacts
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2904_CalculateImpacts/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs
r16031 r16034 106 106 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle, 107 107 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 108 DataPartitionEnum data = DataPartitionEnum.Training, 109 Func<double, string, bool> progressCallback = null) { 110 return CalculateImpacts(solution.Model, solution.ProblemData, solution.EstimatedValues, replacementMethod, factorReplacementMethod, data, progressCallback); 108 DataPartitionEnum data = DataPartitionEnum.Training) { 109 return CalculateImpacts(solution.Model, solution.ProblemData, solution.EstimatedValues, replacementMethod, factorReplacementMethod, data); 111 110 } 112 111 … … 117 116 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle, 118 117 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 119 DataPartitionEnum data = DataPartitionEnum.Training, 120 Func<double, string, bool> progressCallback = null) { 118 DataPartitionEnum data = DataPartitionEnum.Training) { 121 119 IEnumerable<int> rows; 122 120 … … 135 133 } 136 134 137 return CalculateImpacts(model, problemData, estimatedValues, rows, replacementMethod, factorReplacementMethod, progressCallback); 135 return CalculateImpacts(model, problemData, estimatedValues, rows, replacementMethod, factorReplacementMethod); 136 } 137 138 public static double CalculateImpact(string variableName, IRegressionModel model, IRegressionProblemData problemData, IEnumerable<double> estimatedValues, DataPartitionEnum dataPartition, ReplacementMethodEnum replMethod, FactorReplacementMethodEnum factorReplMethod) { 139 double impact = 0; 140 141 IEnumerable<int> rows; 142 switch (dataPartition) { 143 case DataPartitionEnum.All: 144 rows = problemData.AllIndices; 145 break; 146 case DataPartitionEnum.Test: 147 rows = problemData.TestIndices; 148 break; 149 case DataPartitionEnum.Training: 150 rows = problemData.TrainingIndices; 151 break; 152 default: 153 throw new 
NotSupportedException("DataPartition not supported"); 154 } 155 156 OnlineCalculatorError error; 157 IEnumerable<double> targetValuesPartition = rows.Select(v => problemData.TargetVariableValues.ElementAt(v)); 158 IEnumerable<double> estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v)); 159 var originalCalculatorValue = CalculateVariableImpact(targetValuesPartition, estimatedValuesPartition, out error); 160 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation."); 161 162 163 var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable(); 164 165 // calculate impacts for double variables 166 if (problemData.Dataset.VariableHasType<double>(variableName)) { 167 impact = CalculateImpactForNumericalVariables(variableName, model, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, replMethod); 168 } else if (problemData.Dataset.VariableHasType<string>(variableName)) { 169 impact = CalculateImpactForFactorVariables(variableName, model, problemData.Dataset, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, factorReplMethod); 170 } else { 171 throw new NotSupportedException("Variable not supported"); 172 } 173 return impact; 138 174 } 139 175 … … 144 180 IEnumerable<int> rows, 145 181 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle, 146 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 147 Func<double, string, bool> progressCallback = null) { 182 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) { 148 183 //Calculate original quality-values (via calculator, default is R²) 149 184 OnlineCalculatorError error; … … 157 192 var allowedInputVariables = problemData.Dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList(); 158 193 159 int curIdx = 0;160 int count = allowedInputVariables.Count(v => 
problemData.Dataset.VariableHasType<double>(v) || problemData.Dataset.VariableHasType<string>(v));161 162 194 foreach (var inputVariable in allowedInputVariables) { 163 //Report the current progress in percent. If the callback returns true, it means the execution shall be stopped164 if (progressCallback != null) {165 curIdx++;166 if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; }167 }168 195 impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValuesPartition, originalCalculatorValue, replacementMethod, factorReplacementMethod); 169 196 } … … 204 231 } 205 232 return impact; 206 }207 208 private static void PrepareData(IEnumerable<int> rows,209 IRegressionProblemData problemData,210 IEnumerable<double> estimatedValues,211 out IEnumerable<double> targetValues,212 out double originalValue) {213 OnlineCalculatorError error;214 215 var targetVariableValueList = problemData.TargetVariableValues.ToList();216 targetValues = rows.Select(v => targetVariableValueList.ElementAt(v));217 var estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v));218 originalValue = CalculateVariableImpact(targetValues, estimatedValuesPartition, out error);219 220 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation.");221 233 } 222 234 -
branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4/HeuristicLab.Problems.DataAnalysis.Views-3.4.csproj
r16030 r16034 52 52 <DebugType>pdbonly</DebugType> 53 53 <Optimize>true</Optimize> 54 <OutputPath>..\..\..\ trunk\bin\</OutputPath>54 <OutputPath>..\..\..\..\trunk\bin\</OutputPath> 55 55 <DefineConstants>TRACE</DefineConstants> 56 56 <ErrorReport>prompt</ErrorReport> -
branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.cs
r16030 r16034 33 33 [Content(typeof(IRegressionSolution))] 34 34 public partial class RegressionSolutionVariableImpactsView : DataAnalysisSolutionEvaluationView { 35 private CancellationTokenSource cancellationToken = new CancellationTokenSource();36 35 private enum SortingCriteria { 37 36 ImpactValue, … … 39 38 VariableName 40 39 } 40 private CancellationTokenSource cancellationToken = new CancellationTokenSource(); 41 41 private List<Tuple<string, double>> rawVariableImpacts = new List<Tuple<string, double>>(); 42 42 … … 64 64 Content.ProblemDataChanged += new EventHandler(Content_ProblemDataChanged); 65 65 } 66 67 66 protected override void DeregisterContentEvents() { 68 67 base.DeregisterContentEvents(); … … 74 73 OnContentChanged(); 75 74 } 76 77 75 protected virtual void Content_ModelChanged(object sender, EventArgs e) { 78 76 OnContentChanged(); 79 77 } 80 81 78 protected override void OnContentChanged() { 82 79 base.OnContentChanged(); … … 87 84 } 88 85 } 89 90 86 private void RegressionSolutionVariableImpactsView_VisibleChanged(object sender, EventArgs e) { 91 87 cancellationToken.Cancel(); 92 88 } 93 89 94 95 90 private void dataPartitionComboBox_SelectedIndexChanged(object sender, EventArgs e) { 96 91 UpdateVariableImpact(); 97 92 } 98 99 93 private void replacementComboBox_SelectedIndexChanged(object sender, EventArgs e) { 100 94 UpdateVariableImpact(); 101 95 } 102 103 96 private void sortByComboBox_SelectedIndexChanged(object sender, EventArgs e) { 104 97 //Update the default ordering (asc,desc), but remove the eventHandler beforehand (otherwise the data would be ordered twice) … … 109 102 UpdateOrdering(); 110 103 } 111 112 104 private void ascendingCheckBox_CheckedChanged(object sender, EventArgs e) { 113 105 UpdateOrdering(); 114 106 } 115 116 107 117 108 private async void UpdateVariableImpact() { … … 135 126 136 127 cancellationToken = new CancellationTokenSource(); 137 //Remember the original ordering of the variables 128 138 129 try { 139 var 
impacts = await Task.Run(() => RegressionSolutionVariableImpactsCalculator.CalculateImpacts(Content, replMethod, factorReplMethod, dataPartition,140 (i, s) => {141 progress.ProgressValue = i;142 progress.Status = s;143 return cancellationToken.Token.IsCancellationRequested;144 }), cancellationToken.Token);145 146 if (cancellationToken.Token.IsCancellationRequested) { return; }147 130 var problemData = Content.ProblemData; 148 131 var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(Content.Model.VariablesUsedForPrediction)); 132 //Remember the original ordering of the variables 149 133 var originalVariableOrdering = problemData.Dataset.VariableNames 150 134 .Where(v => inputvariables.Contains(v)) … … 152 136 .ToList(); 153 137 138 List<Tuple<string, double>> impacts = null; 139 await Task.Run(() => { impacts = CalculateVariableImpacts(originalVariableOrdering, Content.Model, problemData, Content.EstimatedValues, dataPartition, replMethod, factorReplMethod, cancellationToken.Token, progress); }); 140 if (impacts == null) { return; } 141 154 142 rawVariableImpacts.Clear(); 155 143 originalVariableOrdering.ForEach(v => rawVariableImpacts.Add(new Tuple<string, double>(v, impacts.First(vv => vv.Item1 == v).Item2))); 156 144 UpdateOrdering(); 157 } finally { 145 } 146 finally { 158 147 ((MainForm.WindowsForms.MainForm)MainFormManager.MainForm).RemoveOperationProgressFromView(this); 159 148 } 149 } 150 private List<Tuple<string, double>> CalculateVariableImpacts(List<string> originalVariableOrdering, 151 IRegressionModel model, 152 IRegressionProblemData problemData, 153 IEnumerable<double> estimatedValues, 154 RegressionSolutionVariableImpactsCalculator.DataPartitionEnum dataPartition, 155 RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum replMethod, 156 RegressionSolutionVariableImpactsCalculator.FactorReplacementMethodEnum factorReplMethod, 157 CancellationToken token, 158 IProgress progress) { 159 List<Tuple<string, double>> 
impacts = new List<Tuple<string, double>>(); 160 int count = originalVariableOrdering.Count; 161 int i = 0; 162 163 foreach (var variable in originalVariableOrdering) { 164 if (cancellationToken.Token.IsCancellationRequested) { return null; } 165 progress.ProgressValue = (double)++i / count; 166 progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variable, i, count); 167 168 double impact = RegressionSolutionVariableImpactsCalculator.CalculateImpact(variable, model, problemData, Content.EstimatedValues, dataPartition, replMethod, factorReplMethod); 169 impacts.Add(new Tuple<string, double>(variable, impact)); 170 } 171 172 return impacts; 160 173 } 161 174
Note: See TracChangeset for help on using the changeset viewer.