Changeset 16035


Ignore:
Timestamp:
07/31/18 13:26:33 (13 months ago)
Author:
fholzing
Message:

#2904: Improved method ordering and variable naming, and removed code that is no longer necessary.

Location:
branches/2904_CalculateImpacts
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2904_CalculateImpacts/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r16034 r16035  
    3737  [Item("RegressionSolution Impacts Calculator", "Calculation of the impacts of input variables for any regression solution")]
    3838  public sealed class RegressionSolutionVariableImpactsCalculator : ParameterizedNamedItem {
     39    #region Parameters/Properties
    3940    public enum ReplacementMethodEnum {
    4041      Median,
     
    8081      set { DataPartitionParameter.Value.Value = value; }
    8182    }
    82 
    83 
     83    #endregion
     84
     85    #region Ctor/Cloner
    8486    [StorableConstructor]
    8587    private RegressionSolutionVariableImpactsCalculator(bool deserializing) : base(deserializing) { }
    8688    private RegressionSolutionVariableImpactsCalculator(RegressionSolutionVariableImpactsCalculator original, Cloner cloner)
    8789      : base(original, cloner) { }
    88     public override IDeepCloneable Clone(Cloner cloner) {
    89       return new RegressionSolutionVariableImpactsCalculator(this, cloner);
    90     }
    91 
    9290    public RegressionSolutionVariableImpactsCalculator()
    9391      : base() {
     
    9795    }
    9896
     97    public override IDeepCloneable Clone(Cloner cloner) {
     98      return new RegressionSolutionVariableImpactsCalculator(this, cloner);
     99    }
     100    #endregion
     101
     102    #region Public Methods/Wrappers
    99103    //mkommend: annoying name clash with static method, open to better naming suggestions
    100104    public IEnumerable<Tuple<string, double>> Calculate(IRegressionSolution solution) {
     
    106110      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    107111      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    108       DataPartitionEnum data = DataPartitionEnum.Training) {
    109       return CalculateImpacts(solution.Model, solution.ProblemData, solution.EstimatedValues, replacementMethod, factorReplacementMethod, data);
     112      DataPartitionEnum dataPartition = DataPartitionEnum.Training) {
     113      return CalculateImpacts(solution.Model, solution.ProblemData, solution.EstimatedValues, replacementMethod, factorReplacementMethod, dataPartition);
    110114    }
    111115
     
    116120      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    117121      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    118       DataPartitionEnum data = DataPartitionEnum.Training) {
    119       IEnumerable<int> rows;
    120 
    121       switch (data) {
    122         case DataPartitionEnum.All:
    123           rows = problemData.AllIndices;
    124           break;
    125         case DataPartitionEnum.Test:
    126           rows = problemData.TestIndices;
    127           break;
    128         case DataPartitionEnum.Training:
    129           rows = problemData.TrainingIndices;
    130           break;
    131         default:
    132           throw new NotSupportedException("DataPartition not supported");
    133       }
    134 
     122      DataPartitionEnum dataPartition = DataPartitionEnum.Training) {
     123      IEnumerable<int> rows = GetPartitionRows(dataPartition, problemData);
    135124      return CalculateImpacts(model, problemData, estimatedValues, rows, replacementMethod, factorReplacementMethod);
    136     }
    137 
    138     public static double CalculateImpact(string variableName, IRegressionModel model, IRegressionProblemData problemData, IEnumerable<double> estimatedValues, DataPartitionEnum dataPartition, ReplacementMethodEnum replMethod, FactorReplacementMethodEnum factorReplMethod) {
    139       double impact = 0;
    140 
    141       IEnumerable<int> rows;
    142       switch (dataPartition) {
    143         case DataPartitionEnum.All:
    144           rows = problemData.AllIndices;
    145           break;
    146         case DataPartitionEnum.Test:
    147           rows = problemData.TestIndices;
    148           break;
    149         case DataPartitionEnum.Training:
    150           rows = problemData.TrainingIndices;
    151           break;
    152         default:
    153           throw new NotSupportedException("DataPartition not supported");
    154       }
    155 
    156       OnlineCalculatorError error;
    157       IEnumerable<double> targetValuesPartition = rows.Select(v => problemData.TargetVariableValues.ElementAt(v));
    158       IEnumerable<double> estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v));
    159       var originalCalculatorValue = CalculateVariableImpact(targetValuesPartition, estimatedValuesPartition, out error);
    160       if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation.");
    161 
    162 
    163       var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
    164 
    165       // calculate impacts for double variables
    166       if (problemData.Dataset.VariableHasType<double>(variableName)) {
    167         impact = CalculateImpactForNumericalVariables(variableName, model, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, replMethod);
    168       } else if (problemData.Dataset.VariableHasType<string>(variableName)) {
    169         impact = CalculateImpactForFactorVariables(variableName, model, problemData.Dataset, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, factorReplMethod);
    170       } else {
    171         throw new NotSupportedException("Variable not supported");
    172       }
    173       return impact;
    174125    }
    175126
     
    191142      var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(model.VariablesUsedForPrediction));
    192143      var allowedInputVariables = problemData.Dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();
     144      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
    193145
    194146      foreach (var inputVariable in allowedInputVariables) {
    195         impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData.Dataset, rows, targetValuesPartition, originalCalculatorValue, replacementMethod, factorReplacementMethod);
     147        impacts[inputVariable] = CalculateImpact(inputVariable, model, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, replacementMethod, factorReplacementMethod);
    196148      }
    197149
     
    200152
    201153    public static double CalculateImpact(string variableName,
    202       IRegressionSolution solution,
    203       IEnumerable<int> rows,
    204       IEnumerable<double> targetValues,
    205       double originalValue,
    206       ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    207       FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    208       DataPartitionEnum data = DataPartitionEnum.Training) {
    209       return CalculateImpact(variableName, solution.Model, solution.ProblemData.Dataset, rows, targetValues, originalValue, replacementMethod, factorReplacementMethod);
    210     }
    211 
    212     public static double CalculateImpact(string variableName,
    213       IRegressionModel model,
    214       IDataset dataset,
     154      IRegressionModel model,
     155      ModifiableDataset modifiableDataset,
    215156      IEnumerable<int> rows,
    216157      IEnumerable<double> targetValues,
     
    220161
    221162      double impact = 0;
    222       var modifiableDataset = ((Dataset)(dataset).Clone()).ToModifiable();
    223163
    224164      // calculate impacts for double variables
    225       if (dataset.VariableHasType<double>(variableName)) {
     165      if (modifiableDataset.VariableHasType<double>(variableName)) {
    226166        impact = CalculateImpactForNumericalVariables(variableName, model, modifiableDataset, rows, targetValues, originalValue, replacementMethod);
    227       } else if (dataset.VariableHasType<string>(variableName)) {
    228         impact = CalculateImpactForFactorVariables(variableName, model, dataset, modifiableDataset, rows, targetValues, originalValue, factorReplacementMethod);
     167      } else if (modifiableDataset.VariableHasType<string>(variableName)) {
     168        impact = CalculateImpactForFactorVariables(variableName, model, modifiableDataset, rows, targetValues, originalValue, factorReplacementMethod);
    229169      } else {
    230170        throw new NotSupportedException("Variable not supported");
     
    232172      return impact;
    233173    }
     174    #endregion
    234175
    235176    private static double CalculateImpactForNumericalVariables(string variableName,
     
    249190    private static double CalculateImpactForFactorVariables(string variableName,
    250191      IRegressionModel model,
    251       IDataset problemData,
    252192      ModifiableDataset modifiableDataset,
    253193      IEnumerable<int> rows,
     
    260200        // try replacing with all possible values and find the best replacement value
    261201        var smallestImpact = double.PositiveInfinity;
    262         foreach (var repl in problemData.GetStringValues(variableName, rows).Distinct()) {
     202        foreach (var repl in modifiableDataset.GetStringValues(variableName, rows).Distinct()) {
    263203          var originalValues = modifiableDataset.GetReadOnlyStringValues(variableName).ToList();
    264           var newEstimates = GetReplacedValues(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, problemData.Rows).ToList());
     204          var newEstimates = GetReplacedEstimates(originalValues, model, variableName, modifiableDataset, rows, Enumerable.Repeat(repl, modifiableDataset.Rows).ToList());
    265205          var newValue = CalculateVariableImpact(targetValues, newEstimates, out error);
    266206          if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation with replaced inputs.");
     
    330270      }
    331271
    332       return GetReplacedValues(originalValues, model, variable, dataset, rows, replacementValues);
     272      return GetReplacedEstimates(originalValues, model, variable, dataset, rows, replacementValues);
    333273    }
    334274
     
    368308      }
    369309
    370       return GetReplacedValues(originalValues, model, variable, dataset, rows, replacementValues);
    371     }
    372 
    373     private static IEnumerable<double> GetReplacedValues(
     310      return GetReplacedEstimates(originalValues, model, variable, dataset, rows, replacementValues);
     311    }
     312
     313    private static IEnumerable<double> GetReplacedEstimates(
    374314      IList originalValues,
    375315      IRegressionModel model,
     
    386326    }
    387327
    388     private static double CalculateVariableImpact(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {
     328    public static double CalculateVariableImpact(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out OnlineCalculatorError errorState) {
    389329      IEnumerator<double> firstEnumerator = originalValues.GetEnumerator();
    390330      IEnumerator<double> secondEnumerator = estimatedValues.GetEnumerator();
     
    408348      }
    409349    }
     350
     351    public static IEnumerable<int> GetPartitionRows(DataPartitionEnum dataPartition, IRegressionProblemData problemData) {
     352      IEnumerable<int> rows;
     353
     354      switch (dataPartition) {
     355        case DataPartitionEnum.All:
     356          rows = problemData.AllIndices;
     357          break;
     358        case DataPartitionEnum.Test:
     359          rows = problemData.TestIndices;
     360          break;
     361        case DataPartitionEnum.Training:
     362          rows = problemData.TrainingIndices;
     363          break;
     364        default:
     365          throw new NotSupportedException("DataPartition not supported");
     366      }
     367
     368      return rows;
     369    }
    410370  }
    411371}
  • branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.cs

    r16034 r16035  
    160160      int count = originalVariableOrdering.Count;
    161161      int i = 0;
    162 
    163       foreach (var variable in originalVariableOrdering) {
     162      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
     163      IEnumerable<int> rows = RegressionSolutionVariableImpactsCalculator.GetPartitionRows(dataPartition, problemData);
     164
     165      //Calculate original quality-values (via calculator, default is R²)
     166      OnlineCalculatorError error;
     167      IEnumerable<double> targetValuesPartition = rows.Select(v => problemData.TargetVariableValues.ElementAt(v));
     168      IEnumerable<double> estimatedValuesPartition = rows.Select(v => estimatedValues.ElementAt(v));
     169      var originalCalculatorValue = RegressionSolutionVariableImpactsCalculator.CalculateVariableImpact(targetValuesPartition, estimatedValuesPartition, out error);
     170      if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during calculation.");
     171
     172      foreach (var variableName in originalVariableOrdering) {
    164173        if (cancellationToken.Token.IsCancellationRequested) { return null; }
    165174        progress.ProgressValue = (double)++i / count;
    166         progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variable, i, count);
    167 
    168         double impact = RegressionSolutionVariableImpactsCalculator.CalculateImpact(variable, model, problemData, Content.EstimatedValues, dataPartition, replMethod, factorReplMethod);
    169         impacts.Add(new Tuple<string, double>(variable, impact));
     175        progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variableName, i, count);
     176
     177        double impact = RegressionSolutionVariableImpactsCalculator.CalculateImpact(variableName, model, modifiableDataset, rows, targetValuesPartition, originalCalculatorValue, replMethod, factorReplMethod);
     178        impacts.Add(new Tuple<string, double>(variableName, impact));
    170179      }
    171180
Note: See TracChangeset for help on using the changeset viewer.