Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/08/16 12:25:13 (8 years ago)
Author:
mkommend
Message:

#2595: Merged r13766, r13942, r13985, r13986, r13987 into stable.

Location:
stable
Files:
2 edited
1 copied

Legend:

Unmodified
Added
Removed
  • stable

  • stable/HeuristicLab.Problems.DataAnalysis

  • stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r13766 r14022  
     30  30   using HeuristicLab.Parameters;
     31  31   using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
         32 + using HeuristicLab.Random;
     32  33
     33  34   namespace HeuristicLab.Problems.DataAnalysis {
     34  35     [StorableClass]
     35     -   [Item("RegressionSolution Impacts Calculator", "Calculation of the impacts of input variables for a concrete ")]
         36 +   [Item("RegressionSolution Impacts Calculator", "Calculation of the impacts of input variables for any regression solution")]
     36  37     public sealed class RegressionSolutionVariableImpactsCalculator : ParameterizedNamedItem {
     37  38       public enum ReplacementMethodEnum {
     38  39         Median,
     39     -       Average
         40 +       Average,
         41 +       Shuffle,
         42 +       Noise
     40  43       }
     41  44
     
     77  80         : base() {
     78  81         Parameters.Add(new FixedValueParameter<EnumValue<ReplacementMethodEnum>>(ReplacementParameterName, "The replacement method for variables during impact calculation.", new EnumValue<ReplacementMethodEnum>(ReplacementMethodEnum.Median)));
     79     -       Parameters.Add(new FixedValueParameter<EnumValue<DataPartitionEnum>>(ReplacementParameterName, "The data partition on which the impacts are calculated.", new EnumValue<DataPartitionEnum>(DataPartitionEnum.Training)));
         82 +       Parameters.Add(new FixedValueParameter<EnumValue<DataPartitionEnum>>(DataPartitionParameterName, "The data partition on which the impacts are calculated.", new EnumValue<DataPartitionEnum>(DataPartitionEnum.Training)));
     80  83       }
     81  84
     
    138 141         var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    139 142         double replacementValue;
        143 +       List<double> replacementValues;
        144 +       IRandom rand;
    140 145
    141 146         switch (replacement) {
    142 147           case ReplacementMethodEnum.Median:
    143 148             replacementValue = rows.Select(r => originalValues[r]).Median();
        149 +           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
    144 150             break;
    145 151           case ReplacementMethodEnum.Average:
    146 152             replacementValue = rows.Select(r => originalValues[r]).Average();
        153 +           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
    147 154             break;
        155 +         case ReplacementMethodEnum.Shuffle:
        156 +           // new var has same empirical distribution but the relation to y is broken
        157 +           rand = new FastRandom(31415);
        158 +           replacementValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
        159 +           break;
        160 +         case ReplacementMethodEnum.Noise:
        161 +           var avg = rows.Select(r => originalValues[r]).Average();
        162 +           var stdDev = rows.Select(r => originalValues[r]).StandardDeviation();
        163 +           rand = new FastRandom(31415);
        164 +           replacementValues = rows.Select(_ => NormalDistributedRandom.NextDouble(rand, avg, stdDev)).ToList();
        165 +           break;
        166 +
    148 167           default:
    149 168             throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
    150 169         }
    151 170
    152     -       dataset.ReplaceVariable(variable, Enumerable.Repeat(replacementValue, dataset.Rows).ToList());
        171 +       dataset.ReplaceVariable(variable, replacementValues);
    153 172         //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    154 173         var estimates = model.GetEstimatedValues(dataset, rows).ToList();
Note: See TracChangeset for help on using the changeset viewer.