Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/03/16 19:56:55 (8 years ago)
Author:
gkronber
Message:

#2595: added two new variants for variable impact calculation (shuffle and Gaussian noise)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r13985 r13986  
    30 30 using HeuristicLab.Parameters;
    31 31 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
       32 using HeuristicLab.Random;
    32 33
    33 34 namespace HeuristicLab.Problems.DataAnalysis {
     
    37 38     public enum ReplacementMethodEnum {
    38 39       Median,
    39          Average
       40       Average,
       41       Shuffle,
       42       Noise
    40 43     }
    41 44
     
    138 141       var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    139 142       double replacementValue;
        143       List<double> replacementValues;
        144       IRandom rand;
    140 145
    141 146       switch (replacement) {
    142 147         case ReplacementMethodEnum.Median:
    143 148           replacementValue = rows.Select(r => originalValues[r]).Median();
        149           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
    144 150           break;
    145 151         case ReplacementMethodEnum.Average:
    146 152           replacementValue = rows.Select(r => originalValues[r]).Average();
        153           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
    147 154           break;
        155         case ReplacementMethodEnum.Shuffle:
        156           // new var has same empirical distribution but the relation to y is broken
        157           rand = new FastRandom(31415);
        158           replacementValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
        159           break;
        160         case ReplacementMethodEnum.Noise:
        161           var avg = rows.Select(r => originalValues[r]).Average();
        162           var stdDev = rows.Select(r => originalValues[r]).StandardDeviation();
        163           rand = new FastRandom(31415);
        164           replacementValues = rows.Select(_ => NormalDistributedRandom.NextDouble(rand, avg, stdDev)).ToList();
        165           break;
        166
    148 167         default:
    149 168           throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
    150 169       }
    151 170
    152           dataset.ReplaceVariable(variable, Enumerable.Repeat(replacementValue, dataset.Rows).ToList());
        171       dataset.ReplaceVariable(variable, replacementValues);
    153 172       //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    154 173       var estimates = model.GetEstimatedValues(dataset, rows).ToList();
Note: See TracChangeset for help on using the changeset viewer.