Changeset 13986


Ignore:
Timestamp:
07/03/16 19:56:55 (5 years ago)
Author:
gkronber
Message:

#2595: added two new variants for variable impact calculation (shuffle and Gaussian noise)

Location:
trunk/sources
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/Plugin.cs.frame

    r13321 r13986  
    4646  [PluginDependency("HeuristicLab.Problems.DataAnalysis", "3.4")]
    4747  [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic", "3.4")]
     48  [PluginDependency("HeuristicLab.Random", "3.3")]
    4849  public class HeuristicLabProblemsDataAnalysisSymbolicViewsPlugin : PluginBase {
    4950  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r13941 r13986  
    317317      <Private>False</Private>
    318318    </ProjectReference>
     319    <ProjectReference Include="..\..\HeuristicLab.Random\3.3\HeuristicLab.Random-3.3.csproj">
     320      <Project>{F4539FB6-4708-40C9-BE64-0A1390AEA197}</Project>
     321      <Name>HeuristicLab.Random-3.3</Name>
     322    </ProjectReference>
    319323  </ItemGroup>
    320324  <ItemGroup />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r13985 r13986  
    3030using HeuristicLab.Parameters;
    3131using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     32using HeuristicLab.Random;
    3233
    3334namespace HeuristicLab.Problems.DataAnalysis {
     
    3738    public enum ReplacementMethodEnum {
    3839      Median,
    39       Average
     40      Average,
     41      Shuffle,
     42      Noise
    4043    }
    4144
     
    138141      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    139142      double replacementValue;
     143      List<double> replacementValues;
     144      IRandom rand;
    140145
    141146      switch (replacement) {
    142147        case ReplacementMethodEnum.Median:
    143148          replacementValue = rows.Select(r => originalValues[r]).Median();
     149          replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
    144150          break;
    145151        case ReplacementMethodEnum.Average:
    146152          replacementValue = rows.Select(r => originalValues[r]).Average();
     153          replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
    147154          break;
     155        case ReplacementMethodEnum.Shuffle:
     156          // new var has same empirical distribution but the relation to y is broken
     157          rand = new FastRandom(31415);
     158          replacementValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
     159          break;
     160        case ReplacementMethodEnum.Noise:
     161          var avg = rows.Select(r => originalValues[r]).Average();
     162          var stdDev = rows.Select(r => originalValues[r]).StandardDeviation();
     163          rand = new FastRandom(31415);
     164          replacementValues = rows.Select(_ => NormalDistributedRandom.NextDouble(rand, avg, stdDev)).ToList();
     165          break;
     166
    148167        default:
    149168          throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
    150169      }
    151170
    152       dataset.ReplaceVariable(variable, Enumerable.Repeat(replacementValue, dataset.Rows).ToList());
     171      dataset.ReplaceVariable(variable, replacementValues);
    153172      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    154173      var estimates = model.GetEstimatedValues(dataset, rows).ToList();
Note: See TracChangeset for help on using the changeset viewer.