Free cookie consent management tool by TermsFeed Policy Generator

Changeset 16438 for stable


Ignore:
Timestamp:
12/21/18 14:20:24 (6 years ago)
Author:
mkommend
Message:

#2904: Merged r16422 and r16423 into stable.

Location:
stable
Files:
1 deleted
14 edited
2 copied

Legend:

Unmodified
Added
Removed
  • stable

  • stable/HeuristicLab.Problems.DataAnalysis

  • stable/HeuristicLab.Problems.DataAnalysis.Views

  • stable/HeuristicLab.Problems.DataAnalysis.Views/3.4

    • Property svn:mergeinfo set to (toggle deleted branches)
      /trunk/HeuristicLab.Problems.DataAnalysis.Views/3.4 merged eligible
      /branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4 16029-16421
      /branches/Async/HeuristicLab.Problems.DataAnalysis.Views/3.4 13329-15286
      /branches/Benchmarking/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4 6917-7005
      /branches/ClassificationModelComparison/HeuristicLab.Problems.DataAnalysis.Views/3.4 9116-13099
      /branches/CloningRefactoring/HeuristicLab.Problems.DataAnalysis.Views/3.4 4656-4721
      /branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Views/3.4 5471-5808
      /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.DataAnalysis.Views/3.4 5815-6180
      /branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Views/3.4 4458-4459, 4462, 4464
      /branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Views/3.4 10085-11101
      /branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis.Views/3.4 8036-8538
      /branches/GP.Grammar.Editor/HeuristicLab.Problems.DataAnalysis.Views/3.4 6284-6795
      /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Problems.DataAnalysis.Views/3.4 5060
      /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4 11570-12508
      /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Problems.DataAnalysis.Views/3.4 11130-12721
      /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Problems.DataAnalysis.Views/3.4 13780-14091
      /branches/HeuristicLab.TimeSeries/HeuristicLab.Problems.DataAnalysis.Views/3.4 7098-8789
      /branches/NET40/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4 5138-5162
      /branches/ParallelEngine/HeuristicLab.Problems.DataAnalysis.Views/3.4 5175-5192
      /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Problems.DataAnalysis.Views/3.4 7568-7810
      /branches/QAPAlgorithms/HeuristicLab.Problems.DataAnalysis.Views/3.4 6350-6627
      /branches/Restructure trunk solution/HeuristicLab.Problems.DataAnalysis.Views/3.4 6828
      /branches/SimplifierViewsProgress/HeuristicLab.Problems.DataAnalysis.Views/3.4 15318-15370
      /branches/SuccessProgressAnalysis/HeuristicLab.Problems.DataAnalysis.Views/3.4 5370-5682
      /branches/Trunk/HeuristicLab.Problems.DataAnalysis.Views/3.4 6829-6865
      /branches/VNS/HeuristicLab.Problems.DataAnalysis.Views/3.4 5594-5752
      /branches/histogram/HeuristicLab.Problems.DataAnalysis.Views/3.4 5959-6341
      /branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Views/3.4 14232-14825
      /trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.49568,​9845,​9859-9860,​9865-9868,​9893-9896,​9900-9901,​9905,​9907,​9973-9975,​9994,​10173-10176,​10500,​10526,​10540-10541,​10543,​10545,​10941,​11031,​11093,​11114,​11116,​11156,​11214,​11367,​11623,​11631,​11634,​12137,​12151-12152,​12365,​12493,​12509-12511,​12524,​12577-12578,​12614,​12642,​12670,​12679,​12722,​12770,​12772,​12790-12792,​12796,​12798,​12801,​12811-12812,​12817,​12836-12837,​12907,​12971,​13002-13004,​13087,​13100-13104,​13154,​13167-13169,​13186,​13268,​13406,​13428-13430,​13434,​13439,​13450,​13474,​13501,​13503,​13511,​13513,​13534-13535,​13540,​13550,​13552,​13592-13593,​13645,​13648,​13650-13652,​13654,​13657-13659,​13661-13662,​13666,​13669,​13682-13684,​13690-13693,​13704-13705,​13708-13709,​13711,​13715,​13724,​13746,​13764-13766,​13807,​13938,​13942,​13958,​13985-13987,​13992-13993,​14000-14001,​14007-14008,​14014-14016,​14095-14096,​14098-14099,​14107,​14118-14119,​14131,​14135,​14142,​14152,​14155-14160,​14226,​14228-14230,​14234-14236,​14244-14247,​14250,​14255-14258,​14260,​14267,​14271-14272,​14282,​14284-14292,​14296-14298,​14300,​14307,​14314-14316,​14319,​14322,​14332,​14343-14350,​14358,​14367-14368,​14378,​14381-14382,​14384,​14388,​14390-14391,​14393-14394,​14396,​14400,​14405,​14407-14408,​14418,​14422-14423,​14425,​14434,​14463-14464,​14468-14469,​14479,​14483,​14486,​14507,​14517,​14523,​14527,​14529,​14531-14533,​14553,​14623,​14630,​14770,​14772,​14781,​14789-14791,​14805,​14826-14827,​14829-14832,​14839-14840,​14843,​14845-14847,​14851-14854,​14857,​14864-14865,​14871,​14889-14890,​14899,​14904,​14918,​14937-14938,​14940,​14943-14946,​14948-14951,​15002,​15013,​15015-15016,​15023-15024,​15026,​15046,​15052-15054,​15058,​15077,​15085,​15088,​15094,​15103-15106,​15111-15113,​15122-15124,​15129,​15139,​15160,​15163,​15165,​15184-15185,​15187,​15194,​15211,​15213,​15222,​15287,​15371-15372,​15390,​15395,​15400,​15402,​15427,​15486
  • stable/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionVariableImpactsView.Designer.cs

    r16432 r16438  
    1919 */
    2020#endregion
     21
     22
    2123namespace HeuristicLab.Problems.DataAnalysis.Views {
    2224  partial class ClassificationSolutionVariableImpactsView {
     
    4446    /// </summary>
    4547    private void InitializeComponent() {
    46       this.variableImactsArrayView = new HeuristicLab.Data.Views.StringConvertibleArrayView();
    47       this.dataPartitionComboBox = new System.Windows.Forms.ComboBox();
    48       this.dataPartitionLabel = new System.Windows.Forms.Label();
    49       this.numericVarReplacementLabel = new System.Windows.Forms.Label();
    50       this.replacementComboBox = new System.Windows.Forms.ComboBox();
    51       this.factorVarReplacementLabel = new System.Windows.Forms.Label();
    52       this.factorVarReplComboBox = new System.Windows.Forms.ComboBox();
    5348      this.ascendingCheckBox = new System.Windows.Forms.CheckBox();
    5449      this.sortByLabel = new System.Windows.Forms.Label();
    5550      this.sortByComboBox = new System.Windows.Forms.ComboBox();
    56       this.backgroundWorker = new System.ComponentModel.BackgroundWorker();
     51      this.factorVarReplComboBox = new System.Windows.Forms.ComboBox();
     52      this.factorVarReplacementLabel = new System.Windows.Forms.Label();
     53      this.replacementComboBox = new System.Windows.Forms.ComboBox();
     54      this.numericVarReplacementLabel = new System.Windows.Forms.Label();
     55      this.dataPartitionLabel = new System.Windows.Forms.Label();
     56      this.dataPartitionComboBox = new System.Windows.Forms.ComboBox();
     57      this.variableImpactsArrayView = new HeuristicLab.Data.Views.StringConvertibleArrayView();
    5758      this.SuspendLayout();
    5859      //
    59       // variableImactsArrayView
    60       //
    61       this.variableImactsArrayView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
    62             | System.Windows.Forms.AnchorStyles.Left)
    63             | System.Windows.Forms.AnchorStyles.Right)));
    64       this.variableImactsArrayView.Caption = "StringConvertibleArray View";
    65       this.variableImactsArrayView.Content = null;
    66       this.variableImactsArrayView.Location = new System.Drawing.Point(3, 84);
    67       this.variableImactsArrayView.Name = "variableImactsArrayView";
    68       this.variableImactsArrayView.ReadOnly = true;
    69       this.variableImactsArrayView.Size = new System.Drawing.Size(662, 278);
    70       this.variableImactsArrayView.TabIndex = 2;
    71       //
    72       // dataPartitionComboBox
    73       //
    74       this.dataPartitionComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
    75       this.dataPartitionComboBox.FormattingEnabled = true;
    76       this.dataPartitionComboBox.Items.AddRange(new object[] {
    77             HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum.Training,
    78             HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum.Test,
    79             HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum.All});
    80       this.dataPartitionComboBox.Location = new System.Drawing.Point(197, 3);
    81       this.dataPartitionComboBox.Name = "dataPartitionComboBox";
    82       this.dataPartitionComboBox.Size = new System.Drawing.Size(121, 21);
    83       this.dataPartitionComboBox.TabIndex = 1;
    84       this.dataPartitionComboBox.SelectedIndexChanged += new System.EventHandler(this.dataPartitionComboBox_SelectedIndexChanged);
    85       //
    86       // dataPartitionLabel
    87       //
    88       this.dataPartitionLabel.AutoSize = true;
    89       this.dataPartitionLabel.Location = new System.Drawing.Point(3, 6);
    90       this.dataPartitionLabel.Name = "dataPartitionLabel";
    91       this.dataPartitionLabel.Size = new System.Drawing.Size(73, 13);
    92       this.dataPartitionLabel.TabIndex = 0;
    93       this.dataPartitionLabel.Text = "Data partition:";
    94       //
    95       // numericVarReplacementLabel
    96       //
    97       this.numericVarReplacementLabel.AutoSize = true;
    98       this.numericVarReplacementLabel.Location = new System.Drawing.Point(3, 33);
    99       this.numericVarReplacementLabel.Name = "numericVarReplacementLabel";
    100       this.numericVarReplacementLabel.Size = new System.Drawing.Size(173, 13);
    101       this.numericVarReplacementLabel.TabIndex = 2;
    102       this.numericVarReplacementLabel.Text = "Replacement for numeric variables:";
     60      // ascendingCheckBox
     61      //
     62      this.ascendingCheckBox.AutoSize = true;
     63      this.ascendingCheckBox.CheckAlign = System.Drawing.ContentAlignment.MiddleRight;
     64      this.ascendingCheckBox.Location = new System.Drawing.Point(452, 32);
     65      this.ascendingCheckBox.Name = "ascendingCheckBox";
     66      this.ascendingCheckBox.Size = new System.Drawing.Size(76, 17);
     67      this.ascendingCheckBox.TabIndex = 7;
     68      this.ascendingCheckBox.Text = "Ascending";
     69      this.ascendingCheckBox.UseVisualStyleBackColor = true;
     70      this.ascendingCheckBox.CheckedChanged += new System.EventHandler(this.ascendingCheckBox_CheckedChanged);
     71      //
     72      // sortByLabel
     73      //
     74      this.sortByLabel.AutoSize = true;
     75      this.sortByLabel.Location = new System.Drawing.Point(324, 6);
     76      this.sortByLabel.Name = "sortByLabel";
     77      this.sortByLabel.Size = new System.Drawing.Size(77, 13);
     78      this.sortByLabel.TabIndex = 4;
     79      this.sortByLabel.Text = "Sorting criteria:";
     80      //
     81      // sortByComboBox
     82      //
     83      this.sortByComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
     84      this.sortByComboBox.FormattingEnabled = true;
     85      this.sortByComboBox.Items.AddRange(new object[] {
     86            HeuristicLab.Problems.DataAnalysis.Views.ClassificationSolutionVariableImpactsView.SortingCriteria.ImpactValue,
     87            HeuristicLab.Problems.DataAnalysis.Views.ClassificationSolutionVariableImpactsView.SortingCriteria.Occurrence,
     88            HeuristicLab.Problems.DataAnalysis.Views.ClassificationSolutionVariableImpactsView.SortingCriteria.VariableName});
     89      this.sortByComboBox.Location = new System.Drawing.Point(407, 3);
     90      this.sortByComboBox.Name = "sortByComboBox";
     91      this.sortByComboBox.Size = new System.Drawing.Size(121, 21);
     92      this.sortByComboBox.TabIndex = 5;
     93      this.sortByComboBox.SelectedIndexChanged += new System.EventHandler(this.sortByComboBox_SelectedIndexChanged);
     94      //
     95      // factorVarReplComboBox
     96      //
     97      this.factorVarReplComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
     98      this.factorVarReplComboBox.FormattingEnabled = true;
     99      this.factorVarReplComboBox.Items.AddRange(new object[] {
     100            HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Best,
     101            HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Mode,
     102            HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Shuffle});
     103      this.factorVarReplComboBox.Location = new System.Drawing.Point(197, 57);
     104      this.factorVarReplComboBox.Name = "factorVarReplComboBox";
     105      this.factorVarReplComboBox.Size = new System.Drawing.Size(121, 21);
     106      this.factorVarReplComboBox.TabIndex = 1;
     107      this.factorVarReplComboBox.SelectedIndexChanged += new System.EventHandler(this.replacementComboBox_SelectedIndexChanged);
     108      //
     109      // factorVarReplacementLabel
     110      //
     111      this.factorVarReplacementLabel.AutoSize = true;
     112      this.factorVarReplacementLabel.Location = new System.Drawing.Point(3, 60);
     113      this.factorVarReplacementLabel.Name = "factorVarReplacementLabel";
     114      this.factorVarReplacementLabel.Size = new System.Drawing.Size(188, 13);
     115      this.factorVarReplacementLabel.TabIndex = 0;
     116      this.factorVarReplacementLabel.Text = "Replacement for categorical variables:";
    103117      //
    104118      // replacementComboBox
     
    117131      this.replacementComboBox.SelectedIndexChanged += new System.EventHandler(this.replacementComboBox_SelectedIndexChanged);
    118132      //
    119       // factorVarReplacementLabel
    120       //
    121       this.factorVarReplacementLabel.AutoSize = true;
    122       this.factorVarReplacementLabel.Location = new System.Drawing.Point(3, 60);
    123       this.factorVarReplacementLabel.Name = "factorVarReplacementLabel";
    124       this.factorVarReplacementLabel.Size = new System.Drawing.Size(188, 13);
    125       this.factorVarReplacementLabel.TabIndex = 0;
    126       this.factorVarReplacementLabel.Text = "Replacement for categorical variables:";
    127       //
    128       // factorVarReplComboBox
    129       //
    130       this.factorVarReplComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
    131       this.factorVarReplComboBox.FormattingEnabled = true;
    132       this.factorVarReplComboBox.Items.AddRange(new object[] {
    133             HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Best,
    134             HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Mode,
    135             HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum.Shuffle});
    136       this.factorVarReplComboBox.Location = new System.Drawing.Point(197, 57);
    137       this.factorVarReplComboBox.Name = "factorVarReplComboBox";
    138       this.factorVarReplComboBox.Size = new System.Drawing.Size(121, 21);
    139       this.factorVarReplComboBox.TabIndex = 1;
    140       this.factorVarReplComboBox.SelectedIndexChanged += new System.EventHandler(this.replacementComboBox_SelectedIndexChanged);
    141       //
    142       // ascendingCheckBox
    143       //
    144       this.ascendingCheckBox.AutoSize = true;
    145       this.ascendingCheckBox.Location = new System.Drawing.Point(534, 6);
    146       this.ascendingCheckBox.Name = "ascendingCheckBox";
    147       this.ascendingCheckBox.Size = new System.Drawing.Size(76, 17);
    148       this.ascendingCheckBox.TabIndex = 10;
    149       this.ascendingCheckBox.Text = "Ascending";
    150       this.ascendingCheckBox.UseVisualStyleBackColor = true;
    151       this.ascendingCheckBox.CheckedChanged += new System.EventHandler(this.ascendingCheckBox_CheckedChanged);
    152       //
    153       // sortByLabel
    154       //
    155       this.sortByLabel.AutoSize = true;
    156       this.sortByLabel.Location = new System.Drawing.Point(324, 6);
    157       this.sortByLabel.Name = "sortByLabel";
    158       this.sortByLabel.Size = new System.Drawing.Size(77, 13);
    159       this.sortByLabel.TabIndex = 8;
    160       this.sortByLabel.Text = "Sorting criteria:";
    161       //
    162       // sortByComboBox
    163       //
    164       this.sortByComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
    165       this.sortByComboBox.FormattingEnabled = true;
    166       this.sortByComboBox.Location = new System.Drawing.Point(407, 3);
    167       this.sortByComboBox.Name = "sortByComboBox";
    168       this.sortByComboBox.Size = new System.Drawing.Size(121, 21);
    169       this.sortByComboBox.TabIndex = 9;
    170       this.sortByComboBox.SelectedIndexChanged += new System.EventHandler(this.sortByComboBox_SelectedIndexChanged);
     133      // numericVarReplacementLabel
     134      //
     135      this.numericVarReplacementLabel.AutoSize = true;
     136      this.numericVarReplacementLabel.Location = new System.Drawing.Point(3, 33);
     137      this.numericVarReplacementLabel.Name = "numericVarReplacementLabel";
     138      this.numericVarReplacementLabel.Size = new System.Drawing.Size(173, 13);
     139      this.numericVarReplacementLabel.TabIndex = 2;
     140      this.numericVarReplacementLabel.Text = "Replacement for numeric variables:";
     141      //
     142      // dataPartitionLabel
     143      //
     144      this.dataPartitionLabel.AutoSize = true;
     145      this.dataPartitionLabel.Location = new System.Drawing.Point(3, 6);
     146      this.dataPartitionLabel.Name = "dataPartitionLabel";
     147      this.dataPartitionLabel.Size = new System.Drawing.Size(73, 13);
     148      this.dataPartitionLabel.TabIndex = 0;
     149      this.dataPartitionLabel.Text = "Data partition:";
     150      //
     151      // dataPartitionComboBox
     152      //
     153      this.dataPartitionComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
     154      this.dataPartitionComboBox.FormattingEnabled = true;
     155      this.dataPartitionComboBox.Items.AddRange(new object[] {
     156            HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum.Training,
     157            HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum.Test,
     158            HeuristicLab.Problems.DataAnalysis.ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum.All});
     159      this.dataPartitionComboBox.Location = new System.Drawing.Point(197, 3);
     160      this.dataPartitionComboBox.Name = "dataPartitionComboBox";
     161      this.dataPartitionComboBox.Size = new System.Drawing.Size(121, 21);
     162      this.dataPartitionComboBox.TabIndex = 1;
     163      this.dataPartitionComboBox.SelectedIndexChanged += new System.EventHandler(this.dataPartitionComboBox_SelectedIndexChanged);
     164      //
     165      // variableImpactsArrayView
     166      //
     167      this.variableImpactsArrayView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
     168            | System.Windows.Forms.AnchorStyles.Left)
     169            | System.Windows.Forms.AnchorStyles.Right)));
     170      this.variableImpactsArrayView.Caption = "StringConvertibleArray View";
     171      this.variableImpactsArrayView.Content = null;
     172      this.variableImpactsArrayView.Location = new System.Drawing.Point(3, 84);
     173      this.variableImpactsArrayView.Name = "variableImpactsArrayView";
     174      this.variableImpactsArrayView.ReadOnly = true;
     175      this.variableImpactsArrayView.Size = new System.Drawing.Size(706, 278);
     176      this.variableImpactsArrayView.TabIndex = 2;
    171177      //
    172178      // ClassificationSolutionVariableImpactsView
     
    183189      this.Controls.Add(this.dataPartitionLabel);
    184190      this.Controls.Add(this.dataPartitionComboBox);
    185       this.Controls.Add(this.variableImactsArrayView);
     191      this.Controls.Add(this.variableImpactsArrayView);
    186192      this.Name = "ClassificationSolutionVariableImpactsView";
    187       this.Size = new System.Drawing.Size(668, 365);
     193      this.Size = new System.Drawing.Size(712, 365);
    188194      this.VisibleChanged += new System.EventHandler(this.ClassificationSolutionVariableImpactsView_VisibleChanged);
    189195      this.ResumeLayout(false);
     
    194200    #endregion
    195201
    196     private Data.Views.StringConvertibleArrayView variableImactsArrayView;
     202    private Data.Views.StringConvertibleArrayView variableImpactsArrayView;
    197203    private System.Windows.Forms.ComboBox dataPartitionComboBox;
    198204    private System.Windows.Forms.Label dataPartitionLabel;
     
    201207    private System.Windows.Forms.Label factorVarReplacementLabel;
    202208    private System.Windows.Forms.ComboBox factorVarReplComboBox;
    203     private System.Windows.Forms.CheckBox ascendingCheckBox;
    204209    private System.Windows.Forms.Label sortByLabel;
    205210    private System.Windows.Forms.ComboBox sortByComboBox;
    206     private System.ComponentModel.BackgroundWorker backgroundWorker;
     211    private System.Windows.Forms.CheckBox ascendingCheckBox;
    207212  }
    208213}
  • stable/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionVariableImpactsView.cs

    r16432 r16438  
    3333  [Content(typeof(IClassificationSolution))]
    3434  public partial class ClassificationSolutionVariableImpactsView : DataAnalysisSolutionEvaluationView {
    35     #region Nested Types
    3635    private enum SortingCriteria {
    3736      ImpactValue,
     
    3938      VariableName
    4039    }
    41     #endregion
    42 
    43     #region Fields
    44     private Dictionary<string, double> rawVariableImpacts = new Dictionary<string, double>();
    45     private Thread thread;
    46     #endregion
    47 
    48     #region Getter/Setter
     40    private CancellationTokenSource cancellationToken = new CancellationTokenSource();
     41    private List<Tuple<string, double>> rawVariableImpacts = new List<Tuple<string, double>>();
     42
    4943    public new IClassificationSolution Content {
    5044      get { return (IClassificationSolution)base.Content; }
     
    5347      }
    5448    }
    55     #endregion
    56 
    57     #region Ctor
     49
    5850    public ClassificationSolutionVariableImpactsView()
    5951      : base() {
    6052      InitializeComponent();
    6153
    62       this.sortByComboBox.Items.AddRange(Enum.GetValues(typeof(SortingCriteria)).Cast<object>().ToArray());
    63       this.sortByComboBox.SelectedItem = SortingCriteria.ImpactValue;
    64 
    6554      //Set the default values
    6655      this.dataPartitionComboBox.SelectedIndex = 0;
    67       this.replacementComboBox.SelectedIndex = 0;
     56      this.replacementComboBox.SelectedIndex = 3;
    6857      this.factorVarReplComboBox.SelectedIndex = 0;
    69     }
    70     #endregion
    71 
    72     #region Events
     58      this.sortByComboBox.SelectedItem = SortingCriteria.ImpactValue;
     59    }
     60
    7361    protected override void RegisterContentEvents() {
    7462      base.RegisterContentEvents();
     
    7664      Content.ProblemDataChanged += new EventHandler(Content_ProblemDataChanged);
    7765    }
    78 
    7966    protected override void DeregisterContentEvents() {
    8067      base.DeregisterContentEvents();
     
    8673      OnContentChanged();
    8774    }
    88 
    8975    protected virtual void Content_ModelChanged(object sender, EventArgs e) {
    9076      OnContentChanged();
    9177    }
    92 
    9378    protected override void OnContentChanged() {
    9479      base.OnContentChanged();
     80      rawVariableImpacts.Clear();
     81
    9582      if (Content == null) {
    96         variableImactsArrayView.Content = null;
     83        variableImpactsArrayView.Content = null;
    9784      } else {
    9885        UpdateVariableImpact();
    9986      }
    10087    }
    101 
    10288    private void ClassificationSolutionVariableImpactsView_VisibleChanged(object sender, EventArgs e) {
    103       if (thread == null) { return; }
    104 
    105       if (thread.IsAlive) { thread.Abort(); }
    106       thread = null;
    107     }
    108 
     89      cancellationToken.Cancel();
     90    }
    10991
    11092    private void dataPartitionComboBox_SelectedIndexChanged(object sender, EventArgs e) {
     93      rawVariableImpacts.Clear();
    11194      UpdateVariableImpact();
    11295    }
    113 
    11496    private void replacementComboBox_SelectedIndexChanged(object sender, EventArgs e) {
     97      rawVariableImpacts.Clear();
    11598      UpdateVariableImpact();
    11699    }
    117 
    118100    private void sortByComboBox_SelectedIndexChanged(object sender, EventArgs e) {
    119101      //Update the default ordering (asc,desc), but remove the eventHandler beforehand (otherwise the data would be ordered twice)
    120102      ascendingCheckBox.CheckedChanged -= ascendingCheckBox_CheckedChanged;
    121       switch ((SortingCriteria)sortByComboBox.SelectedItem) {
    122         case SortingCriteria.ImpactValue:
    123           ascendingCheckBox.Checked = false;
    124           break;
    125         case SortingCriteria.Occurrence:
    126           ascendingCheckBox.Checked = true;
    127           break;
    128         case SortingCriteria.VariableName:
    129           ascendingCheckBox.Checked = true;
    130           break;
    131         default:
    132           throw new NotImplementedException("Ordering for selected SortingCriteria not implemented");
    133       }
     103      ascendingCheckBox.Checked = (SortingCriteria)sortByComboBox.SelectedItem != SortingCriteria.ImpactValue;
    134104      ascendingCheckBox.CheckedChanged += ascendingCheckBox_CheckedChanged;
    135105
    136       UpdateDataOrdering();
    137     }
    138 
     106      UpdateOrdering();
     107    }
    139108    private void ascendingCheckBox_CheckedChanged(object sender, EventArgs e) {
    140       UpdateDataOrdering();
    141     }
    142 
    143     #endregion
    144 
    145     #region Helper Methods   
    146     private void UpdateVariableImpact() {
     109      UpdateOrdering();
     110    }
     111
     112    private async void UpdateVariableImpact() {
     113      IProgress progress;
     114
    147115      //Check if the selection is valid
    148116      if (Content == null) { return; }
     
    157125      var dataPartition = (ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum)dataPartitionComboBox.SelectedItem;
    158126
    159       variableImactsArrayView.Caption = Content.Name + " Variable Impacts";
    160 
    161       mainForm.AddOperationProgressToView(this, "Calculating variable impacts for " + Content.Name);
    162 
    163       Task.Factory.StartNew(() => {
    164         thread = Thread.CurrentThread;
    165         //Remember the original ordering of the variables
    166         var impacts = ClassificationSolutionVariableImpactsCalculator.CalculateImpacts(Content, dataPartition, replMethod, factorReplMethod);
     127      variableImpactsArrayView.Caption = Content.Name + " Variable Impacts";
     128      progress = mainForm.AddOperationProgressToView(this, "Calculating variable impacts for " + Content.Name);
     129      progress.ProgressValue = 0;
     130
     131      cancellationToken = new CancellationTokenSource();
     132
     133      try {
    167134        var problemData = Content.ProblemData;
    168135        var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(Content.Model.VariablesUsedForPrediction));
    169         var originalVariableOrdering = problemData.Dataset.VariableNames.Where(v => inputvariables.Contains(v)).Where(problemData.Dataset.VariableHasType<double>).ToList();
    170 
    171         rawVariableImpacts.Clear();
    172         originalVariableOrdering.ForEach(v => rawVariableImpacts.Add(v, impacts.First(vv => vv.Item1 == v).Item2));
    173       }).ContinueWith((o) => {
    174         UpdateDataOrdering();
    175         mainForm.RemoveOperationProgressFromView(this);
    176         thread = null;
    177       }, TaskScheduler.FromCurrentSynchronizationContext());
     136        //Remember the original ordering of the variables
     137        var originalVariableOrdering = problemData.Dataset.VariableNames
     138          .Where(v => inputvariables.Contains(v))
     139          .Where(v => problemData.Dataset.VariableHasType<double>(v) || problemData.Dataset.VariableHasType<string>(v))
     140          .ToList();
     141
     142        List<Tuple<string, double>> impacts = null;
     143        await Task.Run(() => { impacts = CalculateVariableImpacts(originalVariableOrdering, Content.Model, problemData, Content.EstimatedClassValues, dataPartition, replMethod, factorReplMethod, cancellationToken.Token, progress); });
     144        if (impacts == null) { return; }
     145
     146        rawVariableImpacts.AddRange(impacts);
     147        UpdateOrdering();
     148      }
     149      finally {
     150        ((MainForm.WindowsForms.MainForm)MainFormManager.MainForm).RemoveOperationProgressFromView(this);
     151      }
     152    }
     153    private List<Tuple<string, double>> CalculateVariableImpacts(List<string> originalVariableOrdering,
     154      IClassificationModel model,
     155      IClassificationProblemData problemData,
     156      IEnumerable<double> estimatedClassValues,
     157      ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum dataPartition,
     158      ClassificationSolutionVariableImpactsCalculator.ReplacementMethodEnum replMethod,
     159      ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum factorReplMethod,
     160      CancellationToken token,
     161      IProgress progress) {
     162      List<Tuple<string, double>> impacts = new List<Tuple<string, double>>();
     163      int count = originalVariableOrdering.Count;
     164      int i = 0;
     165      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
     166      IEnumerable<int> rows = ClassificationSolutionVariableImpactsCalculator.GetPartitionRows(dataPartition, problemData);
     167
     168      //Calculate original quality-values (via calculator, default is R²)
     169      IEnumerable<double> targetValuesPartition = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
     170      IEnumerable<double> estimatedClassValuesPartition = Content.GetEstimatedClassValues(rows);
     171
     172      var originalCalculatorValue = ClassificationSolutionVariableImpactsCalculator.CalculateQuality(targetValuesPartition, estimatedClassValuesPartition);
     173      var clonedModel = (IClassificationModel)model.Clone();
     174      foreach (var variableName in originalVariableOrdering) {
     175        if (cancellationToken.Token.IsCancellationRequested) { return null; }
     176        progress.ProgressValue = (double)++i / count;
     177        progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variableName, i, count);
     178
     179        double impact = 0;
     180        //If the variable isn't used for prediction, it has zero impact.
     181        if (model.VariablesUsedForPrediction.Contains(variableName)) {
     182          impact = ClassificationSolutionVariableImpactsCalculator.CalculateImpact(variableName, clonedModel, problemData, modifiableDataset, rows, replMethod, factorReplMethod, targetValuesPartition, originalCalculatorValue);
     183        }
     184        impacts.Add(new Tuple<string, double>(variableName, impact));
     185      }
     186
     187      return impacts;
    178188    }
    179189
    180190    /// <summary>
    181     /// Updates the <see cref="variableImactsArrayView"/> according to the selected ordering <see cref="ascendingCheckBox"/> of the selected Column <see cref="sortByComboBox"/>
     191    /// Updates the <see cref="variableImpactsArrayView"/> according to the selected ordering <see cref="ascendingCheckBox"/> of the selected Column <see cref="sortByComboBox"/>
    182192    /// The default is "Descending" by "VariableImpact" (as in previous versions)
    183193    /// </summary>
    184     private void UpdateDataOrdering() {
     194    private void UpdateOrdering() {
    185195      //Check if valid sortingCriteria is selected and data exists
    186196      if (sortByComboBox.SelectedIndex == -1) { return; }
     
    191201      bool ascending = ascendingCheckBox.Checked;
    192202
    193       IEnumerable<KeyValuePair<string, double>> orderedEntries = null;
     203      IEnumerable<Tuple<string, double>> orderedEntries = null;
    194204
    195205      //Sort accordingly
    196206      switch (selectedItem) {
    197207        case SortingCriteria.ImpactValue:
    198           orderedEntries = rawVariableImpacts.OrderBy(v => v.Value);
     208          orderedEntries = rawVariableImpacts.OrderBy(v => v.Item2);
    199209          break;
    200210        case SortingCriteria.Occurrence:
     
    202212          break;
    203213        case SortingCriteria.VariableName:
    204           orderedEntries = rawVariableImpacts.OrderBy(v => v.Key, new NaturalStringComparer());
     214          orderedEntries = rawVariableImpacts.OrderBy(v => v.Item1, new NaturalStringComparer());
    205215          break;
    206216        default:
     
    211221
    212222      //Write the data back
    213       var impactArray = new DoubleArray(orderedEntries.Select(i => i.Value).ToArray()) {
    214         ElementNames = orderedEntries.Select(i => i.Key)
     223      var impactArray = new DoubleArray(orderedEntries.Select(i => i.Item2).ToArray()) {
     224        ElementNames = orderedEntries.Select(i => i.Item1)
    215225      };
    216226
    217227      //Could be, if the View was closed
    218       if (!variableImactsArrayView.IsDisposed) {
    219         variableImactsArrayView.Content = (DoubleArray)impactArray.AsReadOnly();
    220       }
    221     }
    222     #endregion 
     228      if (!variableImpactsArrayView.IsDisposed) {
     229        variableImpactsArrayView.Content = (DoubleArray)impactArray.AsReadOnly();
     230      }
     231    }
    223232  }
    224233}
  • stable/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.Designer.cs

    r16435 r16438  
    5555      this.dataPartitionLabel = new System.Windows.Forms.Label();
    5656      this.dataPartitionComboBox = new System.Windows.Forms.ComboBox();
    57       this.variableImactsArrayView = new HeuristicLab.Data.Views.StringConvertibleArrayView();
     57      this.variableImpactsArrayView = new HeuristicLab.Data.Views.StringConvertibleArrayView();
    5858      this.SuspendLayout();
    5959      //
     
    6161      //
    6262      this.ascendingCheckBox.AutoSize = true;
    63       this.ascendingCheckBox.Location = new System.Drawing.Point(534, 6);
     63      this.ascendingCheckBox.CheckAlign = System.Drawing.ContentAlignment.MiddleRight;
     64      this.ascendingCheckBox.Location = new System.Drawing.Point(452, 32);
    6465      this.ascendingCheckBox.Name = "ascendingCheckBox";
    6566      this.ascendingCheckBox.Size = new System.Drawing.Size(76, 17);
     
    162163      this.dataPartitionComboBox.SelectedIndexChanged += new System.EventHandler(this.dataPartitionComboBox_SelectedIndexChanged);
    163164      //
    164       // variableImactsArrayView
    165       //
    166       this.variableImactsArrayView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
    167             | System.Windows.Forms.AnchorStyles.Left)
     165      // variableImpactsArrayView
     166      //
     167      this.variableImpactsArrayView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
     168            | System.Windows.Forms.AnchorStyles.Left) 
    168169            | System.Windows.Forms.AnchorStyles.Right)));
    169       this.variableImactsArrayView.Caption = "StringConvertibleArray View";
    170       this.variableImactsArrayView.Content = null;
    171       this.variableImactsArrayView.Location = new System.Drawing.Point(3, 84);
    172       this.variableImactsArrayView.Name = "variableImactsArrayView";
    173       this.variableImactsArrayView.ReadOnly = true;
    174       this.variableImactsArrayView.Size = new System.Drawing.Size(706, 278);
    175       this.variableImactsArrayView.TabIndex = 2;
     170      this.variableImpactsArrayView.Caption = "StringConvertibleArray View";
     171      this.variableImpactsArrayView.Content = null;
     172      this.variableImpactsArrayView.Location = new System.Drawing.Point(3, 84);
     173      this.variableImpactsArrayView.Name = "variableImpactsArrayView";
     174      this.variableImpactsArrayView.ReadOnly = true;
     175      this.variableImpactsArrayView.Size = new System.Drawing.Size(706, 278);
     176      this.variableImpactsArrayView.TabIndex = 2;
    176177      //
    177178      // RegressionSolutionVariableImpactsView
     
    188189      this.Controls.Add(this.dataPartitionLabel);
    189190      this.Controls.Add(this.dataPartitionComboBox);
    190       this.Controls.Add(this.variableImactsArrayView);
     191      this.Controls.Add(this.variableImpactsArrayView);
    191192      this.Name = "RegressionSolutionVariableImpactsView";
    192193      this.Size = new System.Drawing.Size(712, 365);
     
    199200    #endregion
    200201
    201     private Data.Views.StringConvertibleArrayView variableImactsArrayView;
     202    private Data.Views.StringConvertibleArrayView variableImpactsArrayView;
    202203    private System.Windows.Forms.ComboBox dataPartitionComboBox;
    203204    private System.Windows.Forms.Label dataPartitionLabel;
  • stable/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.cs

    r16435 r16438  
    3333  [Content(typeof(IRegressionSolution))]
    3434  public partial class RegressionSolutionVariableImpactsView : DataAnalysisSolutionEvaluationView {
    35     private CancellationTokenSource cancellationToken = new CancellationTokenSource();
    3635    private enum SortingCriteria {
    3736      ImpactValue,
     
    3938      VariableName
    4039    }
     40    private CancellationTokenSource cancellationToken = new CancellationTokenSource();
    4141    private List<Tuple<string, double>> rawVariableImpacts = new List<Tuple<string, double>>();
    4242
     
    6464      Content.ProblemDataChanged += new EventHandler(Content_ProblemDataChanged);
    6565    }
    66 
    6766    protected override void DeregisterContentEvents() {
    6867      base.DeregisterContentEvents();
     
    7473      OnContentChanged();
    7574    }
    76 
    7775    protected virtual void Content_ModelChanged(object sender, EventArgs e) {
    7876      OnContentChanged();
    7977    }
    80 
    8178    protected override void OnContentChanged() {
    8279      base.OnContentChanged();
     80      rawVariableImpacts.Clear();
     81
    8382      if (Content == null) {
    84         variableImactsArrayView.Content = null;
     83        variableImpactsArrayView.Content = null;
    8584      } else {
    8685        UpdateVariableImpact();
    8786      }
    8887    }
    89 
    9088    private void RegressionSolutionVariableImpactsView_VisibleChanged(object sender, EventArgs e) {
    9189      cancellationToken.Cancel();
    9290    }
    9391
    94 
    9592    private void dataPartitionComboBox_SelectedIndexChanged(object sender, EventArgs e) {
     93      rawVariableImpacts.Clear();
    9694      UpdateVariableImpact();
    9795    }
    98 
    9996    private void replacementComboBox_SelectedIndexChanged(object sender, EventArgs e) {
     97      rawVariableImpacts.Clear();
    10098      UpdateVariableImpact();
    10199    }
    102 
    103100    private void sortByComboBox_SelectedIndexChanged(object sender, EventArgs e) {
    104101      //Update the default ordering (asc,desc), but remove the eventHandler beforehand (otherwise the data would be ordered twice)
     
    109106      UpdateOrdering();
    110107    }
    111 
    112108    private void ascendingCheckBox_CheckedChanged(object sender, EventArgs e) {
    113109      UpdateOrdering();
    114110    }
    115 
    116111
    117112    private async void UpdateVariableImpact() {
     
    130125      var dataPartition = (RegressionSolutionVariableImpactsCalculator.DataPartitionEnum)dataPartitionComboBox.SelectedItem;
    131126
    132       variableImactsArrayView.Caption = Content.Name + " Variable Impacts";
     127      variableImpactsArrayView.Caption = Content.Name + " Variable Impacts";
    133128      progress = mainForm.AddOperationProgressToView(this, "Calculating variable impacts for " + Content.Name);
    134129      progress.ProgressValue = 0;
    135130
    136131      cancellationToken = new CancellationTokenSource();
    137       //Remember the original ordering of the variables
     132
    138133      try {
    139         var impacts = await Task.Run(() => RegressionSolutionVariableImpactsCalculator.CalculateImpacts(Content, dataPartition, replMethod, factorReplMethod,
    140           (i, s) => {
    141             progress.ProgressValue = i;
    142             progress.Status = s;
    143             return cancellationToken.Token.IsCancellationRequested;
    144           }), cancellationToken.Token);
    145 
    146         if (cancellationToken.Token.IsCancellationRequested) { return; }
    147134        var problemData = Content.ProblemData;
    148135        var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(Content.Model.VariablesUsedForPrediction));
     136        //Remember the original ordering of the variables
    149137        var originalVariableOrdering = problemData.Dataset.VariableNames
    150138          .Where(v => inputvariables.Contains(v))
     
    152140          .ToList();
    153141
    154         rawVariableImpacts.Clear();
    155         originalVariableOrdering.ForEach(v => rawVariableImpacts.Add(new Tuple<string, double>(v, impacts.First(vv => vv.Item1 == v).Item2)));
     142        List<Tuple<string, double>> impacts = null;
     143        await Task.Run(() => { impacts = CalculateVariableImpacts(originalVariableOrdering, Content.Model, problemData, Content.EstimatedValues, dataPartition, replMethod, factorReplMethod, cancellationToken.Token, progress); });
     144        if (impacts == null) { return; }
     145
     146        rawVariableImpacts.AddRange(impacts);
    156147        UpdateOrdering();
    157       } finally {
     148      }
     149      finally {
    158150        ((MainForm.WindowsForms.MainForm)MainFormManager.MainForm).RemoveOperationProgressFromView(this);
    159151      }
    160152    }
     153    private List<Tuple<string, double>> CalculateVariableImpacts(List<string> originalVariableOrdering,
     154      IRegressionModel model,
     155      IRegressionProblemData problemData,
     156      IEnumerable<double> estimatedValues,
     157      RegressionSolutionVariableImpactsCalculator.DataPartitionEnum dataPartition,
     158      RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum replMethod,
     159      RegressionSolutionVariableImpactsCalculator.FactorReplacementMethodEnum factorReplMethod,
     160      CancellationToken token,
     161      IProgress progress) {
     162      List<Tuple<string, double>> impacts = new List<Tuple<string, double>>();
     163      int count = originalVariableOrdering.Count;
     164      int i = 0;
     165      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
     166      IEnumerable<int> rows = RegressionSolutionVariableImpactsCalculator.GetPartitionRows(dataPartition, problemData);
     167
     168      //Calculate original quality-values (via calculator, default is R²)
     169      IEnumerable<double> targetValuesPartition = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
     170      IEnumerable<double> estimatedValuesPartition = Content.GetEstimatedValues(rows);
     171
     172      var originalCalculatorValue = RegressionSolutionVariableImpactsCalculator.CalculateQuality(targetValuesPartition, estimatedValuesPartition);
     173
     174      foreach (var variableName in originalVariableOrdering) {
     175        if (cancellationToken.Token.IsCancellationRequested) { return null; }
     176        progress.ProgressValue = (double)++i / count;
     177        progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variableName, i, count);
     178
     179        double impact = 0;
     180        //If the variable isn't used for prediction, it has zero impact.
     181        if (model.VariablesUsedForPrediction.Contains(variableName)) {
     182          impact = RegressionSolutionVariableImpactsCalculator.CalculateImpact(variableName, model, problemData, modifiableDataset, rows, replMethod, factorReplMethod, targetValuesPartition, originalCalculatorValue);
     183        }
     184        impacts.Add(new Tuple<string, double>(variableName, impact));
     185      }
     186
     187      return impacts;
     188    }
    161189
    162190    /// <summary>
    163     /// Updates the <see cref="variableImactsArrayView"/> according to the selected ordering <see cref="ascendingCheckBox"/> of the selected Column <see cref="sortByComboBox"/>
     191    /// Updates the <see cref="variableImpactsArrayView"/> according to the selected ordering <see cref="ascendingCheckBox"/> of the selected Column <see cref="sortByComboBox"/>
    164192    /// The default is "Descending" by "VariableImpact" (as in previous versions)
    165193    /// </summary>
     
    198226
    199227      //Could be, if the View was closed
    200       if (!variableImactsArrayView.IsDisposed) {
    201         variableImactsArrayView.Content = (DoubleArray)impactArray.AsReadOnly();
     228      if (!variableImpactsArrayView.IsDisposed) {
     229        variableImpactsArrayView.Content = (DoubleArray)impactArray.AsReadOnly();
    202230      }
    203231    }
  • stable/HeuristicLab.Problems.DataAnalysis/3.4

    • Property svn:mergeinfo set to (toggle deleted branches)
      /trunk/HeuristicLab.Problems.DataAnalysis/3.4mergedeligible
      /branches/2904_CalculateImpacts/3.415808-16421
      /branches/Async/HeuristicLab.Problems.DataAnalysis/3.413329-15286
      /branches/Classification-Extensions/HeuristicLab.Problems.DataAnalysis/3.411606-11761
      /branches/ClassificationModelComparison/HeuristicLab.Problems.DataAnalysis/3.49073-13099
      /branches/CloningRefactoring/HeuristicLab.Problems.DataAnalysis/3.44656-4721
      /branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.45471-5808
      /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.DataAnalysis/3.45815-6180
      /branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.44220,​4226,​4236-4238,​4389,​4458-4459,​4462,​4464
      /branches/DataAnalysisCSVImport/HeuristicLab.Problems.DataAnalysis/3.48713-8875
      /branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.410085-11101
      /branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.48035-8538
      /branches/GP.Grammar.Editor/HeuristicLab.Problems.DataAnalysis/3.46284-6795
      /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Problems.DataAnalysis/3.45060
      /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Problems.DataAnalysis/3.411570-12508
      /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Problems.DataAnalysis/3.411130-12721
      /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Problems.DataAnalysis/3.413819-14091
      /branches/HeuristicLab.TimeSeries/HeuristicLab.Problems.DataAnalysis/3.47098-8789
      /branches/LogResidualEvaluator/HeuristicLab.Problems.DataAnalysis/3.410202-10483
      /branches/NET40/sources/HeuristicLab.Problems.DataAnalysis/3.45138-5162
      /branches/ParallelEngine/HeuristicLab.Problems.DataAnalysis/3.45175-5192
      /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Problems.DataAnalysis/3.47570-7810
      /branches/QAPAlgorithms/HeuristicLab.Problems.DataAnalysis/3.46350-6627
      /branches/Restructure trunk solution/HeuristicLab.Problems.DataAnalysis/3.46828
      /branches/SimplifierViewsProgress/HeuristicLab.Problems.DataAnalysis/3.415318-15370
      /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Problems.DataAnalysis/3.410204-10479
      /branches/Trunk/HeuristicLab.Problems.DataAnalysis/3.46829-6865
      /branches/histogram/HeuristicLab.Problems.DataAnalysis/3.45959-6341
      /branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.414232-14825
      /trunk/sources/HeuristicLab.Problems.DataAnalysis/3.49498,​9552,​9762,​9973-9975,​9994,​10406,​10480,​10484,​10486,​10540-10541,​10543,​10545,​11031,​11114,​11116,​11156,​11330,​11332,​11417,​11422,​11623,​11631,​11634,​11762-11764,​11766,​12067,​12485,​12492,​12504,​12506,​12509-12512,​12524,​12578,​12581,​12612,​12622,​12641,​12649,​12664,​12722,​12770,​12772,​12790-12792,​12796,​12798,​12801,​12811-12812,​12816-12817,​12836-12837,​12851,​12907,​12971,​13001,​13027,​13038,​13040,​13100-13104,​13154,​13268,​13395,​13406,​13419,​13427-13430,​13434,​13440-13442,​13445-13447,​13450,​13474,​13501,​13503,​13511,​13513,​13525-13526,​13529,​13534-13535,​13539-13540,​13550,​13552,​13584-13585,​13593,​13645,​13648,​13650-13652,​13654,​13657-13659,​13661-13662,​13666,​13669,​13682-13684,​13690-13693,​13697-13698,​13700-13702,​13704-13705,​13708-13709,​13711,​13715,​13724,​13746,​13760-13761,​13766,​13785-13786,​13801,​13826,​13901,​13921-13922,​13925,​13938,​13941-13942,​13985-13987,​13992-13993,​14000-14001,​14015-14016,​14095-14096,​14098-14099,​14107,​14118-14119,​14131,​14135,​14140,​14142,​14157-14158,​14160,​14226,​14228-14230,​14234-14236,​14244-14247,​14250,​14255-14258,​14260,​14267,​14271-14272,​14282,​14284-14298,​14300,​14307,​14314-14316,​14319,​14322,​14332,​14343-14350,​14358,​14367-14368,​14372,​14376,​14378,​14381-14382,​14384,​14388,​14390-14391,​14393-14394,​14396,​14400,​14405,​14407-14408,​14418,​14422-14423,​14425,​14434,​14463-14465,​14468-14469,​14479,​14483,​14486,​14507,​14517,​14523,​14527,​14529,​14531-14533,​14553,​14623,​14630,​14781,​14789-14791,​14805,​14826-14827,​14829-14832,​14839-14840,​14843,​14845-14847,​14851-14854,​14857,​14864-14865,​14871,​14889-14890,​14899,​14904,​14918,​14938,​14940,​14943-14946,​14948-14951,​15002,​15013,​15015-15016,​15023-15024,​15026,​15046,​15052-15054,​15058,​15077,​15085,​15088,​15094,​15103-15106,​15111-15113,​15122-15124,​15129,​15139,​15160,​15163,​15165,​15184-15185,​15187,​15194,​15287,​15371-153
72,​15390,​15396,​15400,​15402,​15427,​15498,​15517
  • stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionVariableImpactsCalculator.cs

    r16434 r16438  
    2323
    2424using System;
     25using System.Collections;
    2526using System.Collections.Generic;
    2627using System.Linq;
     
    3637  [Item("ClassificationSolution Impacts Calculator", "Calculation of the impacts of input variables for any classification solution")]
    3738  public sealed class ClassificationSolutionVariableImpactsCalculator : ParameterizedNamedItem {
     39    #region Parameters/Properties
    3840    public enum ReplacementMethodEnum {
    3941      Median,
     
    5456
    5557    private const string ReplacementParameterName = "Replacement Method";
     58    private const string FactorReplacementParameterName = "Factor Replacement Method";
    5659    private const string DataPartitionParameterName = "DataPartition";
    5760
    5861    public IFixedValueParameter<EnumValue<ReplacementMethodEnum>> ReplacementParameter {
    5962      get { return (IFixedValueParameter<EnumValue<ReplacementMethodEnum>>)Parameters[ReplacementParameterName]; }
     63    }
     64    public IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>> FactorReplacementParameter {
     65      get { return (IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>>)Parameters[FactorReplacementParameterName]; }
    6066    }
    6167    public IFixedValueParameter<EnumValue<DataPartitionEnum>> DataPartitionParameter {
     
    6773      set { ReplacementParameter.Value.Value = value; }
    6874    }
     75    public FactorReplacementMethodEnum FactorReplacementMethod {
     76      get { return FactorReplacementParameter.Value.Value; }
     77      set { FactorReplacementParameter.Value.Value = value; }
     78    }
    6979    public DataPartitionEnum DataPartition {
    7080      get { return DataPartitionParameter.Value.Value; }
    7181      set { DataPartitionParameter.Value.Value = value; }
    7282    }
    73 
    74 
     83    #endregion
     84
     85    #region Ctor/Cloner
    7586    [StorableConstructor]
    7687    private ClassificationSolutionVariableImpactsCalculator(bool deserializing) : base(deserializing) { }
    7788    private ClassificationSolutionVariableImpactsCalculator(ClassificationSolutionVariableImpactsCalculator original, Cloner cloner)
    7889      : base(original, cloner) { }
     90    public ClassificationSolutionVariableImpactsCalculator()
     91      : base() {
     92      Parameters.Add(new FixedValueParameter<EnumValue<ReplacementMethodEnum>>(ReplacementParameterName, "The replacement method for variables during impact calculation.", new EnumValue<ReplacementMethodEnum>(ReplacementMethodEnum.Shuffle)));
     93      Parameters.Add(new FixedValueParameter<EnumValue<FactorReplacementMethodEnum>>(FactorReplacementParameterName, "The replacement method for factor variables during impact calculation.", new EnumValue<FactorReplacementMethodEnum>(FactorReplacementMethodEnum.Best)));
     94      Parameters.Add(new FixedValueParameter<EnumValue<DataPartitionEnum>>(DataPartitionParameterName, "The data partition on which the impacts are calculated.", new EnumValue<DataPartitionEnum>(DataPartitionEnum.Training)));
     95    }
     96
    7997    public override IDeepCloneable Clone(Cloner cloner) {
    8098      return new ClassificationSolutionVariableImpactsCalculator(this, cloner);
    8199    }
    82 
    83     public ClassificationSolutionVariableImpactsCalculator()
    84       : base() {
    85       Parameters.Add(new FixedValueParameter<EnumValue<ReplacementMethodEnum>>(ReplacementParameterName, "The replacement method for variables during impact calculation.", new EnumValue<ReplacementMethodEnum>(ReplacementMethodEnum.Median)));
    86       Parameters.Add(new FixedValueParameter<EnumValue<DataPartitionEnum>>(DataPartitionParameterName, "The data partition on which the impacts are calculated.", new EnumValue<DataPartitionEnum>(DataPartitionEnum.Training)));
    87     }
     100    #endregion
    88101
    89102    //mkommend: annoying name clash with static method, open to better naming suggestions
    90103    public IEnumerable<Tuple<string, double>> Calculate(IClassificationSolution solution) {
    91       return CalculateImpacts(solution, DataPartition, ReplacementMethod);
     104      return CalculateImpacts(solution, ReplacementMethod, FactorReplacementMethod, DataPartition);
    92105    }
    93106
    94107    public static IEnumerable<Tuple<string, double>> CalculateImpacts(
    95108      IClassificationSolution solution,
    96       DataPartitionEnum data = DataPartitionEnum.Training,
    97       ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     109      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
     110      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
     111      DataPartitionEnum dataPartition = DataPartitionEnum.Training) {
     112
     113      IEnumerable<int> rows = GetPartitionRows(dataPartition, solution.ProblemData);
     114      IEnumerable<double> estimatedClassValues = solution.GetEstimatedClassValues(rows);
     115      var model = (IClassificationModel)solution.Model.Clone(); //mkommend: clone of model is necessary, because the thresholds for IDiscriminantClassificationModels are updated
     116
     117      return CalculateImpacts(model, solution.ProblemData, estimatedClassValues, rows, replacementMethod, factorReplacementMethod);
     118    }
     119
     120    public static IEnumerable<Tuple<string, double>> CalculateImpacts(
     121     IClassificationModel model,
     122     IClassificationProblemData problemData,
     123     IEnumerable<double> estimatedClassValues,
     124     IEnumerable<int> rows,
     125     ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
     126     FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
     127
     128      //fholzing: try and catch in case a different dataset is loaded, otherwise statement is neglectable
     129      var missingVariables = model.VariablesUsedForPrediction.Except(problemData.Dataset.VariableNames);
     130      if (missingVariables.Any()) {
     131        throw new InvalidOperationException(string.Format("Can not calculate variable impacts, because the model uses inputs missing in the dataset ({0})", string.Join(", ", missingVariables)));
     132      }
     133      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
     134      var originalQuality = CalculateQuality(targetValues, estimatedClassValues);
     135
     136      var impacts = new Dictionary<string, double>();
     137      var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(model.VariablesUsedForPrediction));
     138      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
     139
     140      foreach (var inputVariable in inputvariables) {
     141        impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData, modifiableDataset, rows, replacementMethod, factorReplacementMethod, targetValues, originalQuality);
     142      }
     143
     144      return impacts.Select(i => Tuple.Create(i.Key, i.Value));
     145    }
     146
     147    public static double CalculateImpact(string variableName,
     148      IClassificationModel model,
     149      IClassificationProblemData problemData,
     150      ModifiableDataset modifiableDataset,
     151      IEnumerable<int> rows,
     152      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
     153      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
     154      IEnumerable<double> targetValues = null,
     155      double quality = double.NaN) {
     156
     157      if (!model.VariablesUsedForPrediction.Contains(variableName)) { return 0.0; }
     158      if (!problemData.Dataset.VariableNames.Contains(variableName)) {
     159        throw new InvalidOperationException(string.Format("Can not calculate variable impact, because the model uses inputs missing in the dataset ({0})", variableName));
     160      }
     161
     162      if (targetValues == null) {
     163        targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
     164      }
     165      if (quality == double.NaN) {
     166        quality = CalculateQuality(model.GetEstimatedClassValues(modifiableDataset, rows), targetValues);
     167      }
     168
     169      IList originalValues = null;
     170      IList replacementValues = GetReplacementValues(modifiableDataset, variableName, model, rows, targetValues, out originalValues, replacementMethod, factorReplacementMethod);
     171
     172      double newValue = CalculateQualityForReplacement(model, modifiableDataset, variableName, originalValues, rows, replacementValues, targetValues);
     173      double impact = quality - newValue;
     174
     175      return impact;
     176    }
     177
     178    private static IList GetReplacementValues(ModifiableDataset modifiableDataset,
     179      string variableName,
     180      IClassificationModel model,
     181      IEnumerable<int> rows,
     182      IEnumerable<double> targetValues,
     183      out IList originalValues,
     184      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    98185      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
    99186
    100       var problemData = solution.ProblemData;
    101       var dataset = problemData.Dataset;
    102       var model = (IClassificationModel)solution.Model.Clone(); //mkommend: clone of model is necessary, because the thresholds for IDiscriminantClassificationModels are updated
    103 
    104       IEnumerable<int> rows;
    105       IEnumerable<double> targetValues;
    106       double originalAccuracy;
    107 
    108       OnlineCalculatorError error;
    109 
    110       switch (data) {
    111         case DataPartitionEnum.All:
    112           rows = problemData.AllIndices;
    113           targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.AllIndices).ToList();
    114           originalAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, solution.EstimatedClassValues, out error);
    115           if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during accuracy calculation.");
    116           break;
    117         case DataPartitionEnum.Training:
    118           rows = problemData.TrainingIndices;
    119           targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToList();
    120           originalAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, solution.EstimatedTrainingClassValues, out error);
    121           if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during accuracy calculation.");
    122           break;
    123         case DataPartitionEnum.Test:
    124           rows = problemData.TestIndices;
    125           targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToList();
    126           originalAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, solution.EstimatedTestClassValues, out error);
    127           if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during accuracy calculation.");
    128           break;
    129         default: throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
    130       }
    131 
    132       var impacts = new Dictionary<string, double>();
    133       var modifiableDataset = ((Dataset)dataset).ToModifiable();
    134 
    135       var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(solution.Model.VariablesUsedForPrediction));
    136       var allowedInputVariables = dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();
    137 
    138       // calculate impacts for double variables
    139       foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
    140         var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, replacementMethod);
    141         var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error);
    142         if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    143 
    144         impacts[inputVariable] = originalAccuracy - newAccuracy;
    145       }
    146 
    147       // calculate impacts for string variables
    148       foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) {
    149         if (factorReplacementMethod == FactorReplacementMethodEnum.Best) {
    150           // try replacing with all possible values and find the best replacement value
    151           var smallestImpact = double.PositiveInfinity;
    152           foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
    153             var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows,
    154               Enumerable.Repeat(repl, dataset.Rows));
    155             var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error);
    156             if (error != OnlineCalculatorError.None)
    157               throw new InvalidOperationException("Error during accuracy calculation with replaced inputs.");
    158 
    159             var impact = originalAccuracy - newAccuracy;
    160             if (impact < smallestImpact) smallestImpact = impact;
    161           }
    162           impacts[inputVariable] = smallestImpact;
    163         } else {
    164           // for replacement methods shuffle and mode
    165           // calculate impacts for factor variables
    166 
    167           var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows,
    168             factorReplacementMethod);
    169           var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error);
    170           if (error != OnlineCalculatorError.None)
    171             throw new InvalidOperationException("Error during accuracy calculation with replaced inputs.");
    172 
    173           impacts[inputVariable] = originalAccuracy - newAccuracy;
    174         }
    175       } // foreach
    176       return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value));
    177     }
    178 
    179     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IClassificationModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
    180       var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
     187      IList replacementValues = null;
     188      if (modifiableDataset.VariableHasType<double>(variableName)) {
     189        originalValues = modifiableDataset.GetReadOnlyDoubleValues(variableName).ToList();
     190        replacementValues = GetReplacementValuesForDouble(modifiableDataset, rows, (List<double>)originalValues, replacementMethod);
     191      } else if (modifiableDataset.VariableHasType<string>(variableName)) {
     192        originalValues = modifiableDataset.GetReadOnlyStringValues(variableName).ToList();
     193        replacementValues = GetReplacementValuesForString(model, modifiableDataset, variableName, rows, (List<string>)originalValues, targetValues, factorReplacementMethod);
     194      } else {
     195        throw new NotSupportedException("Variable not supported");
     196      }
     197
     198      return replacementValues;
     199    }
     200
     201    private static IList GetReplacementValuesForDouble(ModifiableDataset modifiableDataset,
     202      IEnumerable<int> rows,
     203      List<double> originalValues,
     204      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle) {
     205
     206      IRandom random = new FastRandom(31415);
     207      List<double> replacementValues;
    181208      double replacementValue;
    182       List<double> replacementValues;
    183       IRandom rand;
    184 
    185       switch (replacement) {
     209
     210      switch (replacementMethod) {
    186211        case ReplacementMethodEnum.Median:
    187212          replacementValue = rows.Select(r => originalValues[r]).Median();
    188           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
     213          replacementValues = Enumerable.Repeat(replacementValue, modifiableDataset.Rows).ToList();
    189214          break;
    190215        case ReplacementMethodEnum.Average:
    191216          replacementValue = rows.Select(r => originalValues[r]).Average();
    192           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
     217          replacementValues = Enumerable.Repeat(replacementValue, modifiableDataset.Rows).ToList();
    193218          break;
    194219        case ReplacementMethodEnum.Shuffle:
    195220          // new var has same empirical distribution but the relation to y is broken
    196           rand = new FastRandom(31415);
    197221          // prepare a complete column for the dataset
    198           replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
     222          replacementValues = Enumerable.Repeat(double.NaN, modifiableDataset.Rows).ToList();
    199223          // shuffle only the selected rows
    200           var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
     224          var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(random).ToList();
    201225          int i = 0;
    202226          // update column values
     
    208232          var avg = rows.Select(r => originalValues[r]).Average();
    209233          var stdDev = rows.Select(r => originalValues[r]).StandardDeviation();
    210           rand = new FastRandom(31415);
    211234          // prepare a complete column for the dataset
    212           replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
     235          replacementValues = Enumerable.Repeat(double.NaN, modifiableDataset.Rows).ToList();
    213236          // update column values
    214237          foreach (var r in rows) {
    215             replacementValues[r] = NormalDistributedRandom.NextDouble(rand, avg, stdDev);
     238            replacementValues[r] = NormalDistributedRandom.NextDouble(random, avg, stdDev);
    216239          }
    217240          break;
    218241
    219242        default:
    220           throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
    221       }
    222 
    223       return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
    224     }
    225 
    226     private static IEnumerable<double> EvaluateModelWithReplacedVariable(
    227       IClassificationModel model, string variable, ModifiableDataset dataset,
    228       IEnumerable<int> rows,
    229       FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Shuffle) {
    230       var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
    231       List<string> replacementValues;
    232       IRandom rand;
    233 
    234       switch (replacement) {
     243          throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacementMethod));
     244      }
     245
     246      return replacementValues;
     247    }
     248
     249    private static IList GetReplacementValuesForString(IClassificationModel model,
     250      ModifiableDataset modifiableDataset,
     251      string variableName,
     252      IEnumerable<int> rows,
     253      List<string> originalValues,
     254      IEnumerable<double> targetValues,
     255      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Shuffle) {
     256
     257      List<string> replacementValues = null;
     258      IRandom random = new FastRandom(31415);
     259
     260      switch (factorReplacementMethod) {
     261        case FactorReplacementMethodEnum.Best:
     262          // try replacing with all possible values and find the best replacement value
     263          var bestQuality = double.NegativeInfinity;
     264          foreach (var repl in modifiableDataset.GetStringValues(variableName, rows).Distinct()) {
     265            List<string> curReplacementValues = Enumerable.Repeat(repl, modifiableDataset.Rows).ToList();
     266            //fholzing: this result could be used later on (theoretically), but is neglected for better readability/method consistency
     267            var newValue = CalculateQualityForReplacement(model, modifiableDataset, variableName, originalValues, rows, curReplacementValues, targetValues);
     268            var curQuality = newValue;
     269
     270            if (curQuality > bestQuality) {
     271              bestQuality = curQuality;
     272              replacementValues = curReplacementValues;
     273            }
     274          }
     275          break;
    235276        case FactorReplacementMethodEnum.Mode:
    236277          var mostCommonValue = rows.Select(r => originalValues[r])
     
    238279            .OrderByDescending(g => g.Count())
    239280            .First().Key;
    240           replacementValues = Enumerable.Repeat(mostCommonValue, dataset.Rows).ToList();
     281          replacementValues = Enumerable.Repeat(mostCommonValue, modifiableDataset.Rows).ToList();
    241282          break;
    242283        case FactorReplacementMethodEnum.Shuffle:
    243284          // new var has same empirical distribution but the relation to y is broken
    244           rand = new FastRandom(31415);
    245285          // prepare a complete column for the dataset
    246           replacementValues = Enumerable.Repeat(string.Empty, dataset.Rows).ToList();
     286          replacementValues = Enumerable.Repeat(string.Empty, modifiableDataset.Rows).ToList();
    247287          // shuffle only the selected rows
    248           var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
     288          var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(random).ToList();
    249289          int i = 0;
    250290          // update column values
     
    254294          break;
    255295        default:
    256           throw new ArgumentException(string.Format("FactorReplacementMethod {0} cannot be handled.", replacement));
    257       }
    258 
    259       return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
    260     }
    261 
    262     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IClassificationModel model, string variable,
    263       ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues) {
    264       var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    265       dataset.ReplaceVariable(variable, replacementValues.ToList());
    266 
     296          throw new ArgumentException(string.Format("FactorReplacementMethod {0} cannot be handled.", factorReplacementMethod));
     297      }
     298
     299      return replacementValues;
     300    }
     301
     302    private static double CalculateQualityForReplacement(
     303      IClassificationModel model,
     304      ModifiableDataset modifiableDataset,
     305      string variableName,
     306      IList originalValues,
     307      IEnumerable<int> rows,
     308      IList replacementValues,
     309      IEnumerable<double> targetValues) {
     310
     311      modifiableDataset.ReplaceVariable(variableName, replacementValues);
    267312      var discModel = model as IDiscriminantFunctionClassificationModel;
    268313      if (discModel != null) {
    269         var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable);
     314        var problemData = new ClassificationProblemData(modifiableDataset, modifiableDataset.VariableNames, model.TargetVariable);
    270315        discModel.RecalculateModelParameters(problemData, rows);
    271316      }
    272317
    273318      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    274       var estimates = model.GetEstimatedClassValues(dataset, rows).ToList();
    275       dataset.ReplaceVariable(variable, originalValues);
    276 
    277       return estimates;
    278     }
    279     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IClassificationModel model, string variable,
    280       ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues) {
    281       var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
    282       dataset.ReplaceVariable(variable, replacementValues.ToList());
    283 
    284 
    285       var discModel = model as IDiscriminantFunctionClassificationModel;
    286       if (discModel != null) {
    287         var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable);
    288         discModel.RecalculateModelParameters(problemData, rows);
    289       }
    290 
    291       //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    292       var estimates = model.GetEstimatedClassValues(dataset, rows).ToList();
    293       dataset.ReplaceVariable(variable, originalValues);
    294 
    295       return estimates;
     319      var estimates = model.GetEstimatedClassValues(modifiableDataset, rows).ToList();
     320      var ret = CalculateQuality(targetValues, estimates);
     321      modifiableDataset.ReplaceVariable(variableName, originalValues);
     322
     323      return ret;
     324    }
     325
     326    public static double CalculateQuality(IEnumerable<double> targetValues, IEnumerable<double> estimatedClassValues) {
     327      OnlineCalculatorError errorState;
     328      var ret = OnlineAccuracyCalculator.Calculate(targetValues, estimatedClassValues, out errorState);
     329      if (errorState != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during calculation with replaced inputs."); }
     330      return ret;
     331    }
     332
     333    public static IEnumerable<int> GetPartitionRows(DataPartitionEnum dataPartition, IClassificationProblemData problemData) {
     334      IEnumerable<int> rows;
     335
     336      switch (dataPartition) {
     337        case DataPartitionEnum.All:
     338          rows = problemData.AllIndices;
     339          break;
     340        case DataPartitionEnum.Test:
     341          rows = problemData.TestIndices;
     342          break;
     343        case DataPartitionEnum.Training:
     344          rows = problemData.TrainingIndices;
     345          break;
     346        default:
     347          throw new NotSupportedException("DataPartition not supported");
     348      }
     349
     350      return rows;
    296351    }
    297352  }
  • stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r16435 r16438  
    2323
    2424using System;
     25using System.Collections;
    2526using System.Collections.Generic;
    2627using System.Linq;
     
    3637  [Item("RegressionSolution Impacts Calculator", "Calculation of the impacts of input variables for any regression solution")]
    3738  public sealed class RegressionSolutionVariableImpactsCalculator : ParameterizedNamedItem {
     39    #region Parameters/Properties
    3840    public enum ReplacementMethodEnum {
    3941      Median,
     
    5456
    5557    private const string ReplacementParameterName = "Replacement Method";
     58    private const string FactorReplacementParameterName = "Factor Replacement Method";
    5659    private const string DataPartitionParameterName = "DataPartition";
    5760
    5861    public IFixedValueParameter<EnumValue<ReplacementMethodEnum>> ReplacementParameter {
    5962      get { return (IFixedValueParameter<EnumValue<ReplacementMethodEnum>>)Parameters[ReplacementParameterName]; }
     63    }
     64    public IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>> FactorReplacementParameter {
     65      get { return (IFixedValueParameter<EnumValue<FactorReplacementMethodEnum>>)Parameters[FactorReplacementParameterName]; }
    6066    }
    6167    public IFixedValueParameter<EnumValue<DataPartitionEnum>> DataPartitionParameter {
     
    6773      set { ReplacementParameter.Value.Value = value; }
    6874    }
     75    public FactorReplacementMethodEnum FactorReplacementMethod {
     76      get { return FactorReplacementParameter.Value.Value; }
     77      set { FactorReplacementParameter.Value.Value = value; }
     78    }
    6979    public DataPartitionEnum DataPartition {
    7080      get { return DataPartitionParameter.Value.Value; }
    7181      set { DataPartitionParameter.Value.Value = value; }
    7282    }
    73 
    74 
     83    #endregion
     84
     85    #region Ctor/Cloner
    7586    [StorableConstructor]
    7687    private RegressionSolutionVariableImpactsCalculator(bool deserializing) : base(deserializing) { }
    7788    private RegressionSolutionVariableImpactsCalculator(RegressionSolutionVariableImpactsCalculator original, Cloner cloner)
    7889      : base(original, cloner) { }
     90    public RegressionSolutionVariableImpactsCalculator()
     91      : base() {
     92      Parameters.Add(new FixedValueParameter<EnumValue<ReplacementMethodEnum>>(ReplacementParameterName, "The replacement method for variables during impact calculation.", new EnumValue<ReplacementMethodEnum>(ReplacementMethodEnum.Shuffle)));
     93      Parameters.Add(new FixedValueParameter<EnumValue<FactorReplacementMethodEnum>>(FactorReplacementParameterName, "The replacement method for factor variables during impact calculation.", new EnumValue<FactorReplacementMethodEnum>(FactorReplacementMethodEnum.Best)));
     94      Parameters.Add(new FixedValueParameter<EnumValue<DataPartitionEnum>>(DataPartitionParameterName, "The data partition on which the impacts are calculated.", new EnumValue<DataPartitionEnum>(DataPartitionEnum.Training)));
     95    }
     96
    7997    public override IDeepCloneable Clone(Cloner cloner) {
    8098      return new RegressionSolutionVariableImpactsCalculator(this, cloner);
    8199    }
    82 
    83     public RegressionSolutionVariableImpactsCalculator()
    84       : base() {
    85       Parameters.Add(new FixedValueParameter<EnumValue<ReplacementMethodEnum>>(ReplacementParameterName, "The replacement method for variables during impact calculation.", new EnumValue<ReplacementMethodEnum>(ReplacementMethodEnum.Median)));
    86       Parameters.Add(new FixedValueParameter<EnumValue<DataPartitionEnum>>(DataPartitionParameterName, "The data partition on which the impacts are calculated.", new EnumValue<DataPartitionEnum>(DataPartitionEnum.Training)));
    87     }
     100    #endregion
    88101
    89102    //mkommend: annoying name clash with static method, open to better naming suggestions
    90103    public IEnumerable<Tuple<string, double>> Calculate(IRegressionSolution solution) {
    91       return CalculateImpacts(solution, DataPartition, ReplacementMethod);
     104      return CalculateImpacts(solution, ReplacementMethod, FactorReplacementMethod, DataPartition);
    92105    }
    93106
    94107    public static IEnumerable<Tuple<string, double>> CalculateImpacts(
    95108      IRegressionSolution solution,
    96       DataPartitionEnum data = DataPartitionEnum.Training,
    97       ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     109      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
    98110      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
    99       Func<double, string, bool> progressCallback = null) {
    100 
    101       var problemData = solution.ProblemData;
    102       var dataset = problemData.Dataset;
    103 
    104       IEnumerable<int> rows;
    105       IEnumerable<double> targetValues;
    106       double originalR2 = -1;
    107 
    108       OnlineCalculatorError error;
    109 
    110       switch (data) {
    111         case DataPartitionEnum.All:
    112           rows = solution.ProblemData.AllIndices;
    113           targetValues = problemData.TargetVariableValues.ToList();
    114           originalR2 = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out error);
    115           if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation.");
    116           originalR2 = originalR2 * originalR2;
    117           break;
    118         case DataPartitionEnum.Training:
    119           rows = problemData.TrainingIndices;
    120           targetValues = problemData.TargetVariableTrainingValues.ToList();
    121           originalR2 = solution.TrainingRSquared;
    122           break;
    123         case DataPartitionEnum.Test:
    124           rows = problemData.TestIndices;
    125           targetValues = problemData.TargetVariableTestValues.ToList();
    126           originalR2 = solution.TestRSquared;
    127           break;
    128         default: throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
    129       }
     111      DataPartitionEnum dataPartition = DataPartitionEnum.Training) {
     112
     113      IEnumerable<int> rows = GetPartitionRows(dataPartition, solution.ProblemData);
     114      IEnumerable<double> estimatedValues = solution.GetEstimatedValues(rows);
     115      return CalculateImpacts(solution.Model, solution.ProblemData, estimatedValues, rows, replacementMethod, factorReplacementMethod);
     116    }
     117
     118    public static IEnumerable<Tuple<string, double>> CalculateImpacts(
     119     IRegressionModel model,
     120     IRegressionProblemData problemData,
     121     IEnumerable<double> estimatedValues,
     122     IEnumerable<int> rows,
     123     ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
     124     FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
     125
     126      //fholzing: try and catch in case a different dataset is loaded, otherwise statement is neglectable
     127      var missingVariables = model.VariablesUsedForPrediction.Except(problemData.Dataset.VariableNames);
     128      if (missingVariables.Any()) {
     129        throw new InvalidOperationException(string.Format("Can not calculate variable impacts, because the model uses inputs missing in the dataset ({0})", string.Join(", ", missingVariables)));
     130      }
     131      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
     132      var originalQuality = CalculateQuality(targetValues, estimatedValues);
    130133
    131134      var impacts = new Dictionary<string, double>();
    132       var modifiableDataset = ((Dataset)dataset).ToModifiable();
    133 
    134       var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(solution.Model.VariablesUsedForPrediction));
    135       var allowedInputVariables = dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();
    136 
    137       int curIdx = 0;
    138       int count = allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>).Count();
    139       // calculate impacts for double variables
    140       foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
    141         //Report the current progress in percent. If the callback returns true, it means the execution shall be stopped
    142         if (progressCallback != null) {
    143           curIdx++;
    144           if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; }
    145         }
    146         var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
    147         var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    148         if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    149 
    150         newR2 = newR2 * newR2;
    151         var impact = originalR2 - newR2;
    152         impacts[inputVariable] = impact;
    153       }
    154 
    155       // calculate impacts for string variables
    156       foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) {
    157         if (factorReplacementMethod == FactorReplacementMethodEnum.Best) {
    158           // try replacing with all possible values and find the best replacement value
    159           var smallestImpact = double.PositiveInfinity;
    160           foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
    161             var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows,
    162               Enumerable.Repeat(repl, dataset.Rows));
    163             var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    164             if (error != OnlineCalculatorError.None)
    165               throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    166 
    167             newR2 = newR2 * newR2;
    168             var impact = originalR2 - newR2;
    169             if (impact < smallestImpact) smallestImpact = impact;
    170           }
    171           impacts[inputVariable] = smallestImpact;
    172         } else {
    173           // for replacement methods shuffle and mode
    174           // calculate impacts for factor variables
    175 
    176           var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows,
    177             factorReplacementMethod);
    178           var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    179           if (error != OnlineCalculatorError.None)
    180             throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    181 
    182           newR2 = newR2 * newR2;
    183           var impact = originalR2 - newR2;
    184           impacts[inputVariable] = impact;
    185         }
    186       } // foreach
    187       return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value));
    188     }
    189 
    190 
    191     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
    192       var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
     135      var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(model.VariablesUsedForPrediction));
     136      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
     137
     138      foreach (var inputVariable in inputvariables) {
     139        impacts[inputVariable] = CalculateImpact(inputVariable, model, problemData, modifiableDataset, rows, replacementMethod, factorReplacementMethod, targetValues, originalQuality);
     140      }
     141
     142      return impacts.Select(i => Tuple.Create(i.Key, i.Value));
     143    }
     144
     145    public static double CalculateImpact(string variableName,
     146      IRegressionModel model,
     147      IRegressionProblemData problemData,
     148      ModifiableDataset modifiableDataset,
     149      IEnumerable<int> rows,
     150      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
     151      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
     152      IEnumerable<double> targetValues = null,
     153      double quality = double.NaN) {
     154
     155      if (!model.VariablesUsedForPrediction.Contains(variableName)) { return 0.0; }
     156      if (!problemData.Dataset.VariableNames.Contains(variableName)) {
     157        throw new InvalidOperationException(string.Format("Can not calculate variable impact, because the model uses inputs missing in the dataset ({0})", variableName));
     158      }
     159
     160      if (targetValues == null) {
     161        targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
     162      }
     163      if (quality == double.NaN) {
     164        quality = CalculateQuality(model.GetEstimatedValues(modifiableDataset, rows), targetValues);
     165      }
     166
     167      IList originalValues = null;
     168      IList replacementValues = GetReplacementValues(modifiableDataset, variableName, model, rows, targetValues, out originalValues, replacementMethod, factorReplacementMethod);
     169
     170      double newValue = CalculateQualityForReplacement(model, modifiableDataset, variableName, originalValues, rows, replacementValues, targetValues);
     171      double impact = quality - newValue;
     172
     173      return impact;
     174    }
     175
    /// <summary>
    /// Produces the replacement column for <paramref name="variableName"/> by dispatching on the
    /// variable's type (double or string) and hands the values currently stored in the dataset
    /// back through <paramref name="originalValues"/>.
    /// </summary>
    /// <param name="modifiableDataset">Dataset the variable's values are read from.</param>
    /// <param name="variableName">Name of the variable to generate replacement values for.</param>
    /// <param name="model">Model forwarded to the string (factor) replacement routine.</param>
    /// <param name="rows">Row indices forwarded to the type-specific replacement routine.</param>
    /// <param name="targetValues">Target values forwarded to the string (factor) replacement routine.</param>
    /// <param name="originalValues">Receives the list of values read from the dataset for the variable.</param>
    /// <param name="replacementMethod">Replacement strategy applied to double variables.</param>
    /// <param name="factorReplacementMethod">Replacement strategy applied to string variables.</param>
    /// <returns>The list returned by the type-specific replacement routine.</returns>
    /// <exception cref="NotSupportedException">The variable is neither of type double nor of type string.</exception>
    private static IList GetReplacementValues(ModifiableDataset modifiableDataset,
      string variableName,
      IRegressionModel model,
      IEnumerable<int> rows,
      IEnumerable<double> targetValues,
      out IList originalValues,
      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {

      // numeric variable: keep a typed copy so no cast back from IList is needed
      if (modifiableDataset.VariableHasType<double>(variableName)) {
        List<double> doubleColumn = modifiableDataset.GetReadOnlyDoubleValues(variableName).ToList();
        originalValues = doubleColumn;
        return GetReplacementValuesForDouble(modifiableDataset, rows, doubleColumn, replacementMethod);
      }

      // string (factor) variable
      if (modifiableDataset.VariableHasType<string>(variableName)) {
        List<string> stringColumn = modifiableDataset.GetReadOnlyStringValues(variableName).ToList();
        originalValues = stringColumn;
        return GetReplacementValuesForString(model, modifiableDataset, variableName, rows, stringColumn, targetValues, factorReplacementMethod);
      }

      throw new NotSupportedException("Variable not supported");
    }
     198
     199    private static IList GetReplacementValuesForDouble(ModifiableDataset modifiableDataset,
     200      IEnumerable<int> rows,
     201      List<double> originalValues,
     202      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle) {
     203
     204      IRandom random = new FastRandom(31415);
     205      List<double> replacementValues;
    193206      double replacementValue;
    194       List<double> replacementValues;
    195       IRandom rand;
    196 
    197       switch (replacement) {
     207
     208      switch (replacementMethod) {
    198209        case ReplacementMethodEnum.Median:
    199210          replacementValue = rows.Select(r => originalValues[r]).Median();
    200           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
     211          replacementValues = Enumerable.Repeat(replacementValue, modifiableDataset.Rows).ToList();
    201212          break;
    202213        case ReplacementMethodEnum.Average:
    203214          replacementValue = rows.Select(r => originalValues[r]).Average();
    204           replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
     215          replacementValues = Enumerable.Repeat(replacementValue, modifiableDataset.Rows).ToList();
    205216          break;
    206217        case ReplacementMethodEnum.Shuffle:
    207218          // new var has same empirical distribution but the relation to y is broken
    208           rand = new FastRandom(31415);
    209219          // prepare a complete column for the dataset
    210           replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
     220          replacementValues = Enumerable.Repeat(double.NaN, modifiableDataset.Rows).ToList();
    211221          // shuffle only the selected rows
    212           var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
     222          var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(random).ToList();
    213223          int i = 0;
    214224          // update column values
     
    220230          var avg = rows.Select(r => originalValues[r]).Average();
    221231          var stdDev = rows.Select(r => originalValues[r]).StandardDeviation();
    222           rand = new FastRandom(31415);
    223232          // prepare a complete column for the dataset
    224           replacementValues = Enumerable.Repeat(double.NaN, dataset.Rows).ToList();
     233          replacementValues = Enumerable.Repeat(double.NaN, modifiableDataset.Rows).ToList();
    225234          // update column values
    226235          foreach (var r in rows) {
    227             replacementValues[r] = NormalDistributedRandom.NextDouble(rand, avg, stdDev);
     236            replacementValues[r] = NormalDistributedRandom.NextDouble(random, avg, stdDev);
    228237          }
    229238          break;
    230239
    231240        default:
    232           throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
    233       }
    234 
    235       return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
    236     }
    237 
    238     private static IEnumerable<double> EvaluateModelWithReplacedVariable(
    239       IRegressionModel model, string variable, ModifiableDataset dataset,
    240       IEnumerable<int> rows,
    241       FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Shuffle) {
    242       var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
    243       List<string> replacementValues;
    244       IRandom rand;
    245 
    246       switch (replacement) {
     241          throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacementMethod));
     242      }
     243
     244      return replacementValues;
     245    }
     246
     247    private static IList GetReplacementValuesForString(IRegressionModel model,
     248      ModifiableDataset modifiableDataset,
     249      string variableName,
     250      IEnumerable<int> rows,
     251      List<string> originalValues,
     252      IEnumerable<double> targetValues,
     253      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Shuffle) {
     254
     255      List<string> replacementValues = null;
     256      IRandom random = new FastRandom(31415);
     257
     258      switch (factorReplacementMethod) {
     259        case FactorReplacementMethodEnum.Best:
     260          // try replacing with all possible values and find the best replacement value
     261          var bestQuality = double.NegativeInfinity;
     262          foreach (var repl in modifiableDataset.GetStringValues(variableName, rows).Distinct()) {
     263            List<string> curReplacementValues = Enumerable.Repeat(repl, modifiableDataset.Rows).ToList();
     264            //fholzing: this result could be used later on (theoretically), but is neglected for better readability/method consistency
     265            var newValue = CalculateQualityForReplacement(model, modifiableDataset, variableName, originalValues, rows, curReplacementValues, targetValues);
     266            var curQuality = newValue;
     267
     268            if (curQuality > bestQuality) {
     269              bestQuality = curQuality;
     270              replacementValues = curReplacementValues;
     271            }
     272          }
     273          break;
    247274        case FactorReplacementMethodEnum.Mode:
    248275          var mostCommonValue = rows.Select(r => originalValues[r])
     
    250277            .OrderByDescending(g => g.Count())
    251278            .First().Key;
    252           replacementValues = Enumerable.Repeat(mostCommonValue, dataset.Rows).ToList();
     279          replacementValues = Enumerable.Repeat(mostCommonValue, modifiableDataset.Rows).ToList();
    253280          break;
    254281        case FactorReplacementMethodEnum.Shuffle:
    255282          // new var has same empirical distribution but the relation to y is broken
    256           rand = new FastRandom(31415);
    257283          // prepare a complete column for the dataset
    258           replacementValues = Enumerable.Repeat(string.Empty, dataset.Rows).ToList();
     284          replacementValues = Enumerable.Repeat(string.Empty, modifiableDataset.Rows).ToList();
    259285          // shuffle only the selected rows
    260           var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
     286          var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(random).ToList();
    261287          int i = 0;
    262288          // update column values
     
    266292          break;
    267293        default:
    268           throw new ArgumentException(string.Format("FactorReplacementMethod {0} cannot be handled.", replacement));
    269       }
    270 
    271       return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
    272     }
    273 
    274     private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable,
    275       ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues) {
    276       var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    277       dataset.ReplaceVariable(variable, replacementValues.ToList());
     294          throw new ArgumentException(string.Format("FactorReplacementMethod {0} cannot be handled.", factorReplacementMethod));
     295      }
     296
     297      return replacementValues;
     298    }
     299
    /// <summary>
    /// Temporarily replaces the column of <paramref name="variableName"/> with
    /// <paramref name="replacementValues"/>, evaluates the model on <paramref name="rows"/> and
    /// returns the quality of the resulting estimates as computed by <c>CalculateQuality</c>.
    /// </summary>
    /// <param name="model">Model whose estimates are evaluated.</param>
    /// <param name="modifiableDataset">Dataset that is modified for the evaluation and restored afterwards.</param>
    /// <param name="variableName">Name of the variable whose column is swapped.</param>
    /// <param name="originalValues">Values written back to the dataset after the evaluation.</param>
    /// <param name="rows">Row indices the model is evaluated on.</param>
    /// <param name="replacementValues">Values written to the dataset for the evaluation.</param>
    /// <param name="targetValues">Target values the estimates are compared against.</param>
    /// <returns>The quality obtained with the replaced variable.</returns>
    private static double CalculateQualityForReplacement(
      IRegressionModel model,
      ModifiableDataset modifiableDataset,
      string variableName,
      IList originalValues,
      IEnumerable<int> rows,
      IList replacementValues,
      IEnumerable<double> targetValues) {

      modifiableDataset.ReplaceVariable(variableName, replacementValues);
      try {
        //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
        var estimates = model.GetEstimatedValues(modifiableDataset, rows).ToList();
        return CalculateQuality(targetValues, estimates);
      } finally {
        // restore the original column even if the model evaluation or the quality calculation throws,
        // otherwise the dataset would keep the replacement values for all subsequent calculations
        modifiableDataset.ReplaceVariable(variableName, originalValues);
      }
    }
     317
    /// <summary>
    /// Returns the square of the value computed by <c>OnlinePearsonsRCalculator.Calculate</c>
    /// for the given target and estimated values.
    /// </summary>
    /// <param name="targetValues">Target values passed as first sequence to the calculator.</param>
    /// <param name="estimatedValues">Estimated values passed as second sequence to the calculator.</param>
    /// <returns>The squared calculator result.</returns>
    /// <exception cref="InvalidOperationException">The calculator reported an error state other than <c>None</c>.</exception>
    public static double CalculateQuality(IEnumerable<double> targetValues, IEnumerable<double> estimatedValues) {
      OnlineCalculatorError calculationError;
      var r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out calculationError);

      if (calculationError == OnlineCalculatorError.None) {
        return r * r;
      }

      throw new InvalidOperationException("Error during calculation with replaced inputs.");
    }
     324
    /// <summary>
    /// Maps a data partition selector to the corresponding row indices of the problem data.
    /// </summary>
    /// <param name="dataPartition">Partition to select (All, Test or Training).</param>
    /// <param name="problemData">Problem data providing the index collections.</param>
    /// <returns>
    /// <c>AllIndices</c>, <c>TestIndices</c> or <c>TrainingIndices</c> of <paramref name="problemData"/>,
    /// depending on <paramref name="dataPartition"/>.
    /// </returns>
    /// <exception cref="NotSupportedException">The partition value is not one of the handled cases.</exception>
    public static IEnumerable<int> GetPartitionRows(DataPartitionEnum dataPartition, IRegressionProblemData problemData) {
      switch (dataPartition) {
        case DataPartitionEnum.All:
          return problemData.AllIndices;
        case DataPartitionEnum.Test:
          return problemData.TestIndices;
        case DataPartitionEnum.Training:
          return problemData.TrainingIndices;
        default:
          throw new NotSupportedException("DataPartition not supported");
      }
    }
    294344  }
  • stable/HeuristicLab.Tests

  • stable/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4/IntervalInterpreterTest.cs

    r16436 r16438  
    11using System;
    22using System.Collections.Generic;
    3 using HeuristicLab.Problems.DataAnalysis;
    4 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    53using Microsoft.VisualStudio.TestTools.UnitTesting;
    64
  • stable/HeuristicLab.Tests/HeuristicLab.Tests.csproj

    r16436 r16438  
    583583    <Compile Include="HeuristicLab.Persistence-3.3\UseCases.cs" />
    584584    <Compile Include="HeuristicLab.PluginInfraStructure-3.3\TypeExtensionsTest.cs" />
     585    <Compile Include="HeuristicLab.Problems.DataAnalysis-3.4\ClassificationVariableImpactCalculationTest.cs" />
    585586    <Compile Include="HeuristicLab.Problems.DataAnalysis-3.4\IntervalTest.cs" />
     587    <Compile Include="HeuristicLab.Problems.DataAnalysis-3.4\RegressionVariableImpactCalculationTest.cs" />
    586588    <Compile Include="HeuristicLab.Problems.DataAnalysis-3.4\ThresholdCalculatorsTest.cs" />
    587589    <Compile Include="HeuristicLab.Problems.DataAnalysis-3.4\OnlineCalculatorPerformanceTest.cs" />
    588590    <Compile Include="HeuristicLab.Problems.DataAnalysis-3.4\StatisticCalculatorsTest.cs" />
    589     <Compile Include="HeuristicLab.Problems.DataAnalysis-3.4\VariableImpactCalculationTest.cs" />
    590591    <Compile Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4\InfixExpressionParserTest.cs" />
    591592    <Compile Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4\IntervalInterpreterTest.cs" />
Note: See TracChangeset for help on using the changeset viewer.