Changeset 6709


Ignore:
Timestamp:
09/05/11 14:20:39 (8 years ago)
Author:
gkronber
Message:

#1635: Symbolic regression variable impacts can now be calculated separately for each fold of a cross-validation run. Added a drop-down box to the variable impact view for choosing the fold. Minor change: variable impacts are rounded to 3 decimal places.

Location:
trunk/sources
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Encodings.SymbolicExpressionTreeEncoding/3.4/Analyzers/SymbolicExpressionSymbolFrequencyAnalyzer.cs

    r5983 r6709  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    108109          symbolFrequencies.Rows.Add(row);
    109110        }
    110         symbolFrequencies.Rows[pair.Key].Values.Add(pair.Value);
     111        symbolFrequencies.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
    111112      }
    112113
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.Designer.cs

    r5809 r6709  
    4646    private void InitializeComponent() {
    4747      this.matrixView = new HeuristicLab.Data.Views.StringConvertibleMatrixView();
     48      this.comboBox = new System.Windows.Forms.ComboBox();
     49      this.label1 = new System.Windows.Forms.Label();
     50      this.variableImpactsGroupBox = new System.Windows.Forms.GroupBox();
     51      this.variableImpactsGroupBox.SuspendLayout();
    4852      this.SuspendLayout();
    4953      //
     
    5559      this.matrixView.Caption = "StringConvertibleMatrix View";
    5660      this.matrixView.Content = null;
    57       this.matrixView.Location = new System.Drawing.Point(3, 3);
     61      this.matrixView.Location = new System.Drawing.Point(6, 19);
    5862      this.matrixView.Name = "matrixView";
    5963      this.matrixView.ReadOnly = true;
    60       this.matrixView.Size = new System.Drawing.Size(303, 229);
     64      this.matrixView.ShowRowsAndColumnsTextBox = true;
     65      this.matrixView.ShowStatisticalInformation = true;
     66      this.matrixView.Size = new System.Drawing.Size(294, 174);
    6167      this.matrixView.TabIndex = 0;
     68      //
     69      // comboBox
     70      //
     71      this.comboBox.FormattingEnabled = true;
     72      this.comboBox.Location = new System.Drawing.Point(39, 6);
     73      this.comboBox.Name = "comboBox";
     74      this.comboBox.Size = new System.Drawing.Size(68, 21);
     75      this.comboBox.TabIndex = 1;
     76      this.comboBox.SelectedValueChanged += new System.EventHandler(this.comboBox_SelectedValueChanged);
     77      //
     78      // label1
     79      //
     80      this.label1.AutoSize = true;
     81      this.label1.Location = new System.Drawing.Point(3, 9);
     82      this.label1.Name = "label1";
     83      this.label1.Size = new System.Drawing.Size(30, 13);
     84      this.label1.TabIndex = 2;
     85      this.label1.Text = "Fold:";
     86      //
     87      // variableImpactsGroupBox
     88      //
     89      this.variableImpactsGroupBox.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
     90                  | System.Windows.Forms.AnchorStyles.Left)
     91                  | System.Windows.Forms.AnchorStyles.Right)));
     92      this.variableImpactsGroupBox.Controls.Add(this.matrixView);
     93      this.variableImpactsGroupBox.Location = new System.Drawing.Point(0, 33);
     94      this.variableImpactsGroupBox.Name = "variableImpactsGroupBox";
     95      this.variableImpactsGroupBox.Size = new System.Drawing.Size(306, 199);
     96      this.variableImpactsGroupBox.TabIndex = 3;
     97      this.variableImpactsGroupBox.TabStop = false;
     98      this.variableImpactsGroupBox.Text = "Variable impacts:";
    6299      //
    63100      // RunCollectionVariableImpactView
     
    65102      this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
    66103      this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
    67       this.Controls.Add(this.matrixView);
     104      this.Controls.Add(this.variableImpactsGroupBox);
     105      this.Controls.Add(this.label1);
     106      this.Controls.Add(this.comboBox);
    68107      this.Name = "RunCollectionVariableImpactView";
    69108      this.Size = new System.Drawing.Size(309, 235);
     109      this.variableImpactsGroupBox.ResumeLayout(false);
    70110      this.ResumeLayout(false);
     111      this.PerformLayout();
    71112
    72113    }
     
    75116
    76117    private HeuristicLab.Data.Views.StringConvertibleMatrixView matrixView;
     118    private System.Windows.Forms.ComboBox comboBox;
     119    private System.Windows.Forms.Label label1;
     120    private System.Windows.Forms.GroupBox variableImpactsGroupBox;
    77121  }
    78122}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.cs

    r5975 r6709  
    3535  public sealed partial class RunCollectionVariableImpactView : AsynchronousContentView {
    3636    private const string variableImpactResultName = "Variable impacts";
     37    private const string crossValidationFoldsResultName = "CrossValidation Folds";
     38    private const string numberOfFoldsParameterName = "Folds";
    3739    public RunCollectionVariableImpactView() {
    3840      InitializeComponent();
     
    9597    }
    9698
     99    private void comboBox_SelectedValueChanged(object sender, EventArgs e) {
     100      if (comboBox.SelectedItem != null) {
     101        var cvRuns = from r in Content
     102                     where r.Visible
     103                     where r.Parameters.ContainsKey(numberOfFoldsParameterName)
     104                     select r;
     105        var selectedFolds = from r in cvRuns
     106                            let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName]
     107                            select (IRun)foldCollection.ElementAt((int)comboBox.SelectedItem).Clone();
     108        matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray(), cvRuns.Select(r => r.Name).ToArray());
     109      }
     110    }
     111
     112
    97113    private void UpdateData() {
    98       matrixView.Content = CalculateVariableImpactMatrix();
    99     }
    100 
    101     private DoubleMatrix CalculateVariableImpactMatrix() {
     114      if (Content != null) {
     115        comboBox.Items.Clear();
     116        comboBox.Enabled = false;
     117        var visibleRuns = Content.Where(r => r.Visible).ToArray();
     118        var representativeCvRun =
     119          visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName)).FirstOrDefault();
     120        if (representativeCvRun != null) {
     121          // make sure all runs have the same number of folds
     122          int nFolds = ((IntValue)representativeCvRun.Parameters[numberOfFoldsParameterName]).Value;
     123          var cvRuns = visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName));
     124          if (cvRuns.All(r => ((IntValue)r.Parameters[numberOfFoldsParameterName]).Value == nFolds)) {
     125            // populate combobox
     126            for (int foldIndex = 0; foldIndex < nFolds; foldIndex++) {
     127              comboBox.Items.Add(foldIndex);
     128            }
     129            comboBox.Enabled = true;
     130            var selectedFolds = from r in cvRuns
     131                                let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName]
     132                                select foldCollection.First();
     133            matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray(), cvRuns.Select(f => f.Name).ToArray());
     134          } else {
     135            matrixView.Content = null;
     136          }
     137        } else {
     138          var runsWithVariables = visibleRuns.Where(r => r.Results.ContainsKey(variableImpactResultName)).ToArray();
     139          matrixView.Content = CalculateVariableImpactMatrix(runsWithVariables);
     140        }
     141      }
     142    }
     143
     144    private IStringConvertibleMatrix CalculateVariableImpactMatrix(IRun[] runs) {
     145      return CalculateVariableImpactMatrix(runs, runs.Select(r => r.Name).ToArray());
     146    }
     147
     148    private DoubleMatrix CalculateVariableImpactMatrix(IRun[] runs, string[] runNames) {
    102149      DoubleMatrix matrix = null;
    103       if (Content != null) {
    104         List<IRun> runsWithVariables = Content.Where(r => r.Visible && r.Results.ContainsKey(variableImpactResultName)).ToList();
    105         IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runsWithVariables
    106                                                         select run.Results[variableImpactResultName]).Cast<DoubleMatrix>();
    107         IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts
    108                                              from variableName in variableImpact.RowNames
    109                                              select variableName)
    110                                             .Distinct();
    111         // filter variableNames: only include names that have at least one non-zero value in a run
    112         List<string> variableNamesList = (from variableName in variableNames
    113                                           where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0))
    114                                           select variableName)
    115                                          .ToList();
    116 
    117         List<string> statictics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" };
    118         List<string> columnNames = runsWithVariables.Select(r => r.Name).ToList();
    119         columnNames.AddRange(statictics);
    120         int runs = runsWithVariables.Count();
    121 
    122         matrix = new DoubleMatrix(variableNamesList.Count, runs + statictics.Count);
    123         matrix.SortableView = true;
    124         matrix.RowNames = variableNamesList;
    125         matrix.ColumnNames = columnNames;
    126 
    127         for (int i = 0; i < runsWithVariables.Count; i++) {
    128           IRun run = runsWithVariables[i];
    129           DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName];
    130           for (int j = 0; j < runVariableImpacts.Rows; j++) {
    131             int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j));
    132             if (rowIndex > -1) {
    133               matrix[rowIndex, i] = runVariableImpacts[j, 0];
    134             }
    135           }
    136         }
    137 
    138         List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList
    139                                                       select GetVariableImpacts(variableName, allVariableImpacts).ToList())
    140                                                      .ToList();
    141         List<List<double>> variableRanks = (from variableName in variableNamesList
    142                                             select GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
    143                                         .ToList();
    144         if (variableImpactsOverRuns.Count() > 0) {
    145           // the variable with the worst median impact value is chosen as the reference variable
    146           // this is problematic if all variables are relevant, however works often in practice
    147           List<double> referenceImpacts = (from impacts in variableImpactsOverRuns
    148                                            let avg = impacts.Median()
    149                                            orderby avg
    150                                            select impacts)
    151                                            .First();
    152           // for all variables
    153           for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
    154             // median rank
    155             matrix[row, runs] = variableRanks[row].Median();
    156             // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
    157             matrix[row, runs + 1] = variableImpactsOverRuns[row].Average();
    158             matrix[row, runs + 2] = variableImpactsOverRuns[row].StandardDeviation();
    159 
    160             double leftTail = 0; double rightTail = 0; double bothTails = 0;
    161             // calc differences of impacts for current variable and reference variable
    162             double[] z = new double[referenceImpacts.Count];
    163             for (int i = 0; i < z.Length; i++) {
    164               z[i] = variableImpactsOverRuns[row][i] - referenceImpacts[i];
    165             }
    166             // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent
    167             alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail);
    168             matrix[row, runs + 3] = bothTails;
    169           }
    170         }
    171       }
    172       return matrix;
     150      IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runs
     151                                                      select run.Results[variableImpactResultName]).Cast<DoubleMatrix>();
     152      IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts
     153                                           from variableName in variableImpact.RowNames
     154                                           select variableName)
     155                                          .Distinct();
     156      // filter variableNames: only include names that have at least one non-zero value in a run
     157      List<string> variableNamesList = (from variableName in variableNames
     158                                        where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0))
     159                                        select variableName)
     160                                       .ToList();
     161
     162      List<string> statictics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" };
     163      List<string> columnNames = new List<string>(runNames);
     164      columnNames.AddRange(statictics);
     165      int numberOfRuns = runs.Length;
     166
     167      matrix = new DoubleMatrix(variableNamesList.Count, numberOfRuns + statictics.Count);
     168      matrix.SortableView = true;
     169      matrix.RowNames = variableNamesList;
     170      matrix.ColumnNames = columnNames;
     171
     172      // calculate statistics
     173      List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList
     174                                                    select GetVariableImpacts(variableName, allVariableImpacts).ToList())
     175                                             .ToList();
     176      List<List<double>> variableRanks = (from variableName in variableNamesList
     177                                          select GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
     178                                      .ToList();
     179      if (variableImpactsOverRuns.Count() > 0) {
     180        // the variable with the worst median impact value is chosen as the reference variable
     181        // this is problematic if all variables are relevant, however works often in practice
     182        List<double> referenceImpacts = (from impacts in variableImpactsOverRuns
     183                                         let avg = impacts.Median()
     184                                         orderby avg
     185                                         select impacts)
     186                                         .First();
     187        // for all variables
     188        for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
     189          // median rank
     190          matrix[row, numberOfRuns] = variableRanks[row].Median();
     191          // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
     192          matrix[row, numberOfRuns + 1] = Math.Round(variableImpactsOverRuns[row].Average(), 3);
     193          matrix[row, numberOfRuns + 2] = Math.Round(variableImpactsOverRuns[row].StandardDeviation(), 3);
     194
     195          double leftTail = 0; double rightTail = 0; double bothTails = 0;
     196          // calc differences of impacts for current variable and reference variable
     197          double[] z = new double[referenceImpacts.Count];
     198          for (int i = 0; i < z.Length; i++) {
     199            z[i] = variableImpactsOverRuns[row][i] - referenceImpacts[i];
     200          }
     201          // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent
     202          alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail);
     203          matrix[row, numberOfRuns + 3] = Math.Round(bothTails, 4);
     204        }
     205      }
     206
     207      // fill matrix with impacts from runs
     208      for (int i = 0; i < runs.Length; i++) {
     209        IRun run = runs[i];
     210        DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName];
     211        for (int j = 0; j < runVariableImpacts.Rows; j++) {
     212          int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j));
     213          if (rowIndex > -1) {
     214            matrix[rowIndex, i] = Math.Round(runVariableImpacts[j, 0], 3);
     215          }
     216        }
     217      }
     218      // sort by median
     219      var sortedMatrix = (DoubleMatrix)matrix.Clone();
     220      var sortedIndexes = from i in Enumerable.Range(0, sortedMatrix.Rows)
     221                          orderby matrix[i, numberOfRuns]
     222                          select i;
     223
     224      int targetIndex = 0;
     225      foreach (var sourceIndex in sortedIndexes) {
     226        for (int c = 0; c < matrix.Columns; c++)
     227          sortedMatrix[targetIndex, c] = matrix[sourceIndex, c];
     228        targetIndex++;
     229      }
     230      return sortedMatrix;
    173231    }
    174232
     
    213271      }
    214272    }
     273
    215274  }
    216275}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs

    r5924 r6709  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    104105          datatable.Rows.Add(row);
    105106        }
    106         datatable.Rows[pair.Key].Values.Add(pair.Value);
     107        datatable.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
    107108      }
    108109
Note: See TracChangeset for help on using the changeset viewer.