Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/14/11 13:59:25 (13 years ago)
Author:
epitzer
Message:

#1530 integrate changes from trunk

Location:
branches/PersistenceSpeedUp
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/PersistenceSpeedUp

  • branches/PersistenceSpeedUp/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.cs

    r5975 r6760  
    3535  public sealed partial class RunCollectionVariableImpactView : AsynchronousContentView {
    3636    private const string variableImpactResultName = "Variable impacts";
     37    private const string crossValidationFoldsResultName = "CrossValidation Folds";
     38    private const string numberOfFoldsParameterName = "Folds";
    3739    public RunCollectionVariableImpactView() {
    3840      InitializeComponent();
     
    9597    }
    9698
     99    private void comboBox_SelectedValueChanged(object sender, EventArgs e) {
              // Rebuilds the matrix shown in matrixView for the fold index that is
              // currently selected in comboBox. Does nothing when no item is selected.
     100      if (comboBox.SelectedItem != null) {
                // Cross-validation runs: visible runs of Content that carry a "Folds" parameter.
                // This is a deferred query; it is enumerated once for the folds and once more
                // for the column names below.
     101        var cvRuns = from r in Content
     102                     where r.Visible
     103                     where r.Parameters.ContainsKey(numberOfFoldsParameterName)
     104                     select r;
                // For every cross-validation run take the fold at the selected index out of its
                // "CrossValidation Folds" result and clone it.
                // NOTE(review): Results is indexed without a ContainsKey check, so a run that has
                // the "Folds" parameter but no "CrossValidation Folds" result presumably throws here - confirm.
                // NOTE(review): the selected item is cast to int; UpdateData adds int fold indexes
                // to comboBox.Items, which is what makes this cast valid.
     105        var selectedFolds = from r in cvRuns
     106                            let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName]
     107                            select (IRun)foldCollection.ElementAt((int)comboBox.SelectedItem).Clone();
                // Columns are labelled with the names of the parent cross-validation runs,
                // not with the names of the individual fold runs.
     108        matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray(), cvRuns.Select(r => r.Name).ToArray());
     109      }
     110    }
     111
     112
    97113    private void UpdateData() {
    98       matrixView.Content = CalculateVariableImpactMatrix();
    99     }
    100 
    101     private DoubleMatrix CalculateVariableImpactMatrix() {
              // Refreshes the fold combo box and the displayed matrix from Content.
              // Nothing is changed when Content is null.
     114      if (Content != null) {
                // Start from an empty, disabled combo box; it is only re-enabled further down
                // when cross-validation runs with a consistent fold count are found.
     115        comboBox.Items.Clear();
     116        comboBox.Enabled = false;
     117        var visibleRuns = Content.Where(r => r.Visible).ToArray();
                // The first visible run that has a "Folds" parameter decides whether the
                // cross-validation branch is taken and supplies the reference fold count.
     118        var representativeCvRun =
     119          visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName)).FirstOrDefault();
     120        if (representativeCvRun != null) {
     121          // make sure all runs have the same number of folds
     122          int nFolds = ((IntValue)representativeCvRun.Parameters[numberOfFoldsParameterName]).Value;
     123          var cvRuns = visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName));
     124          if (cvRuns.All(r => ((IntValue)r.Parameters[numberOfFoldsParameterName]).Value == nFolds)) {
     125            // populate combobox
     126            for (int foldIndex = 0; foldIndex < nFolds; foldIndex++) {
     127              comboBox.Items.Add(foldIndex);
     128            }
     129            comboBox.Enabled = true;
                    // Initially show the first fold of every cross-validation run; visible runs
                    // without a "Folds" parameter are left out of the matrix in this branch.
                    // NOTE(review): unlike comboBox_SelectedValueChanged, the fold run is not cloned
                    // here, and Results is indexed without a ContainsKey check - confirm both are intended.
     130            var selectedFolds = from r in cvRuns
     131                                let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName]
     132                                select foldCollection.First();
     133            matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray(), cvRuns.Select(f => f.Name).ToArray());
     134          } else {
                    // Fold counts differ between the cross-validation runs: show nothing and
                    // leave the combo box empty and disabled.
     135            matrixView.Content = null;
     136          }
     137        } else {
                  // No cross-validation runs: use every visible run that has a "Variable impacts" result.
     138          var runsWithVariables = visibleRuns.Where(r => r.Results.ContainsKey(variableImpactResultName)).ToArray();
     139          matrixView.Content = CalculateVariableImpactMatrix(runsWithVariables);
     140        }
     141      }
     142    }
     143
     144    private IStringConvertibleMatrix CalculateVariableImpactMatrix(IRun[] runs) {
              // Convenience overload: forwards to the two-argument overload and labels each
              // run column with the Name of the corresponding run.
     145      return CalculateVariableImpactMatrix(runs, runs.Select(r => r.Name).ToArray());
     146    }
     147
     148    private DoubleMatrix CalculateVariableImpactMatrix(IRun[] runs, string[] runNames) {
              // Builds the variable-impact table for the given runs.
              //   runs     - runs whose "Variable impacts" result (cast to DoubleMatrix) is read;
              //              the impact value is taken from column 0 of that result.
              //   runNames - labels for the per-run columns, in the same order as runs.
              // Returns a matrix with one row per variable that has at least one impact not almost
              // equal to 0, one column per run, followed by the columns "Median Rank", "Mean",
              // "StdDev" and "pValue"; the rows are ordered by ascending median rank.
              // NOTE(review): runs[i].Results is indexed without a ContainsKey check; of the visible
              // callers only the non-cross-validation path filters on that key beforehand.
    102149      DoubleMatrix matrix = null;
    103       if (Content != null) {
    104         List<IRun> runsWithVariables = Content.Where(r => r.Visible && r.Results.ContainsKey(variableImpactResultName)).ToList();
    105         IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runsWithVariables
    106                                                         select run.Results[variableImpactResultName]).Cast<DoubleMatrix>();
    107         IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts
    108                                              from variableName in variableImpact.RowNames
    109                                              select variableName)
    110                                             .Distinct();
    111         // filter variableNames: only include names that have at least one non-zero value in a run
    112         List<string> variableNamesList = (from variableName in variableNames
    113                                           where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0))
    114                                           select variableName)
    115                                          .ToList();
    116 
    117         List<string> statictics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" };
    118         List<string> columnNames = runsWithVariables.Select(r => r.Name).ToList();
    119         columnNames.AddRange(statictics);
    120         int runs = runsWithVariables.Count();
    121 
    122         matrix = new DoubleMatrix(variableNamesList.Count, runs + statictics.Count);
    123         matrix.SortableView = true;
    124         matrix.RowNames = variableNamesList;
    125         matrix.ColumnNames = columnNames;
    126 
    127         for (int i = 0; i < runsWithVariables.Count; i++) {
    128           IRun run = runsWithVariables[i];
    129           DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName];
    130           for (int j = 0; j < runVariableImpacts.Rows; j++) {
    131             int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j));
    132             if (rowIndex > -1) {
    133               matrix[rowIndex, i] = runVariableImpacts[j, 0];
    134             }
    135           }
    136         }
    137 
    138         List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList
    139                                                       select GetVariableImpacts(variableName, allVariableImpacts).ToList())
    140                                                      .ToList();
    141         List<List<double>> variableRanks = (from variableName in variableNamesList
    142                                             select GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
    143                                         .ToList();
    144         if (variableImpactsOverRuns.Count() > 0) {
    145           // the variable with the worst median impact value is chosen as the reference variable
    146           // this is problematic if all variables are relevant, however works often in practice
    147           List<double> referenceImpacts = (from impacts in variableImpactsOverRuns
    148                                            let avg = impacts.Median()
    149                                            orderby avg
    150                                            select impacts)
    151                                            .First();
    152           // for all variables
    153           for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
    154             // median rank
    155             matrix[row, runs] = variableRanks[row].Median();
    156             // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
    157             matrix[row, runs + 1] = variableImpactsOverRuns[row].Average();
    158             matrix[row, runs + 2] = variableImpactsOverRuns[row].StandardDeviation();
    159 
    160             double leftTail = 0; double rightTail = 0; double bothTails = 0;
    161             // calc differences of impacts for current variable and reference variable
    162             double[] z = new double[referenceImpacts.Count];
    163             for (int i = 0; i < z.Length; i++) {
    164               z[i] = variableImpactsOverRuns[row][i] - referenceImpacts[i];
    165             }
    166             // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent
    167             alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail);
    168             matrix[row, runs + 3] = bothTails;
    169           }
    170         }
    171       }
    172       return matrix;
              // Deferred query over the "Variable impacts" results of the given runs; it is not
              // materialized, so it is handed as a lazy sequence to every GetVariableImpacts /
              // GetVariableImpactRanks call below.
     150      IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runs
     151                                                      select run.Results[variableImpactResultName]).Cast<DoubleMatrix>();
     152      IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts
     153                                           from variableName in variableImpact.RowNames
     154                                           select variableName)
     155                                          .Distinct();
     156      // filter variableNames: only include names that have at least one non-zero value in a run
     157      List<string> variableNamesList = (from variableName in variableNames
     158                                        where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0))
     159                                        select variableName)
     160                                       .ToList();
     161
              // Column layout: [0, numberOfRuns) hold the per-run impacts, followed by the four
              // statistics columns in the order listed here.
              // NOTE(review): runNames is assumed to have exactly runs.Length entries so that the
              // column names line up with the matrix columns - confirm against callers.
     162      List<string> statictics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" };
     163      List<string> columnNames = new List<string>(runNames);
     164      columnNames.AddRange(statictics);
     165      int numberOfRuns = runs.Length;
     166
     167      matrix = new DoubleMatrix(variableNamesList.Count, numberOfRuns + statictics.Count);
     168      matrix.SortableView = true;
     169      matrix.RowNames = variableNamesList;
     170      matrix.ColumnNames = columnNames;
     171
     172      // calculate statistics
     173      List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList
     174                                                    select GetVariableImpacts(variableName, allVariableImpacts).ToList())
     175                                             .ToList();
     176      List<List<double>> variableRanks = (from variableName in variableNamesList
     177                                          select GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
     178                                      .ToList();
     179      if (variableImpactsOverRuns.Count() > 0) {
     180        // the variable with the worst median impact value is chosen as the reference variable
     181        // this is problematic if all variables are relevant, however works often in practice
                // ("worst" here is the smallest median: the query orders ascending and takes the first entry)
     182        List<double> referenceImpacts = (from impacts in variableImpactsOverRuns
     183                                         let avg = impacts.Median()
     184                                         orderby avg
     185                                         select impacts)
     186                                         .First();
     187        // for all variables
     188        for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
     189          // median rank
     190          matrix[row, numberOfRuns] = variableRanks[row].Median();
     191          // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
                  // (both are stored rounded to 3 decimals)
     192          matrix[row, numberOfRuns + 1] = Math.Round(variableImpactsOverRuns[row].Average(), 3);
     193          matrix[row, numberOfRuns + 2] = Math.Round(variableImpactsOverRuns[row].StandardDeviation(), 3);
     194
     195          double leftTail = 0; double rightTail = 0; double bothTails = 0;
     196          // calc differences of impacts for current variable and reference variable
                  // NOTE(review): indexes variableImpactsOverRuns[row] up to referenceImpacts.Count,
                  // i.e. assumes GetVariableImpacts yields equally long lists for all variables - confirm.
     197          double[] z = new double[referenceImpacts.Count];
     198          for (int i = 0; i < z.Length; i++) {
     199            z[i] = variableImpactsOverRuns[row][i] - referenceImpacts[i];
     200          }
     201          // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent
                  // only the two-sided result is kept, rounded to 4 decimals
     202          alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail);
     203          matrix[row, numberOfRuns + 3] = Math.Round(bothTails, 4);
     204        }
     205      }
     206
     207      // fill matrix with impacts from runs
              // (stored rounded to 3 decimals; variables filtered out of variableNamesList are skipped)
     208      for (int i = 0; i < runs.Length; i++) {
     209        IRun run = runs[i];
     210        DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName];
     211        for (int j = 0; j < runVariableImpacts.Rows; j++) {
     212          int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j));
     213          if (rowIndex > -1) {
     214            matrix[rowIndex, i] = Math.Round(runVariableImpacts[j, 0], 3);
     215          }
     216        }
     217      }
     218      // sort by median
              // Rows are ordered ascending by the "Median Rank" column (index numberOfRuns). The
              // ordering reads from matrix while the values are written into the clone, so the
              // writes below cannot disturb the sort keys.
              // NOTE(review): only the cell values are permuted; RowNames is never reassigned in
              // this method. If Clone() copies the row names (not visible here), the labels stay in
              // the original variableNamesList order and no longer match the permuted rows -
              // confirm, and reorder the names with the same sortedIndexes if so.
     219      var sortedMatrix = (DoubleMatrix)matrix.Clone();
     220      var sortedIndexes = from i in Enumerable.Range(0, sortedMatrix.Rows)
     221                          orderby matrix[i, numberOfRuns]
     222                          select i;
     223
     224      int targetIndex = 0;
     225      foreach (var sourceIndex in sortedIndexes) {
     226        for (int c = 0; c < matrix.Columns; c++)
     227          sortedMatrix[targetIndex, c] = matrix[sourceIndex, c];
     228        targetIndex++;
     229      }
     230      return sortedMatrix;
    173231    }
    174232
     
    213271      }
    214272    }
     273
    215274  }
    216275}
Note: See TracChangeset for help on using the changeset viewer.