Free cookie consent management tool by TermsFeed Policy Generator

Changeset 4124 for trunk/sources


Ignore:
Timestamp:
08/01/10 18:04:47 (14 years ago)
Author:
gkronber
Message:

Improved variable impact view. #1011

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.3/RunCollectionVariableImpactView.cs

    r4068 r4124  
    2929using HeuristicLab.MainForm.WindowsForms;
    3030using HeuristicLab.Optimization;
     31using System;
    3132
    3233namespace HeuristicLab.Problems.DataAnalysis.Views {
     
    7980      if (Content != null) {
    8081        List<IRun> runsWithVariables = Content.Where(r => r.Results.ContainsKey(variableImpactResultName)).ToList();
    81         IEnumerable<DoubleMatrix> variableImpacts = (from run in runsWithVariables
    82                                                      select run.Results[variableImpactResultName]).Cast<DoubleMatrix>();
    83         List<string> variableNames = (from varImpact in variableImpacts
    84                                       from variableName in varImpact.RowNames
    85                                       select variableName).Distinct().ToList();
    86         List<string> statictics = new List<string> { "Mean", "Median", "StdDev", "pValue Mean<0", "pValue Median<0" };
     82        IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runsWithVariables
     83                                                        select run.Results[variableImpactResultName]).Cast<DoubleMatrix>();
     84        IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts
     85                                             from variableName in variableImpact.RowNames
     86                                             select variableName)
     87                                            .Distinct();
     88        // filter variableNames: only include names that have at least one non-zero value in a run
     89        List<string> variableNamesList = (from variableName in variableNames
     90                                          where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0))
     91                                          select variableName)
     92                                         .ToList();
     93
     94        List<string> statictics = new List<string> { "Median Rank", "pValue", "Mean", "StdDev", };
    8795        List<string> columnNames = runsWithVariables.Select(r => r.Name).ToList();
    8896        columnNames.AddRange(statictics);
    8997        int runs = runsWithVariables.Count();
    9098
    91         matrix = new DoubleMatrix(variableNames.Count, runs + statictics.Count);
     99        matrix = new DoubleMatrix(variableNamesList.Count, runs + statictics.Count);
    92100        matrix.SortableView = true;
    93         matrix.RowNames = variableNames;
     101        matrix.RowNames = variableNamesList;
    94102        matrix.ColumnNames = columnNames;
    95103
     
    98106          DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName];
    99107          for (int j = 0; j < runVariableImpacts.Rows; j++) {
    100             int rowIndex = variableNames.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j));
    101             matrix[rowIndex, i] = runVariableImpacts[j, 0];
     108            int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j));
     109            if (rowIndex > -1) {
     110              matrix[rowIndex, i] = runVariableImpacts[j, 0];
     111            }
    102112          }
    103113        }
    104114
    105         for (int variableIndex = 0; variableIndex < matrix.Rows; variableIndex++) {
    106           List<double> impacts = new List<double>();
    107           for (int runIndex = 0; runIndex < runs; runIndex++)
    108             impacts.Add(matrix[variableIndex, runIndex]);
    109           matrix[variableIndex, runs] = impacts.Average();
    110           matrix[variableIndex, runs + 1] = impacts.Median();
    111           matrix[variableIndex, runs + 2] = impacts.StandardDeviation();
    112           double leftTail = 0; double rightTail = 0; double bothTails = 0;
    113           double[] impactsArray = impacts.ToArray();
    114           studentttests.studentttest1(ref impactsArray, impactsArray.Length, 0, ref bothTails, ref leftTail, ref rightTail);
    115           matrix[variableIndex, runs + 3] = rightTail;
    116           wsr.wilcoxonsignedranktest(impactsArray, impactsArray.Length, 0, ref bothTails, ref leftTail, ref rightTail);
    117           matrix[variableIndex, runs + 4] = rightTail;
     115        List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList
     116                                                      select GetVariableImpacts(variableName, allVariableImpacts).ToList())
     117                                                     .ToList();
     118        List<List<double>> variableRanks = (from variableName in variableNamesList
     119                                            select GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
     120                                        .ToList();
     121        if (variableImpactsOverRuns.Count() > 0) {
     122          // reference median is the worst median rank
     123          double referenceMedian = (from impacts in variableRanks
     124                                    let med = impacts.Median()
     125                                    orderby med
     126                                    select med)
     127                                           .Last();
     128          // for all variables
     129          for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
     130            matrix[row, runs] = variableRanks[row].Median();
     131
     132            // check if the median of the ranks is significantly different to the reference median rank
     133            double leftTail = 0; double rightTail = 0; double bothTails = 0;
     134            double[] ranksArray = variableRanks[row].ToArray();
     135
     136            // wilcoxon signed rank test is used because the ranks of two variables in a single run are not independent
     137            alglib.wsr.wilcoxonsignedranktest(ranksArray, ranksArray.Length, referenceMedian, ref bothTails, ref leftTail, ref rightTail);
     138            matrix[row, runs + 1] = bothTails;
     139
     140            // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
     141            matrix[row, runs + 2] = variableImpactsOverRuns[row].Average();
     142            matrix[row, runs + 3] = variableImpactsOverRuns[row].StandardDeviation();
     143
     144          }
    118145        }
    119146      }
    120147      return matrix;
    121148    }
     149
     150    private IEnumerable<double> GetVariableImpactRanks(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
              // Yields the rank of variableName within each impact matrix of allVariableImpacts.
              // Rank 1 goes to the largest value in column 0 (values are negated before the ascending sort).
              // A matrix whose RowNames do not contain variableName contributes no element, so the
              // result can be shorter than allVariableImpacts.
              // This is an iterator (yield return): nothing is computed until the result is enumerated.
     151      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
     152        // certainly not yet very efficient because ranks are computed multiple times for the same run
     153        string[] variableNames = runVariableImpacts.RowNames.ToArray();
                // negate column 0 so that the ascending Array.Sort below puts the largest impact first
     154        double[] values = (from row in Enumerable.Range(0, runVariableImpacts.Rows)
     155                           select runVariableImpacts[row, 0] * -1)
     156                          .ToArray();
                // sorts values (keys) and reorders variableNames (items) in step with them
     157        Array.Sort(values, variableNames);
     158        // calculate ranks
     159        double[] ranks = new double[values.Length];
     160        // check for tied ranks
                // every entry in a run of values that IsAlmost-equal the run's first value receives the
                // first entry's rank (i + 1); the next distinct value gets its own 1-based position.
                // NOTE(review): IsAlmost is defined outside this view - presumably a tolerance comparison; confirm.
     161        int i = 0;
     162        while (i < values.Length) {
     163          ranks[i] = i + 1;
     164          int j = i + 1;
     165          while (j < values.Length && values[i].IsAlmost(values[j])) {
     166            ranks[j] = ranks[i];
     167            j++;
     168          }
     169          i = j;
     170        }
                // emit the rank of every (sorted) row whose name equals the requested variable
     171        int rankIndex = 0;
     172        foreach (string rowVariableName in variableNames) {
     173          if (rowVariableName == variableName)
     174            yield return ranks[rankIndex];
     175          rankIndex++;
     176        }
     177      }
     178    }
     179
     180    private IEnumerable<double> GetVariableImpacts(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
              // Yields the column-0 value of every row named variableName, walking the matrices of
              // allVariableImpacts in order. A matrix without such a row contributes no element.
              // This is an iterator (yield return): evaluation is deferred until the result is enumerated.
     181      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
                // row tracks the index of the current row name so the matching matrix cell can be read
     182        int row = 0;
     183        foreach (string rowName in runVariableImpacts.RowNames) {
     184          if (rowName == variableName)
     185            yield return runVariableImpacts[row, 0];
     186          row++;
     187        }
     188      }
     189    }
     190
    122191  }
    123192}
Note: See TracChangeset for help on using the changeset viewer.