Ignore:
Timestamp:
06/27/16 15:28:26 (5 years ago)
Author:
mkommend
Message:

#2619:

  • Refactored and separated the different feature correlation calculations.
  • Added a checkbox to ignore missing values in the calculation.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4/FeatureCorrelation/FeatureCorrelationCalculator.cs

    r12509 r13938  
    2424using System.ComponentModel;
    2525using System.Linq;
     26using System.Threading.Tasks;
    2627using HeuristicLab.PluginInfrastructure;
    2728
    2829namespace HeuristicLab.Problems.DataAnalysis.Views {
    2930  [NonDiscoverableType]
    30   public class FeatureCorrelationCalculator : Object {
     31  public sealed class FeatureCorrelationCalculator : AbstractFeatureCorrelationCalculator {
     32    public FeatureCorrelationCalculator() : base() { }
    3133
    32     private BackgroundWorker bw;
    33     private BackgroundWorkerInfo bwInfo;
     34    public void CalculateElements(IDataAnalysisProblemData problemData, IDependencyCalculator calc, string partition, bool ignoreMissingValues) {
     35      var indices = GetRelevantIndices(problemData, partition);
     36      var info = new BackgroundWorkerInfo {
     37        Dataset = problemData.Dataset, Calculator = calc, Partition = partition, Indices = indices, IgnoreMissingValues = ignoreMissingValues
     38      };
    3439
    35     private IDataAnalysisProblemData problemData;
    36     public IDataAnalysisProblemData ProblemData {
    37       set {
    38         if (bw != null) {
    39           bw.CancelAsync();
    40         }
    41         problemData = value;
    42       }
     40      StartCalculation(info);
    4341    }
    4442
    45     public FeatureCorrelationCalculator()
    46       : base() { }
     43    protected override void BackgroundWorker_DoWork(object sender, DoWorkEventArgs e) {
     44      BackgroundWorker worker = (BackgroundWorker)sender;
     45      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
    4746
    48     public FeatureCorrelationCalculator(IDataAnalysisProblemData problemData)
    49       : base() {
    50       this.problemData = problemData;
    51     }
    52 
    53     public void CalculateElements(IDependencyCalculator calc, string partition) {
    54       CalculateElements(problemData.Dataset, calc, partition);
    55     }
    56 
    57     // returns true if any calculation takes place
    58     public bool CalculateTimeframeElements(IDependencyCalculator calc, string partition, string variable, int frames, double[,] correlation = null) {
    59       if (correlation == null || correlation.GetLength(1) <= frames) {
    60         CalculateElements(problemData.Dataset, calc, partition, variable, frames, correlation);
    61         return true;
    62       } else {
    63         return false;
    64       }
    65     }
    66 
    67     public void TryCancelCalculation() {
    68       if (bw != null && bw.IsBusy) {
    69         bwInfo = null;
    70         bw.CancelAsync();
    71       }
    72     }
    73 
    74     private void CalculateElements(IDataset dataset, IDependencyCalculator calc, string partition, string variable = null, int frames = 0, double[,] alreadyCalculated = null) {
    75       var indices = GetRelevantIndices(problemData, partition);
    76       bwInfo = new BackgroundWorkerInfo {
    77         Dataset = dataset, Calculator = calc, Partition = partition, Indices = indices,
    78         Variable = variable, Frames = frames, AlreadyCalculated = alreadyCalculated
    79       };
    80       if (bw == null) {
    81         bw = new BackgroundWorker();
    82         bw.WorkerReportsProgress = true;
    83         bw.WorkerSupportsCancellation = true;
    84         bw.DoWork += new DoWorkEventHandler(BwDoWork);
    85         bw.ProgressChanged += new ProgressChangedEventHandler(BwProgressChanged);
    86         bw.RunWorkerCompleted += new RunWorkerCompletedEventHandler(BwRunWorkerCompleted);
    87       }
    88       if (bw.IsBusy) {
    89         bw.CancelAsync();
    90       } else {
    91         bw.RunWorkerAsync(bwInfo);
    92       }
    93     }
    94 
    95     private IEnumerable<int> GetRelevantIndices(IDataAnalysisProblemData problemData, string partition) {
    96       IEnumerable<int> var;
    97       if (partition.Equals(AbstractFeatureCorrelationView.TRAININGSAMPLES))
    98         var = problemData.TrainingIndices;
    99       else if (partition.Equals(AbstractFeatureCorrelationView.TESTSAMPLES))
    100         var = problemData.TestIndices;
    101       else var = Enumerable.Range(0, problemData.Dataset.Rows);
    102       return var;
    103     }
    104 
    105     #region backgroundworker
    106     private void BwDoWork(object sender, DoWorkEventArgs e) {
    107       BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
    108       if (bwInfo.Variable == null) {
    109         BwCalculateCorrelation(sender, e);
    110       } else {
    111         BwCalculateTimeframeCorrelation(sender, e);
    112       }
    113     }
    114 
    115     private void BwCalculateCorrelation(object sender, DoWorkEventArgs e) {
    116       BackgroundWorker worker = sender as BackgroundWorker;
    117 
    118       BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
    11947      var dataset = bwInfo.Dataset;
    120       IEnumerable<int> indices = bwInfo.Indices;
     48      var indices = bwInfo.Indices.ToArray();
    12149      IDependencyCalculator calc = bwInfo.Calculator;
    12250
     
    12553      int length = doubleVariableNames.Count;
    12654      double[,] elements = new double[length, length];
    127       double calculations = (Math.Pow(length, 2) + length) / 2;
    12855
    12956      worker.ReportProgress(0);
    13057
    131       for (int i = 0; i < length; i++) {
    132         for (int j = 0; j < i + 1; j++) {
    133           if (worker.CancellationPending) {
    134             worker.ReportProgress(100);
    135             e.Cancel = true;
    136             return;
    137           }
    138           IEnumerable<double> var1 = problemData.Dataset.GetDoubleValues(doubleVariableNames[i], indices);
    139           IEnumerable<double> var2 = problemData.Dataset.GetDoubleValues(doubleVariableNames[j], indices);
     58      for (int counter = 0; counter < length; counter++) {
     59        if (worker.CancellationPending) {
     60          worker.ReportProgress(100);
     61          e.Cancel = true;
     62          return;
     63        }
    14064
    141           elements[i, j] = calc.Calculate(var1, var2, out error);
     65        var i = counter;
     66        Parallel.ForEach(Enumerable.Range(i, length - i), j => {
     67          var var1 = dataset.GetDoubleValues(doubleVariableNames[i], indices);
     68          var var2 = dataset.GetDoubleValues(doubleVariableNames[j], indices);
     69
     70          if (bwInfo.IgnoreMissingValues) {
     71            var filtered = FilterNaNValues(var1, var2);
     72            elements[i, j] = calc.Calculate(filtered, out error);
     73          } else
     74            elements[i, j] = calc.Calculate(var1, var2, out error);
    14275
    14376          if (!error.Equals(OnlineCalculatorError.None)) {
     
    14578          }
    14679          elements[j, i] = elements[i, j];
    147           worker.ReportProgress((int)Math.Round((((Math.Pow(i, 2) + i) / 2 + j + 1.0) / calculations) * 100));
    148         }
     80
     81        });
     82        worker.ReportProgress((int)(((double)counter) / length * 100));
    14983      }
    15084      e.Result = elements;
     
    15286    }
    15387
    154     private void BwCalculateTimeframeCorrelation(object sender, DoWorkEventArgs e) {
    155       BackgroundWorker worker = sender as BackgroundWorker;
    15688
    157       BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
    158       var dataset = bwInfo.Dataset;
    159       IEnumerable<int> indices = bwInfo.Indices;
    160       IDependencyCalculator calc = bwInfo.Calculator;
    161       string variable = bwInfo.Variable;
    162       int frames = bwInfo.Frames;
    163       double[,] alreadyCalculated = bwInfo.AlreadyCalculated;
     89    private static IEnumerable<Tuple<double, double>> FilterNaNValues(IEnumerable<double> first, IEnumerable<double> second) {
     90      var firstEnumerator = first.GetEnumerator();
     91      var secondEnumerator = second.GetEnumerator();
    16492
    165       IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
    166       OnlineCalculatorError error = OnlineCalculatorError.None;
    167       int length = doubleVariableNames.Count;
    168       double[,] elements = new double[length, frames + 1];
    169       double calculations = (frames + 1) * length;
     93      while (firstEnumerator.MoveNext() & secondEnumerator.MoveNext()) {
     94        var firstValue = firstEnumerator.Current;
     95        var secondValue = secondEnumerator.Current;
    17096
    171       worker.ReportProgress(0);
     97        if (double.IsNaN(firstValue)) continue;
     98        if (double.IsNaN(secondValue)) continue;
    17299
    173       int start = 0;
    174       if (alreadyCalculated != null) {
    175         for (int i = 0; i < alreadyCalculated.GetLength(0); i++) {
    176           Array.Copy(alreadyCalculated, i * alreadyCalculated.GetLength(1), elements, i * elements.GetLength(1), alreadyCalculated.GetLength(1));
    177         }
    178         start = alreadyCalculated.GetLength(1);
     100        yield return Tuple.Create(firstValue, secondValue);
    179101      }
    180102
    181       for (int i = 0; i < length; i++) {
    182         for (int j = start; j <= frames; j++) {
    183           if (worker.CancellationPending) {
    184             worker.ReportProgress(100);
    185             e.Cancel = true;
    186             return;
    187           }
    188 
    189           IEnumerable<double> var1 = problemData.Dataset.GetDoubleValues(variable, indices);
    190           IEnumerable<double> var2 = problemData.Dataset.GetDoubleValues(doubleVariableNames[i], indices);
    191 
    192           var valuesInFrame = var1.Take(j);
    193           var help = var1.Skip(j).ToList();
    194           help.AddRange(valuesInFrame);
    195           var1 = help;
    196 
    197           elements[i, j] = calc.Calculate(var1, var2, out error);
    198 
    199           if (!error.Equals(OnlineCalculatorError.None)) {
    200             elements[i, j] = double.NaN;
    201           }
    202           worker.ReportProgress((int)((100.0 / calculations) * (i * (frames + 1) + j + 1)));
    203         }
     103      if (firstEnumerator.MoveNext() || secondEnumerator.MoveNext()) {
     104        throw new ArgumentException("Number of elements in first and second enumeration doesn't match.");
    204105      }
    205       e.Result = elements;
    206       worker.ReportProgress(100);
    207     }
    208 
    209     private void BwRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) {
    210       BackgroundWorker worker = sender as BackgroundWorker;
    211       if (!e.Cancelled && !worker.CancellationPending) {
    212         if (e.Error != null) {
    213           ErrorHandling.ShowErrorDialog(e.Error);
    214         } else {
    215           OnCorrelationCalculationFinished((double[,])e.Result, bwInfo.Calculator, bwInfo.Partition, bwInfo.Variable);
    216         }
    217       } else if (bwInfo != null) {
    218         bw.RunWorkerAsync(bwInfo);
    219       }
    220     }
    221     #endregion
    222 
    223     #region events
    224     public class CorrelationCalculationFinishedArgs : EventArgs {
    225       public double[,] Correlation { get; private set; }
    226       public IDependencyCalculator Calculcator { get; private set; }
    227       public string Partition { get; private set; }
    228       public string Variable { get; private set; }
    229 
    230       public CorrelationCalculationFinishedArgs(double[,] correlation, IDependencyCalculator calculator, string partition, string variable = null) {
    231         this.Correlation = correlation;
    232         this.Calculcator = calculator;
    233         this.Partition = partition;
    234         this.Variable = variable;
    235       }
    236     }
    237 
    238     public delegate void CorrelationCalculationFinishedHandler(object sender, CorrelationCalculationFinishedArgs e);
    239     public event CorrelationCalculationFinishedHandler CorrelationCalculationFinished;
    240     protected virtual void OnCorrelationCalculationFinished(double[,] correlation, IDependencyCalculator calculator, string partition, string variable = null) {
    241       var handler = CorrelationCalculationFinished;
    242       if (handler != null)
    243         handler(this, new CorrelationCalculationFinishedArgs(correlation, calculator, partition, variable));
    244     }
    245 
    246     public delegate void ProgressCalculationHandler(object sender, ProgressChangedEventArgs e);
    247     public event ProgressCalculationHandler ProgressCalculation;
    248     protected void BwProgressChanged(object sender, ProgressChangedEventArgs e) {
    249       BackgroundWorker worker = sender as BackgroundWorker;
    250       if (ProgressCalculation != null) {
    251         ProgressCalculation(sender, e);
    252       }
    253     }
    254     #endregion
    255 
    256     private class BackgroundWorkerInfo {
    257       public IDataset Dataset { get; set; }
    258       public IDependencyCalculator Calculator { get; set; }
    259       public string Partition { get; set; }
    260       public IEnumerable<int> Indices { get; set; }
    261       public string Variable { get; set; }
    262       public int Frames { get; set; }
    263       public double[,] AlreadyCalculated { get; set; }
    264106    }
    265107  }
Note: See TracChangeset for help on using the changeset viewer.