Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/FeatureCorrelation/FeatureCorrelationCalculator.cs @ 8689

Last change on this file since 8689 was 8689, checked in by sforsten, 10 years ago

#1292:

  • NaN values are used if the calculation is invalid (e.g. missing values, infinity, etc.)
  • Variables can now be filtered. Initially, the allowed input variables and the target variable are shown; a right click opens a dialog for selecting which variables are shown
File size: 11.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.ComponentModel;
25using System.Linq;
26using HeuristicLab.PluginInfrastructure;
27using FCE = HeuristicLab.Problems.DataAnalysis.FeatureCorrelationEnums;
28
29namespace HeuristicLab.Problems.DataAnalysis {
30  public class FeatureCorrelationCalculator : Object {
31
32    private BackgroundWorker bw;
33    private BackgroundWorkerInfo bwInfo;
34
35    private IDataAnalysisProblemData problemData;
36    public IDataAnalysisProblemData ProblemData {
37      set {
38        if (bw != null) {
39          bw.CancelAsync();
40        }
41        problemData = value;
42      }
43    }
44
45    public FeatureCorrelationCalculator()
46      : base() { }
47
48    public FeatureCorrelationCalculator(IDataAnalysisProblemData problemData)
49      : base() {
50      this.problemData = problemData;
51    }
52
53    public void CalculateElements(FCE.CorrelationCalculators calc, FCE.Partitions partition) {
54      CalculateElements(problemData.Dataset, calc, partition);
55    }
56
57    // returns if any calculation takes place
58    public bool CalculateTimeframeElements(FCE.CorrelationCalculators calc, FCE.Partitions partition, string variable, int frames, double[,] correlation = null) {
59      if (correlation == null || correlation.GetLength(1) <= frames) {
60        CalculateElements(problemData.Dataset, calc, partition, variable, frames, correlation);
61        return true;
62      } else {
63        return false;
64      }
65    }
66
67    private double[,] GetElementsOfCorrelation(double[,] corr, int frames) {
68      double[,] elements = new double[corr.GetLength(0), frames + 1];
69      for (int i = 0; i < corr.GetLength(0); i++) {
70        for (int j = 0; j <= frames; j++) {
71          elements[i, j] = corr[i, j];
72        }
73      }
74      return elements;
75    }
76
77    private void CalculateElements(Dataset dataset, FCE.CorrelationCalculators calc, FCE.Partitions partition, string variable = null, int frames = 0, double[,] alreadyCalculated = null) {
78      bwInfo = new BackgroundWorkerInfo { Dataset = dataset, Calculator = calc, Partition = partition, Variable = variable, Frames = frames, AlreadyCalculated = alreadyCalculated };
79      if (bw == null) {
80        bw = new BackgroundWorker();
81        bw.WorkerReportsProgress = true;
82        bw.WorkerSupportsCancellation = true;
83        bw.DoWork += new DoWorkEventHandler(BwDoWork);
84        bw.ProgressChanged += new ProgressChangedEventHandler(BwProgressChanged);
85        bw.RunWorkerCompleted += new RunWorkerCompletedEventHandler(BwRunWorkerCompleted);
86      }
87      if (bw.IsBusy) {
88        bw.CancelAsync();
89      } else {
90        bw.RunWorkerAsync(bwInfo);
91      }
92    }
93
94    #region backgroundworker
95    private void BwDoWork(object sender, DoWorkEventArgs e) {
96      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
97      if (bwInfo.Variable == null) {
98        BwCalculateCorrelation(sender, e);
99      } else {
100        BwCalculateTimeframeCorrelation(sender, e);
101      }
102    }
103
104    private void BwCalculateCorrelation(object sender, DoWorkEventArgs e) {
105      BackgroundWorker worker = sender as BackgroundWorker;
106
107      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
108      Dataset dataset = bwInfo.Dataset;
109      FCE.Partitions partition = bwInfo.Partition;
110      FCE.CorrelationCalculators calc = bwInfo.Calculator;
111
112      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
113      OnlineCalculatorError error = OnlineCalculatorError.None;
114      int length = doubleVariableNames.Count;
115      double[,] elements = new double[length, length];
116      double calculations = (Math.Pow(length, 2) + length) / 2;
117
118      worker.ReportProgress(0);
119
120      for (int i = 0; i < length; i++) {
121        for (int j = 0; j < i + 1; j++) {
122          if (worker.CancellationPending) {
123            e.Cancel = true;
124            return;
125          }
126          IEnumerable<double> var1 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);
127          IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[j]);
128
129          elements[i, j] = CalculateElementWithCalculator(calc, var1, var2, out error);
130
131          if (!error.Equals(OnlineCalculatorError.None)) {
132            elements[i, j] = double.NaN;
133          }
134          elements[j, i] = elements[i, j];
135          worker.ReportProgress((int)Math.Round((((Math.Pow(i, 2) + i) / 2 + j + 1.0) / calculations) * 100));
136        }
137      }
138      e.Result = elements;
139    }
140
141    private void BwCalculateTimeframeCorrelation(object sender, DoWorkEventArgs e) {
142      BackgroundWorker worker = sender as BackgroundWorker;
143
144      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
145      Dataset dataset = bwInfo.Dataset;
146      FCE.Partitions partition = bwInfo.Partition;
147      FCE.CorrelationCalculators calc = bwInfo.Calculator;
148      string variable = bwInfo.Variable;
149      int frames = bwInfo.Frames;
150      double[,] alreadyCalculated = bwInfo.AlreadyCalculated;
151
152      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
153      OnlineCalculatorError error = OnlineCalculatorError.None;
154      int length = doubleVariableNames.Count;
155      double[,] elements = new double[length, frames + 1];
156      double calculations = (frames + 1) * length;
157
158      worker.ReportProgress(0);
159
160      int start = 0;
161      if (alreadyCalculated != null) {
162        for (int i = 0; i < alreadyCalculated.GetLength(0); i++) {
163          Array.Copy(alreadyCalculated, i * alreadyCalculated.GetLength(1), elements, i * elements.GetLength(1), alreadyCalculated.GetLength(1));
164        }
165        start = alreadyCalculated.GetLength(1);
166      }
167
168      for (int i = 0; i < length; i++) {
169        for (int j = start; j <= frames; j++) {
170          if (worker.CancellationPending) {
171            e.Cancel = true;
172            return;
173          }
174
175          IEnumerable<double> var1 = GetRelevantValues(problemData, partition, variable);
176          IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);
177
178          var valuesInFrame = var1.Take(j);
179          var help = var1.Skip(j).ToList();
180          help.AddRange(valuesInFrame);
181          var1 = help;
182
183          elements[i, j] = CalculateElementWithCalculator(calc, var1, var2, out error);
184
185          if (!error.Equals(OnlineCalculatorError.None)) {
186            elements[i, j] = double.NaN;
187          }
188          worker.ReportProgress((int)((100.0 / calculations) * (i * (frames + 1) + j + 1)));
189        }
190      }
191      e.Result = elements;
192    }
193
194    private IEnumerable<double> GetRelevantValues(IDataAnalysisProblemData problemData, FCE.Partitions partition, string variable) {
195      IEnumerable<double> var = problemData.Dataset.GetDoubleValues(variable);
196      if (partition.Equals(FCE.Partitions.TrainingSamples)) {
197        var = var.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start);
198      } else if (partition.Equals(FCE.Partitions.TestSamples)) {
199        var = var.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start);
200      }
201      return var;
202    }
203
204    private double CalculateElementWithCalculator(FCE.CorrelationCalculators calc, IEnumerable<double> var1, IEnumerable<double> var2, out OnlineCalculatorError error) {
205      if (calc.Equals(FCE.CorrelationCalculators.HoeffdingsDependence)) {
206        return HoeffdingsDependenceCalculator.Calculate(var1, var2, out error);
207      } else if (calc.Equals(FCE.CorrelationCalculators.SpearmansRank)) {
208        return SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error);
209      } else if (calc.Equals(FCE.CorrelationCalculators.PearsonsRSquared)) {
210        return OnlinePearsonsRSquaredCalculator.Calculate(var1, var2, out error);
211      } else {
212        return Math.Sqrt(OnlinePearsonsRSquaredCalculator.Calculate(var1, var2, out error));
213      }
214    }
215
216    private void BwRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) {
217      BackgroundWorker worker = sender as BackgroundWorker;
218      if (!e.Cancelled && !worker.CancellationPending) {
219        if (!(e.Error == null)) {
220          ErrorHandling.ShowErrorDialog(e.Error);
221        } else {
222          OnCorrelationCalculationFinished((double[,])e.Result, bwInfo.Calculator, bwInfo.Partition, bwInfo.Variable);
223        }
224      } else {
225        bw.RunWorkerAsync(bwInfo);
226      }
227    }
228    #endregion
229
230    #region events
231    public class CorrelationCalculationFinishedArgs : EventArgs {
232      public double[,] Correlation { get; private set; }
233      public FCE.CorrelationCalculators Calculcator { get; private set; }
234      public FCE.Partitions Partition { get; private set; }
235      public string Variable { get; private set; }
236
237      public CorrelationCalculationFinishedArgs(double[,] correlation, FCE.CorrelationCalculators calculator, FCE.Partitions partition, string variable = null) {
238        this.Correlation = correlation;
239        this.Calculcator = calculator;
240        this.Partition = partition;
241        this.Variable = variable;
242      }
243    }
244    public delegate void CorrelationCalculationFinishedHandler(object sender, CorrelationCalculationFinishedArgs e);
245    public event CorrelationCalculationFinishedHandler CorrelationCalculationFinished;
246    protected virtual void OnCorrelationCalculationFinished(double[,] correlation, FCE.CorrelationCalculators calculator, FCE.Partitions partition, string variable = null) {
247      var handler = CorrelationCalculationFinished;
248      if (handler != null)
249        handler(this, new CorrelationCalculationFinishedArgs(correlation, calculator, partition, variable));
250    }
251
252    public delegate void ProgressCalculationHandler(object sender, ProgressChangedEventArgs e);
253    public event ProgressCalculationHandler ProgressCalculation;
254    protected void BwProgressChanged(object sender, ProgressChangedEventArgs e) {
255      BackgroundWorker worker = sender as BackgroundWorker;
256      if (!worker.CancellationPending && ProgressCalculation != null) {
257        ProgressCalculation(sender, e);
258      }
259    }
260    #endregion
261
262    protected class BackgroundWorkerInfo {
263      public Dataset Dataset { get; set; }
264      public FCE.CorrelationCalculators Calculator { get; set; }
265      public FCE.Partitions Partition { get; set; }
266      public string Variable { get; set; }
267      public int Frames { get; set; }
268      public double[,] AlreadyCalculated { get; set; }
269    }
270  }
271}
Note: See TracBrowser for help on using the repository browser.