source: branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/FeatureCorrelation.cs @ 8529

Last change on this file since 8529 was 8529, checked in by sforsten, 7 years ago

#1292:

  • BackgroundWorker is now reused in FeatureCorrelation
  • renamed some variables
  • ComboBoxes are now DropDownLists
  • FeatureCorrelation doesn't calculate the elements in the constructor anymore
  • small changes in the views
File size: 11.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#endregion
22
23using System;
24using System.Collections.Generic;
25using System.ComponentModel;
26using System.Linq;
27using HeuristicLab.Analysis;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.PluginInfrastructure;
32
33namespace HeuristicLab.Problems.DataAnalysis {
34  [StorableClass]
35  [Item("FeatureCorrelation", "Represents the correlation of features in a data set.")]
36  public class FeatureCorrelation : HeatMap {
37
38    private const string PearsonsR = "Pearsons R";
39    private const string PearsonsRSquared = "Pearsons R Squared";
40    private const string HoeffdingsDependence = "Hoeffdings Dependence";
41    private const string SpearmansRank = "Spearmans Rank";
42    public IEnumerable<string> CorrelationCalculators {
43      get { return new List<string>() { PearsonsR, PearsonsRSquared, HoeffdingsDependence, SpearmansRank }; }
44    }
45
46    private const string AllSamples = "All Samples";
47    private const string TrainingSamples = "Training Samples";
48    private const string TestSamples = "Test Samples";
49    public IEnumerable<string> Partitions {
50      get { return new List<string>() { AllSamples, TrainingSamples, TestSamples }; }
51    }
52
53    private IDataAnalysisProblemData problemData;
54    public IDataAnalysisProblemData ProblemData {
55      get { return problemData; }
56      set {
57        if (problemData != value) {
58          problemData = value;
59          columnNames = value.Dataset.DoubleVariables.ToList();
60          rowNames = value.Dataset.DoubleVariables.ToList();
61          OnProblemDataChanged();
62        }
63      }
64    }
65
66    private BackgroundWorker bw;
67    private BackgroundWorkerInfo bwInfo;
68
69    public FeatureCorrelation()
70      : base() {
71      this.Title = "Feature Correlation";
72      this.columnNames = Enumerable.Range(1, 2).Select(x => x.ToString()).ToList();
73      this.rowNames = Enumerable.Range(1, 2).Select(x => x.ToString()).ToList();
74      sortableView = true;
75    }
76
77    public FeatureCorrelation(IDataAnalysisProblemData problemData) {
78      this.problemData = problemData;
79      this.Title = "Feature Correlation";
80      this.columnNames = problemData.Dataset.DoubleVariables.ToList();
81      this.rowNames = problemData.Dataset.DoubleVariables.ToList();
82      sortableView = true;
83    }
84    protected FeatureCorrelation(FeatureCorrelation original, Cloner cloner)
85      : base(original, cloner) {
86      this.Title = "Feature Correlation";
87      this.problemData = original.problemData;
88      this.columnNames = original.problemData.Dataset.DoubleVariables.ToList();
89      this.rowNames = original.problemData.Dataset.DoubleVariables.ToList();
90    }
91    public override IDeepCloneable Clone(Cloner cloner) {
92      return new FeatureCorrelation(this, cloner);
93    }
94
95    public void Recalculate(string calc, string partition) {
96      CalculateElements(problemData.Dataset, calc, partition);
97    }
98
99    public void CalculateTimeframeElements(string calc, string partition, string variable, int frames) {
100      CalculateElements(problemData.Dataset, calc, partition, variable, frames);
101    }
102
103    private void CalculateElements(Dataset dataset) {
104      CalculateElements(dataset, CorrelationCalculators.First(), Partitions.First());
105    }
106
107    private void CalculateElements(Dataset dataset, string calc, string partition, string variable = null, int frames = 0) {
108      bwInfo = new BackgroundWorkerInfo { Dataset = dataset, Calculator = calc, Partition = partition, Variable = variable, Frames = frames };
109      if (bw == null) {
110        bw = new BackgroundWorker();
111        bw.WorkerReportsProgress = true;
112        bw.WorkerSupportsCancellation = true;
113        bw.DoWork += new DoWorkEventHandler(BwDoWork);
114        bw.ProgressChanged += new ProgressChangedEventHandler(BwProgressChanged);
115        bw.RunWorkerCompleted += new RunWorkerCompletedEventHandler(BwRunWorkerCompleted);
116      }
117      if (bw.IsBusy) {
118        bw.CancelAsync();
119      } else {
120        bw.RunWorkerAsync(bwInfo);
121      }
122      if (calc.Equals(PearsonsR) || calc.Equals(SpearmansRank)) {
123        Maximum = 1.0;
124        Minimum = -1.0;
125      } else if (calc.Equals(HoeffdingsDependence)) {
126        Maximum = 1.0;
127        Minimum = -0.5;
128      } else {
129        Maximum = 1.0;
130        Minimum = 0.0;
131      }
132    }
133
134    #region backgroundworker
135    private void BwDoWork(object sender, DoWorkEventArgs e) {
136      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
137      if (bwInfo.Variable == null) {
138        BwCalculateCorrelation(sender, e);
139      } else {
140        BwCalculateTimeframeCorrelation(sender, e);
141      }
142    }
143
144    private void BwCalculateCorrelation(object sender, DoWorkEventArgs e) {
145      BackgroundWorker worker = sender as BackgroundWorker;
146
147      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
148      Dataset dataset = bwInfo.Dataset;
149      string partition = bwInfo.Partition;
150      string calc = bwInfo.Calculator;
151
152      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
153      OnlineCalculatorError error;
154      int length = doubleVariableNames.Count;
155      double[,] elements = new double[length, length];
156      double calculations = (Math.Pow(length, 2) + length) / 2;
157
158      worker.ReportProgress(0);
159
160      for (int i = 0; i < length; i++) {
161        for (int j = 0; j < i + 1; j++) {
162          if (worker.CancellationPending) {
163            e.Cancel = true;
164            return;
165          }
166
167          IEnumerable<double> var1 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);
168          IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[j]);
169
170          elements[i, j] = CalculateElementWithCalculator(calc, var1, var2, out error);
171
172          elements[j, i] = elements[i, j];
173          if (!error.Equals(OnlineCalculatorError.None)) {
174            worker.ReportProgress(100);
175            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
176          }
177          worker.ReportProgress((int)Math.Round((((Math.Pow(i, 2) + i) / 2 + j + 1.0) / calculations) * 100));
178        }
179      }
180      e.Result = elements;
181    }
182
183    private void BwCalculateTimeframeCorrelation(object sender, DoWorkEventArgs e) {
184      BackgroundWorker worker = sender as BackgroundWorker;
185
186      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
187      Dataset dataset = bwInfo.Dataset;
188      string partition = bwInfo.Partition;
189      string calc = bwInfo.Calculator;
190      string variable = bwInfo.Variable;
191      int frames = bwInfo.Frames;
192
193      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
194      OnlineCalculatorError error;
195      int length = doubleVariableNames.Count;
196      double[,] elements = new double[length, frames + 1];
197      double calculations = (frames + 1) * length;
198
199      worker.ReportProgress(0);
200
201      for (int i = 0; i < length; i++) {
202        for (int j = 0; j <= frames; j++) {
203          if (worker.CancellationPending) {
204            e.Cancel = true;
205            return;
206          }
207
208          IEnumerable<double> var1 = GetRelevantValues(problemData, partition, variable);
209          IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);
210
211          var valuesInFrame = var1.Take(j);
212          var help = var1.Skip(j).ToList();
213          help.AddRange(valuesInFrame);
214          var1 = help;
215
216          elements[i, j] = CalculateElementWithCalculator(calc, var1, var2, out error);
217
218          if (!error.Equals(OnlineCalculatorError.None)) {
219            worker.ReportProgress(100);
220            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
221          }
222          worker.ReportProgress((int)((100.0 / calculations) * (i * (frames + 1) + j + 1)));
223        }
224      }
225      e.Result = elements;
226    }
227
228    private IEnumerable<double> GetRelevantValues(IDataAnalysisProblemData problemData, string partition, string variable) {
229      IEnumerable<double> var = problemData.Dataset.GetDoubleValues(variable);
230      if (partition.Equals(TrainingSamples)) {
231        var = var.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start);
232      } else if (partition.Equals(TestSamples)) {
233        var = var.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start);
234      }
235      return var;
236    }
237
238    private double CalculateElementWithCalculator(string calc, IEnumerable<double> var1, IEnumerable<double> var2, out OnlineCalculatorError error) {
239      if (calc.Equals(HoeffdingsDependence)) {
240        return HoeffdingsDependenceCalculator.Calculate(var1, var2, out error);
241      } else if (calc.Equals(SpearmansRank)) {
242        return SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error);
243      } else if (calc.Equals(PearsonsRSquared)) {
244        return OnlinePearsonsRSquaredCalculator.Calculate(var1, var2, out error);
245      } else {
246        return OnlinePearsonsRSquaredCalculator.CalculateR(var1, var2, out error);
247      }
248    }
249
250    private void BwRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) {
251      BackgroundWorker worker = sender as BackgroundWorker;
252      if (!e.Cancelled && !worker.CancellationPending) {
253        if (!(e.Error == null)) {
254          ErrorHandling.ShowErrorDialog(e.Error);
255        } else {
256          matrix = (double[,])e.Result;
257          OnCorrelationCalculationFinished();
258        }
259      } else {
260        bw.RunWorkerAsync(bwInfo);
261      }
262    }
263    #endregion
264
265    #region events
266    public event EventHandler CorrelationCalculationFinished;
267    protected virtual void OnCorrelationCalculationFinished() {
268      EventHandler handler = CorrelationCalculationFinished;
269      if (handler != null)
270        handler(this, EventArgs.Empty);
271    }
272
273    public delegate void ProgressCalculationHandler(object sender, ProgressChangedEventArgs e);
274    public event ProgressCalculationHandler ProgressCalculation;
275    protected void BwProgressChanged(object sender, ProgressChangedEventArgs e) {
276      BackgroundWorker worker = sender as BackgroundWorker;
277      if (!worker.CancellationPending && ProgressCalculation != null) {
278        ProgressCalculation(sender, e);
279      }
280    }
281
282    public event EventHandler ProblemDataChanged;
283    protected virtual void OnProblemDataChanged() {
284      EventHandler handler = ProblemDataChanged;
285      if (handler != null)
286        handler(this, EventArgs.Empty);
287    }
288    #endregion
289
290    protected class BackgroundWorkerInfo {
291      public Dataset Dataset { get; set; }
292      public string Calculator { get; set; }
293      public string Partition { get; set; }
294      public string Variable { get; set; }
295      public int Frames { get; set; }
296    }
297  }
298}
Note: See TracBrowser for help on using the repository browser.