Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/FeatureCorrelation.cs @ 8542

Last change on this file since 8542 was 8542, checked in by mkommend, 12 years ago

#1292: Integrated correlation analysis of datasets in the trunk.

File size: 11.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#endregion
22
23using System;
24using System.Collections.Generic;
25using System.ComponentModel;
26using System.Linq;
27using HeuristicLab.Analysis;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.PluginInfrastructure;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Heat map whose cells hold correlation values between the double variables of the
  /// data set of an <see cref="IDataAnalysisProblemData"/>.
  /// </summary>
  /// <remarks>
  /// The values are calculated on a <see cref="BackgroundWorker"/>. Progress is forwarded
  /// through <see cref="ProgressCalculation"/>; a successful run stores the result matrix and
  /// raises <see cref="CorrelationCalculationFinished"/>.
  /// </remarks>
  [StorableClass]
  [Item("FeatureCorrelation", "Represents the correlation of features in a data set.")]
  public class FeatureCorrelation : HeatMap {
    private const string PearsonsR = "Pearsons R";
    private const string PearsonsRSquared = "Pearsons R Squared";
    private const string HoeffdingsDependence = "Hoeffdings Dependence";
    private const string SpearmansRank = "Spearmans Rank";

    /// <summary>Names of the calculators accepted as the <c>calc</c> argument of the calculation methods.</summary>
    public IEnumerable<string> CorrelationCalculators {
      get { return new List<string>() { PearsonsR, PearsonsRSquared, HoeffdingsDependence, SpearmansRank }; }
    }

    private const string AllSamples = "All Samples";
    private const string TrainingSamples = "Training Samples";
    private const string TestSamples = "Test Samples";

    /// <summary>Names of the partitions accepted as the <c>partition</c> argument of the calculation methods.</summary>
    public IEnumerable<string> Partitions {
      get { return new List<string>() { AllSamples, TrainingSamples, TestSamples }; }
    }

    private IDataAnalysisProblemData problemData;

    /// <summary>
    /// Problem data whose data set is analysed. Assigning a different instance replaces the
    /// row and column names with the names of the double variables of the new data set and
    /// raises <see cref="ProblemDataChanged"/>.
    /// </summary>
    [Storable]
    public IDataAnalysisProblemData ProblemData {
      get { return problemData; }
      set {
        if (problemData != value) {
          problemData = value;
          // A null value clears the labels instead of failing with a NullReferenceException.
          columnNames = DoubleVariableNamesOf(value);
          rowNames = DoubleVariableNamesOf(value);
          OnProblemDataChanged();
        }
      }
    }

    private BackgroundWorker bw;
    // Most recently requested calculation; used to restart the worker after a cancellation.
    private BackgroundWorkerInfo bwInfo;

    /// <summary>Creates a feature correlation without problem data and therefore without row/column names.</summary>
    public FeatureCorrelation() {
      this.Title = "Feature Correlation";
      // No problem data is available yet, so there are no variable names to show.
      // (Reading them from the still-null problemData field threw a NullReferenceException.)
      this.columnNames = new List<string>();
      this.rowNames = new List<string>();
      sortableView = true;
    }

    /// <summary>Creates a feature correlation labelled with the double variables of the given problem data.</summary>
    /// <param name="problemData">Problem data to analyse; a null value results in empty row/column names.</param>
    public FeatureCorrelation(IDataAnalysisProblemData problemData)
      : base() {
      this.problemData = problemData;
      this.Title = "Feature Correlation";
      this.columnNames = DoubleVariableNamesOf(problemData);
      this.rowNames = DoubleVariableNamesOf(problemData);
      sortableView = true;
    }

    /// <summary>Cloning constructor; clones the problem data through the given cloner.</summary>
    protected FeatureCorrelation(FeatureCorrelation original, Cloner cloner)
      : base(original, cloner) {
      this.problemData = cloner.Clone(original.problemData);
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new FeatureCorrelation(this, cloner);
    }

    [StorableConstructor]
    protected FeatureCorrelation(bool deserializing) : base(deserializing) { }

    /// <summary>Starts the calculation of the variable-by-variable correlation matrix.</summary>
    /// <param name="calc">One of the names in <see cref="CorrelationCalculators"/>.</param>
    /// <param name="partition">One of the names in <see cref="Partitions"/>.</param>
    public void Recalculate(string calc, string partition) {
      CalculateElements(problemData.Dataset, calc, partition);
    }

    /// <summary>
    /// Starts the calculation of the correlation between <paramref name="variable"/>, rotated by
    /// 0 to <paramref name="frames"/> positions, and every double variable of the data set.
    /// </summary>
    /// <param name="calc">One of the names in <see cref="CorrelationCalculators"/>.</param>
    /// <param name="partition">One of the names in <see cref="Partitions"/>.</param>
    /// <param name="variable">Name of the variable whose values are rotated.</param>
    /// <param name="frames">Largest rotation; the result has <c>frames + 1</c> columns.</param>
    public void CalculateTimeframeElements(string calc, string partition, string variable, int frames) {
      CalculateElements(problemData.Dataset, calc, partition, variable, frames);
    }

    private void CalculateElements(Dataset dataset) {
      CalculateElements(dataset, CorrelationCalculators.First(), Partitions.First());
    }

    /// <summary>
    /// Records the requested calculation, starts the background worker (or cancels a running one
    /// so that the completion handler restarts it with the recorded request) and sets the value
    /// range of the heat map for the chosen calculator.
    /// </summary>
    private void CalculateElements(Dataset dataset, string calc, string partition, string variable = null, int frames = 0) {
      bwInfo = new BackgroundWorkerInfo { Dataset = dataset, Calculator = calc, Partition = partition, Variable = variable, Frames = frames };
      if (bw == null) {
        bw = new BackgroundWorker();
        bw.WorkerReportsProgress = true;
        bw.WorkerSupportsCancellation = true;
        bw.DoWork += new DoWorkEventHandler(BwDoWork);
        bw.ProgressChanged += new ProgressChangedEventHandler(BwProgressChanged);
        bw.RunWorkerCompleted += new RunWorkerCompletedEventHandler(BwRunWorkerCompleted);
      }
      if (bw.IsBusy) {
        // BwRunWorkerCompleted starts a new run with the bwInfo stored above.
        bw.CancelAsync();
      } else {
        bw.RunWorkerAsync(bwInfo);
      }
      if (calc.Equals(PearsonsR) || calc.Equals(SpearmansRank)) {
        Maximum = 1.0;
        Minimum = -1.0;
      } else if (calc.Equals(HoeffdingsDependence)) {
        Maximum = 1.0;
        Minimum = -0.5;
      } else {
        Maximum = 1.0;
        Minimum = 0.0;
      }
    }

    /// <summary>Returns the double variable names of the data set, or an empty list if no data set is available.</summary>
    private static List<string> DoubleVariableNamesOf(IDataAnalysisProblemData data) {
      if (data == null || data.Dataset == null) return new List<string>();
      return data.Dataset.DoubleVariables.ToList();
    }

    #region backgroundworker
    /// <summary>Dispatches to the matrix or the time-frame calculation, depending on whether a variable was requested.</summary>
    private void BwDoWork(object sender, DoWorkEventArgs e) {
      BackgroundWorkerInfo info = (BackgroundWorkerInfo)e.Argument;
      if (info.Variable == null) {
        BwCalculateCorrelation(sender, e);
      } else {
        BwCalculateTimeframeCorrelation(sender, e);
      }
    }

    /// <summary>
    /// Fills a symmetric matrix with the correlation of every pair of double variables.
    /// Only the lower triangle (including the diagonal) is calculated; each value is mirrored.
    /// </summary>
    /// <exception cref="ArgumentException">The calculator reported an error for a pair of variables.</exception>
    private void BwCalculateCorrelation(object sender, DoWorkEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;

      BackgroundWorkerInfo info = (BackgroundWorkerInfo)e.Argument;
      Dataset dataset = info.Dataset;
      string partition = info.Partition;
      string calc = info.Calculator;

      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
      OnlineCalculatorError error;
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, length];
      // Number of cells in the lower triangle including the diagonal.
      double calculations = length * (length + 1.0) / 2;

      worker.ReportProgress(0);

      for (int i = 0; i < length; i++) {
        // The values of the row variable do not depend on the column index.
        IEnumerable<double> var1 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);

        for (int j = 0; j < i + 1; j++) {
          if (worker.CancellationPending) {
            e.Cancel = true;
            return;
          }

          IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[j]);

          double value = CalculateElementWithCalculator(calc, var1, var2, out error);
          if (!error.Equals(OnlineCalculatorError.None)) {
            worker.ReportProgress(100);
            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
          }

          elements[i, j] = value;
          elements[j, i] = value;

          // i * (i + 1) / 2 cells precede row i in the lower triangle.
          worker.ReportProgress((int)Math.Round(((i * (i + 1.0) / 2 + j + 1.0) / calculations) * 100));
        }
      }
      e.Result = elements;
    }

    /// <summary>
    /// Fills a matrix with one row per double variable and one column per rotation 0..frames.
    /// Cell [i, j] is the correlation between the requested variable, rotated left by j positions
    /// (the first j values are appended at the end), and variable i.
    /// </summary>
    /// <exception cref="ArgumentException">The calculator reported an error for a cell.</exception>
    private void BwCalculateTimeframeCorrelation(object sender, DoWorkEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;

      BackgroundWorkerInfo info = (BackgroundWorkerInfo)e.Argument;
      Dataset dataset = info.Dataset;
      string partition = info.Partition;
      string calc = info.Calculator;
      string variable = info.Variable;
      int frames = info.Frames;

      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
      OnlineCalculatorError error;
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, frames + 1];
      double calculations = (frames + 1) * length;

      worker.ReportProgress(0);

      // The reference variable is the same for every cell; read it once instead of
      // re-enumerating the data set for every rotation.
      List<double> referenceValues = GetRelevantValues(problemData, partition, variable).ToList();

      for (int i = 0; i < length; i++) {
        // The values of the row variable do not depend on the rotation.
        IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);

        for (int j = 0; j <= frames; j++) {
          if (worker.CancellationPending) {
            e.Cancel = true;
            return;
          }

          // Rotate left by j: values from position j onwards, followed by the first j values.
          List<double> rotated = new List<double>(referenceValues.Count);
          rotated.AddRange(referenceValues.Skip(j));
          rotated.AddRange(referenceValues.Take(j));

          elements[i, j] = CalculateElementWithCalculator(calc, rotated, var2, out error);

          if (!error.Equals(OnlineCalculatorError.None)) {
            worker.ReportProgress(100);
            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
          }
          worker.ReportProgress((int)((100.0 / calculations) * (i * (frames + 1) + j + 1)));
        }
      }
      e.Result = elements;
    }

    /// <summary>
    /// Returns the values of <paramref name="variable"/> restricted to the training or test
    /// partition; any other partition name yields all values.
    /// </summary>
    private IEnumerable<double> GetRelevantValues(IDataAnalysisProblemData problemData, string partition, string variable) {
      IEnumerable<double> values = problemData.Dataset.GetDoubleValues(variable);
      if (partition.Equals(TrainingSamples)) {
        int start = problemData.TrainingPartition.Start;
        return values.Skip(start).Take(problemData.TrainingPartition.End - start);
      }
      if (partition.Equals(TestSamples)) {
        int start = problemData.TestPartition.Start;
        return values.Skip(start).Take(problemData.TestPartition.End - start);
      }
      return values;
    }

    /// <summary>
    /// Calculates one correlation value with the calculator named by <paramref name="calc"/>.
    /// Any name other than Hoeffdings Dependence, Spearmans Rank or Pearsons R Squared selects Pearsons R.
    /// </summary>
    /// <param name="error">Error state reported by the calculator.</param>
    private double CalculateElementWithCalculator(string calc, IEnumerable<double> var1, IEnumerable<double> var2, out OnlineCalculatorError error) {
      if (calc.Equals(HoeffdingsDependence)) {
        return HoeffdingsDependenceCalculator.Calculate(var1, var2, out error);
      } else if (calc.Equals(SpearmansRank)) {
        return SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error);
      } else if (calc.Equals(PearsonsRSquared)) {
        return OnlinePearsonsRSquaredCalculator.Calculate(var1, var2, out error);
      } else {
        // The square root of R squared is never negative, although Pearsons R ranges over
        // [-1, 1]. The sign of R equals the sign of the covariance, so it is restored here.
        // Both sequences are materialized because they are enumerated twice.
        List<double> xs = var1.ToList();
        List<double> ys = var2.ToList();
        double r = Math.Sqrt(OnlinePearsonsRSquaredCalculator.Calculate(xs, ys, out error));
        if (error.Equals(OnlineCalculatorError.None) && HasNegativeCovariance(xs, ys)) {
          return -r;
        }
        return r;
      }
    }

    /// <summary>
    /// Returns true if the sum of the products of the deviations from the means is negative.
    /// Values are paired by position up to the shorter length; a NaN sum yields false.
    /// </summary>
    private static bool HasNegativeCovariance(IList<double> xs, IList<double> ys) {
      int n = Math.Min(xs.Count, ys.Count);
      if (n == 0) return false;

      double meanX = 0.0;
      double meanY = 0.0;
      for (int k = 0; k < n; k++) {
        meanX += xs[k];
        meanY += ys[k];
      }
      meanX /= n;
      meanY /= n;

      double coMoment = 0.0;
      for (int k = 0; k < n; k++) {
        coMoment += (xs[k] - meanX) * (ys[k] - meanY);
      }
      return coMoment < 0.0;
    }

    /// <summary>
    /// Handles the end of a worker run: a cancelled run is restarted with the most recent request,
    /// a failed run shows the error dialog, and a successful run stores the matrix and raises
    /// <see cref="CorrelationCalculationFinished"/>.
    /// </summary>
    private void BwRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      if (e.Cancelled || worker.CancellationPending) {
        bw.RunWorkerAsync(bwInfo);
        return;
      }

      if (e.Error != null) {
        ErrorHandling.ShowErrorDialog(e.Error);
        return;
      }

      matrix = (double[,])e.Result;
      OnCorrelationCalculationFinished();
    }
    #endregion

    #region events
    /// <summary>Raised after a calculation finished successfully and the matrix was replaced.</summary>
    public event EventHandler CorrelationCalculationFinished;
    protected virtual void OnCorrelationCalculationFinished() {
      EventHandler handler = CorrelationCalculationFinished;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }

    public delegate void ProgressCalculationHandler(object sender, ProgressChangedEventArgs e);

    /// <summary>Forwards the progress reports of the background worker unless a cancellation is pending.</summary>
    public event ProgressCalculationHandler ProgressCalculation;
    protected void BwProgressChanged(object sender, ProgressChangedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      // Copy the delegate first so that a concurrent unsubscribe cannot null it between
      // the check and the invocation (same pattern as the other events of this class).
      ProgressCalculationHandler handler = ProgressCalculation;
      if (!worker.CancellationPending && handler != null) {
        handler(sender, e);
      }
    }

    /// <summary>Raised after <see cref="ProblemData"/> was set to a different instance.</summary>
    public event EventHandler ProblemDataChanged;
    protected virtual void OnProblemDataChanged() {
      EventHandler handler = ProblemDataChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    #endregion

    /// <summary>Describes one calculation request handed to the background worker.</summary>
    protected class BackgroundWorkerInfo {
      public Dataset Dataset { get; set; }
      public string Calculator { get; set; }
      public string Partition { get; set; }
      // Null requests the variable-by-variable matrix; otherwise the variable to rotate.
      public string Variable { get; set; }
      public int Frames { get; set; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.