Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/FeatureCorrelation.cs @ 8492

Last change on this file since 8492 was 8492, checked in by sforsten, 12 years ago

#1292:

  • added TimeframeFeatureCorrelationView
File size: 11.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#endregion
22
23using System;
24using System.Collections.Generic;
25using System.ComponentModel;
26using System.Linq;
27using HeuristicLab.Analysis;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.PluginInfrastructure;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Heat map whose cells hold a correlation measure between the double variables of a data set.
  /// </summary>
  /// <remarks>
  /// The values are computed asynchronously on a <see cref="BackgroundWorker"/>. Progress is published
  /// through <see cref="ProgressCalculation"/> and a finished calculation through
  /// <see cref="CorrelationCalculationFinished"/>. Starting a new calculation while one is still running
  /// cancels the running one; the result of a cancelled calculation is discarded.
  /// </remarks>
  [StorableClass]
  [Item("FeatureCorrelation", "Represents the correlation of features in a data set.")]
  public class FeatureCorrelation : HeatMap {

    private const string PearsonsR = "Pearsons R";
    private const string PearsonsRSquared = "Pearsons R Squared";
    private const string HoeffdingsDependence = "Hoeffdings Dependence";
    private const string SpearmansRank = "Spearmans Rank";

    /// <summary>
    /// Names of the supported correlation calculators. The first entry is the one used by default.
    /// </summary>
    public IEnumerable<string> CorrelationCalculators {
      get { return new List<string>() { PearsonsR, PearsonsRSquared, HoeffdingsDependence, SpearmansRank }; }
    }

    private const string AllSamples = "All Samples";
    private const string TrainingSamples = "Training Samples";
    private const string TestSamples = "Test Samples";

    /// <summary>
    /// Names of the selectable data partitions. The first entry is the one used by default.
    /// </summary>
    public IEnumerable<string> Partitions {
      get { return new List<string>() { AllSamples, TrainingSamples, TestSamples }; }
    }

    private IDataAnalysisProblemData problemData;

    /// <summary>
    /// Problem data whose double variables are correlated. Assigning a different instance replaces the
    /// row and column names with the double variable names of the new data set and raises
    /// <see cref="ProblemDataChanged"/>; it does not start a recalculation.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IDataAnalysisProblemData ProblemData {
      get { return problemData; }
      set {
        if (problemData != value) {
          // Reject null before touching any state; previously the field was overwritten
          // first and the following dereference then failed with a NullReferenceException.
          if (value == null) {
            throw new ArgumentNullException("value");
          }
          // Build both name lists before assigning anything so that a failure leaves the instance unchanged.
          var newColumnNames = value.Dataset.DoubleVariables.ToList();
          var newRowNames = value.Dataset.DoubleVariables.ToList();
          problemData = value;
          columnNames = newColumnNames;
          rowNames = newRowNames;
          OnProblemDataChanged();
        }
      }
    }

    // Worker of the most recently started calculation; replaced whenever a calculation is
    // requested while the previous one is still busy.
    private BackgroundWorker bw;

    /// <summary>
    /// Creates an instance without problem data, using the placeholder row and column names "1" and "2".
    /// </summary>
    public FeatureCorrelation()
      : base() {
      this.Title = "Feature Correlation";
      this.columnNames = Enumerable.Range(1, 2).Select(x => x.ToString()).ToList();
      this.rowNames = Enumerable.Range(1, 2).Select(x => x.ToString()).ToList();
      sortableView = true;
    }

    /// <summary>
    /// Creates an instance for <paramref name="problemData"/> and immediately starts a calculation
    /// with the first calculator and the first partition.
    /// </summary>
    /// <param name="problemData">Problem data providing the data set and the partitions.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is <c>null</c>.</exception>
    public FeatureCorrelation(IDataAnalysisProblemData problemData) {
      if (problemData == null) {
        throw new ArgumentNullException("problemData");
      }
      this.problemData = problemData;
      this.Title = "Feature Correlation";
      this.columnNames = problemData.Dataset.DoubleVariables.ToList();
      this.rowNames = problemData.Dataset.DoubleVariables.ToList();
      sortableView = true;

      CalculateElements(problemData.Dataset);
    }

    /// <summary>
    /// Copy constructor used by <see cref="Clone"/>. The problem data reference is shared with
    /// <paramref name="original"/> (it is not passed through <paramref name="cloner"/>).
    /// </summary>
    protected FeatureCorrelation(FeatureCorrelation original, Cloner cloner)
      : base(original, cloner) {
      this.Title = "Feature Correlation";
      this.problemData = original.problemData;
      if (original.problemData != null) {
        this.columnNames = original.problemData.Dataset.DoubleVariables.ToList();
        this.rowNames = original.problemData.Dataset.DoubleVariables.ToList();
      } else {
        // The original was created without problem data (parameterless constructor).
        // Copy its current names instead of dereferencing the missing problem data.
        if (original.columnNames != null) {
          this.columnNames = original.columnNames.ToList();
        }
        if (original.rowNames != null) {
          this.rowNames = original.rowNames.ToList();
        }
      }
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new FeatureCorrelation(this, cloner);
    }

    /// <summary>
    /// Starts an asynchronous recalculation of the full variable-by-variable correlation matrix.
    /// </summary>
    /// <param name="calc">Calculator name, one of <see cref="CorrelationCalculators"/>.</param>
    /// <param name="partition">Partition name, one of <see cref="Partitions"/>.</param>
    /// <exception cref="InvalidOperationException">No problem data has been set.</exception>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="calc"/> or <paramref name="partition"/> is <c>null</c>.
    /// </exception>
    public void Recalculate(string calc, string partition) {
      EnsureProblemData();
      CalculateElements(problemData.Dataset, calc, partition);
    }

    /// <summary>
    /// Starts an asynchronous calculation that correlates every double variable with
    /// <paramref name="variable"/> rotated by 0 to <paramref name="frames"/> positions.
    /// The result has one row per double variable and <c>frames + 1</c> columns.
    /// </summary>
    /// <param name="calc">Calculator name, one of <see cref="CorrelationCalculators"/>.</param>
    /// <param name="partition">Partition name, one of <see cref="Partitions"/>.</param>
    /// <param name="variable">
    /// Name of the reference variable. If <c>null</c>, the ordinary correlation matrix is calculated instead.
    /// </param>
    /// <param name="frames">Largest rotation offset to evaluate.</param>
    /// <exception cref="InvalidOperationException">No problem data has been set.</exception>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="calc"/> or <paramref name="partition"/> is <c>null</c>.
    /// </exception>
    public void CalculateTimeframeElements(string calc, string partition, string variable, int frames) {
      EnsureProblemData();
      CalculateElements(problemData.Dataset, calc, partition, variable, frames);
    }

    // Fails with a descriptive exception instead of a NullReferenceException when a
    // calculation is requested before any problem data has been assigned.
    private void EnsureProblemData() {
      if (problemData == null) {
        throw new InvalidOperationException("No problem data has been set.");
      }
    }

    // Starts a calculation with the default calculator and the default partition.
    private void CalculateElements(Dataset dataset) {
      CalculateElements(dataset, CorrelationCalculators.First(), Partitions.First());
    }

    // Sets the value range of the heat map for the chosen calculator and starts the
    // background calculation, cancelling a calculation that is still running.
    private void CalculateElements(Dataset dataset, string calc, string partition, string variable = null, int frames = 0) {
      // Validate before anything is started; a null calculator used to surface as a
      // NullReferenceException only after the worker had already been launched.
      if (calc == null) {
        throw new ArgumentNullException("calc");
      }
      if (partition == null) {
        throw new ArgumentNullException("partition");
      }

      // Set the range before the worker starts so that a completion notification can
      // never be observed together with the range of the previous calculator.
      if (calc.Equals(PearsonsR) || calc.Equals(SpearmansRank)) {
        Maximum = 1.0;
        Minimum = -1.0;
      } else if (calc.Equals(HoeffdingsDependence)) {
        Maximum = 1.0;
        Minimum = -0.5;
      } else {
        Maximum = 1.0;
        Minimum = 0.0;
      }

      if (bw == null || bw.IsBusy) {
        if (bw != null) {
          // The superseded worker keeps CancellationPending set, which makes
          // BwRunWorkerCompleted and BwProgressChanged ignore it from now on.
          bw.CancelAsync();
        }
        bw = CreateWorker();
      }

      bw.RunWorkerAsync(new BackgroundWorkerInfo {
        ProblemData = problemData,
        Dataset = dataset,
        Calculator = calc,
        Partition = partition,
        Variable = variable,
        Frames = frames
      });
    }

    // Creates a cancellable, progress-reporting worker wired to the handlers of this instance.
    private BackgroundWorker CreateWorker() {
      BackgroundWorker worker = new BackgroundWorker();
      worker.WorkerReportsProgress = true;
      worker.WorkerSupportsCancellation = true;
      worker.DoWork += new DoWorkEventHandler(BwDoWork);
      worker.ProgressChanged += new ProgressChangedEventHandler(BwProgressChanged);
      worker.RunWorkerCompleted += new RunWorkerCompletedEventHandler(BwRunWorkerCompleted);
      return worker;
    }

    #region backgroundworker
    // Dispatches to the timeframe calculation when a reference variable was supplied,
    // otherwise to the ordinary correlation matrix calculation.
    private void BwDoWork(object sender, DoWorkEventArgs e) {
      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
      if (bwInfo.Variable == null) {
        BwCalculateCorrelation(sender, e);
      } else {
        BwCalculateTimeframeCorrelation(sender, e);
      }
    }

    // Computes the symmetric variable-by-variable matrix. Only the lower triangle
    // (including the diagonal) is calculated; each value is mirrored to the upper triangle.
    private void BwCalculateCorrelation(object sender, DoWorkEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;

      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
      // Use the snapshot taken when the calculation was started, so that replacing
      // ProblemData while the worker runs cannot mix two different data sets.
      IDataAnalysisProblemData data = bwInfo.ProblemData ?? problemData;
      Dataset dataset = bwInfo.Dataset;
      string partition = bwInfo.Partition;
      string calc = bwInfo.Calculator;

      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
      OnlineCalculatorError error;
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, length];
      // Number of cells in the lower triangle including the diagonal.
      double calculations = (length * (length + 1.0)) / 2;

      worker.ReportProgress(0);
      int lastPercent = 0;

      for (int i = 0; i < length; i++) {
        // The row variable does not depend on j, so it is fetched once per row.
        IEnumerable<double> var1 = GetRelevantValues(data, partition, doubleVariableNames[i]);

        for (int j = 0; j < i + 1; j++) {
          if (worker.CancellationPending) {
            e.Cancel = true;
            return;
          }

          IEnumerable<double> var2 = GetRelevantValues(data, partition, doubleVariableNames[j]);

          elements[i, j] = CalculateElementWithCalculator(calc, var1, var2, out error);
          elements[j, i] = elements[i, j];

          if (!error.Equals(OnlineCalculatorError.None)) {
            worker.ReportProgress(100);
            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
          }

          // i * (i + 1) / 2 cells belong to the rows that are already complete.
          double finished = (i * (i + 1.0)) / 2 + j + 1.0;
          ReportProgressIfChanged(worker, (int)Math.Round((finished / calculations) * 100), ref lastPercent);
        }
      }
      e.Result = elements;
    }

    // Computes, for every double variable, its correlation with the reference variable
    // rotated by 0..frames positions (row = variable, column = rotation offset).
    private void BwCalculateTimeframeCorrelation(object sender, DoWorkEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;

      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
      // See BwCalculateCorrelation: work on the snapshot, not on the mutable field.
      IDataAnalysisProblemData data = bwInfo.ProblemData ?? problemData;
      Dataset dataset = bwInfo.Dataset;
      string partition = bwInfo.Partition;
      string calc = bwInfo.Calculator;
      string variable = bwInfo.Variable;
      int frames = bwInfo.Frames;

      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
      OnlineCalculatorError error;
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, frames + 1];
      double calculations = (frames + 1) * length;

      worker.ReportProgress(0);
      int lastPercent = 0;

      // Values of the reference variable. They are identical for every cell, so they are
      // materialized once, and only when the first cell is actually calculated.
      List<double> referenceValues = null;

      for (int i = 0; i < length; i++) {
        // The compared variable does not depend on j, so it is fetched once per row.
        IEnumerable<double> var2 = GetRelevantValues(data, partition, doubleVariableNames[i]);

        for (int j = 0; j <= frames; j++) {
          if (worker.CancellationPending) {
            e.Cancel = true;
            return;
          }

          if (referenceValues == null) {
            referenceValues = GetRelevantValues(data, partition, variable).ToList();
          }

          // Rotate the reference series left by j: the first j values are moved to the end.
          IEnumerable<double> var1 = referenceValues.Skip(j).Concat(referenceValues.Take(j));

          elements[i, j] = CalculateElementWithCalculator(calc, var1, var2, out error);

          if (!error.Equals(OnlineCalculatorError.None)) {
            worker.ReportProgress(100);
            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
          }

          ReportProgressIfChanged(worker, (int)((100.0 / calculations) * (i * (frames + 1) + j + 1)), ref lastPercent);
        }
      }
      e.Result = elements;
    }

    // Reports progress only when the integer percentage differs from the last reported one,
    // so that large matrices do not raise one progress event per cell.
    private static void ReportProgressIfChanged(BackgroundWorker worker, int percent, ref int lastPercent) {
      if (percent != lastPercent) {
        worker.ReportProgress(percent);
        lastPercent = percent;
      }
    }

    // Returns the values of the given variable restricted to the named partition.
    // Any partition name other than the training or test partition selects all values.
    private IEnumerable<double> GetRelevantValues(IDataAnalysisProblemData problemData, string partition, string variable) {
      IEnumerable<double> var = problemData.Dataset.GetDoubleValues(variable);
      if (partition.Equals(TrainingSamples)) {
        var = var.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start);
      } else if (partition.Equals(TestSamples)) {
        var = var.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start);
      }
      return var;
    }

    // Evaluates the named calculator on the two series.
    // Any name that is not recognized is treated like Pearson's R.
    private double CalculateElementWithCalculator(string calc, IEnumerable<double> var1, IEnumerable<double> var2, out OnlineCalculatorError error) {
      if (calc.Equals(HoeffdingsDependence)) {
        return HoeffdingsDependenceCalculator.Calculate(var1, var2, out error);
      } else if (calc.Equals(SpearmansRank)) {
        return SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error);
      } else if (calc.Equals(PearsonsRSquared)) {
        return OnlinePearsonsRSquaredCalculator.Calculate(var1, var2, out error);
      } else {
        return OnlinePearsonsRSquaredCalculator.CalculateR(var1, var2, out error);
      }
    }

    // Stores the result of a finished calculation and raises CorrelationCalculationFinished.
    // A failed calculation is shown in an error dialog; a cancelled or superseded one is ignored.
    private void BwRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      if (e.Cancelled || worker.CancellationPending) {
        return;
      }
      if (e.Error != null) {
        ErrorHandling.ShowErrorDialog(e.Error);
        return;
      }
      matrix = (double[,])e.Result;
      OnCorrelationCalculationFinished();
    }
    #endregion

    #region events
    /// <summary>
    /// Raised after the result of a calculation that was neither cancelled nor failed has been stored.
    /// </summary>
    public event EventHandler CorrelationCalculationFinished;
    protected virtual void OnCorrelationCalculationFinished() {
      EventHandler handler = CorrelationCalculationFinished;
      if (handler != null) {
        handler(this, EventArgs.Empty);
      }
    }

    public delegate void ProgressCalculationHandler(object sender, ProgressChangedEventArgs e);

    /// <summary>
    /// Raised when the running calculation reports progress. The sender is the reporting
    /// <see cref="BackgroundWorker"/>.
    /// </summary>
    public event ProgressCalculationHandler ProgressCalculation;
    protected void BwProgressChanged(object sender, ProgressChangedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      // Copy the delegate first so that an unsubscribe between the null check and the
      // invocation cannot cause a NullReferenceException (same pattern as the other events).
      ProgressCalculationHandler handler = ProgressCalculation;
      if (!worker.CancellationPending && handler != null) {
        handler(sender, e);
      }
    }

    /// <summary>
    /// Raised after <see cref="ProblemData"/> has been replaced by a different instance.
    /// </summary>
    public event EventHandler ProblemDataChanged;
    protected virtual void OnProblemDataChanged() {
      EventHandler handler = ProblemDataChanged;
      if (handler != null) {
        handler(this, EventArgs.Empty);
      }
    }
    #endregion

    /// <summary>
    /// Argument object handed to the background worker; describes one calculation request.
    /// </summary>
    protected class BackgroundWorkerInfo {
      /// <summary>Problem data captured when the calculation was started; may be <c>null</c>.</summary>
      public IDataAnalysisProblemData ProblemData { get; set; }
      /// <summary>Data set that supplies the double variable names.</summary>
      public Dataset Dataset { get; set; }
      /// <summary>Name of the correlation calculator.</summary>
      public string Calculator { get; set; }
      /// <summary>Name of the partition whose rows are used.</summary>
      public string Partition { get; set; }
      /// <summary>Reference variable of a timeframe calculation, or <c>null</c> for the ordinary matrix.</summary>
      public string Variable { get; set; }
      /// <summary>Largest rotation offset of a timeframe calculation.</summary>
      public int Frames { get; set; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.