Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/FeatureCorrelation.cs @ 8559

Last change on this file since 8559 was 8559, checked in by gkronber, 12 years ago

#1292: removed the default constructor for FeatureCorrelation as it simply runs into a NullReferenceException (the default ctor is not used anywhere and is senseless).

This fixes the unit test failure for the meta-optimization branch on the builder.

File size: 11.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#endregion
22
23using System;
24using System.Collections.Generic;
25using System.ComponentModel;
26using System.Linq;
27using HeuristicLab.Analysis;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.PluginInfrastructure;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Heat map whose cells hold correlation values between the double variables of the
  /// data set of an <see cref="IDataAnalysisProblemData"/>. The values are computed on a
  /// <see cref="BackgroundWorker"/>; completion and progress are announced through
  /// <see cref="CorrelationCalculationFinished"/> and <see cref="ProgressCalculation"/>.
  /// </summary>
  [StorableClass]
  [Item("FeatureCorrelation", "Represents the correlation of features in a data set.")]
  public class FeatureCorrelation : HeatMap {
    // Display names of the supported calculators; the same strings are used as
    // selectors for the "calc" parameters below.
    private const string PearsonsR = "Pearsons R";
    private const string PearsonsRSquared = "Pearsons R Squared";
    private const string HoeffdingsDependence = "Hoeffdings Dependence";
    private const string SpearmansRank = "Spearmans Rank";

    /// <summary>Names of all calculators accepted by the "calc" parameters (a new list per call).</summary>
    public IEnumerable<string> CorrelationCalculators {
      get { return new List<string>() { PearsonsR, PearsonsRSquared, HoeffdingsDependence, SpearmansRank }; }
    }

    // Display names of the supported partitions; used as selectors for the
    // "partition" parameters below.
    private const string AllSamples = "All Samples";
    private const string TrainingSamples = "Training Samples";
    private const string TestSamples = "Test Samples";

    /// <summary>Names of all partitions accepted by the "partition" parameters (a new list per call).</summary>
    public IEnumerable<string> Partitions {
      get { return new List<string>() { AllSamples, TrainingSamples, TestSamples }; }
    }

    private IDataAnalysisProblemData problemData;

    /// <summary>
    /// Problem data whose double variables are correlated. Assigning a different instance
    /// refreshes the row and column names and raises <see cref="ProblemDataChanged"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null (and differs from the current value).</exception>
    [Storable]
    public IDataAnalysisProblemData ProblemData {
      get { return problemData; }
      set {
        if (problemData == value) {
          return;
        }
        if (value == null) {
          throw new ArgumentNullException("value");
        }
        // Read the names before touching any state so that a failure here cannot
        // leave the instance half-updated.
        var newColumnNames = value.Dataset.DoubleVariables.ToList();
        var newRowNames = value.Dataset.DoubleVariables.ToList();
        problemData = value;
        columnNames = newColumnNames;
        rowNames = newRowNames;
        OnProblemDataChanged();
      }
    }

    private BackgroundWorker bw;
    // Most recent calculation request; this is what the worker should end up computing.
    private BackgroundWorkerInfo bwInfo;
    // Request that was handed to the worker when it was last started.
    private BackgroundWorkerInfo bwRunningInfo;

    /// <summary>Creates a feature correlation heat map for the given problem data.</summary>
    /// <param name="problemData">Problem data providing the data set; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    public FeatureCorrelation(IDataAnalysisProblemData problemData)
      : base() {
      if (problemData == null) {
        throw new ArgumentNullException("problemData");
      }
      this.problemData = problemData;
      this.Title = "Feature Correlation";
      this.columnNames = problemData.Dataset.DoubleVariables.ToList();
      this.rowNames = problemData.Dataset.DoubleVariables.ToList();
      sortableView = true;
    }

    /// <summary>Cloning constructor; clones the problem data through <paramref name="cloner"/>.</summary>
    protected FeatureCorrelation(FeatureCorrelation original, Cloner cloner)
      : base(original, cloner) {
      this.problemData = cloner.Clone(original.problemData);
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new FeatureCorrelation(this, cloner);
    }

    [StorableConstructor]
    protected FeatureCorrelation(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Starts (or re-starts) the asynchronous calculation of the variable-by-variable
    /// correlation matrix.
    /// </summary>
    /// <param name="calc">One of <see cref="CorrelationCalculators"/>.</param>
    /// <param name="partition">One of <see cref="Partitions"/>.</param>
    public void Recalculate(string calc, string partition) {
      CalculateElements(problemData.Dataset, calc, partition);
    }

    /// <summary>
    /// Starts (or re-starts) the asynchronous calculation of the correlation between
    /// <paramref name="variable"/>, rotated by 0..<paramref name="frames"/> positions, and
    /// every double variable of the data set.
    /// </summary>
    /// <param name="calc">One of <see cref="CorrelationCalculators"/>.</param>
    /// <param name="partition">One of <see cref="Partitions"/>.</param>
    /// <param name="variable">Variable that is rotated; when null the plain correlation matrix is calculated instead.</param>
    /// <param name="frames">Largest rotation offset; the result has frames + 1 columns.</param>
    public void CalculateTimeframeElements(string calc, string partition, string variable, int frames) {
      CalculateElements(problemData.Dataset, calc, partition, variable, frames);
    }

    // Convenience overload using the first calculator and the first partition.
    private void CalculateElements(Dataset dataset) {
      CalculateElements(dataset, CorrelationCalculators.First(), Partitions.First());
    }

    // Records the request, makes sure a worker exists and either starts it or, if it is
    // busy, asks it to cancel (the completion handler then starts the recorded request).
    // Finally adjusts the heat map's value range to the chosen calculator.
    private void CalculateElements(Dataset dataset, string calc, string partition, string variable = null, int frames = 0) {
      // Fail before any state is changed or a worker is started.
      if (calc == null) {
        throw new ArgumentNullException("calc");
      }

      bwInfo = new BackgroundWorkerInfo { Dataset = dataset, Calculator = calc, Partition = partition, Variable = variable, Frames = frames };
      if (bw == null) {
        bw = CreateWorker();
      }
      if (bw.IsBusy) {
        bw.CancelAsync();
      } else {
        StartWorker();
      }

      if (calc.Equals(PearsonsR) || calc.Equals(SpearmansRank)) {
        Maximum = 1.0;
        Minimum = -1.0;
      } else if (calc.Equals(HoeffdingsDependence)) {
        Maximum = 1.0;
        Minimum = -0.5;
      } else {
        Maximum = 1.0;
        Minimum = 0.0;
      }
    }

    // Creates the worker and attaches the handlers of this instance.
    private BackgroundWorker CreateWorker() {
      BackgroundWorker worker = new BackgroundWorker();
      worker.WorkerReportsProgress = true;
      worker.WorkerSupportsCancellation = true;
      worker.DoWork += new DoWorkEventHandler(BwDoWork);
      worker.ProgressChanged += new ProgressChangedEventHandler(BwProgressChanged);
      worker.RunWorkerCompleted += new RunWorkerCompletedEventHandler(BwRunWorkerCompleted);
      return worker;
    }

    // Starts the worker for the most recent request and remembers which request that was.
    private void StartWorker() {
      bwRunningInfo = bwInfo;
      bw.RunWorkerAsync(bwInfo);
    }

    #region backgroundworker
    // Dispatches to the plain or the timeframe calculation depending on whether a
    // variable was specified in the request.
    private void BwDoWork(object sender, DoWorkEventArgs e) {
      BackgroundWorkerInfo info = (BackgroundWorkerInfo)e.Argument;
      if (info.Variable == null) {
        BwCalculateCorrelation(sender, e);
      } else {
        BwCalculateTimeframeCorrelation(sender, e);
      }
    }

    // Fills a symmetric length x length matrix by evaluating the lower triangle
    // (including the diagonal) and mirroring each value.
    private void BwCalculateCorrelation(object sender, DoWorkEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      BackgroundWorkerInfo info = (BackgroundWorkerInfo)e.Argument;
      string partition = info.Partition;
      string calc = info.Calculator;

      IList<string> doubleVariableNames = info.Dataset.DoubleVariables.ToList();
      OnlineCalculatorError error;
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, length];
      // Number of cells in the lower triangle including the diagonal.
      double calculations = (Math.Pow(length, 2) + length) / 2;

      worker.ReportProgress(0);

      for (int i = 0; i < length; i++) {
        // Depends on i only, so it is looked up once per row.
        IEnumerable<double> var1 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);
        for (int j = 0; j < i + 1; j++) {
          if (worker.CancellationPending) {
            e.Cancel = true;
            return;
          }

          IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[j]);

          elements[i, j] = CalculateElementWithCalculator(calc, var1, var2, out error);
          elements[j, i] = elements[i, j];
          if (!error.Equals(OnlineCalculatorError.None)) {
            worker.ReportProgress(100);
            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
          }
          // Cells finished so far: all complete rows above (i * (i + 1) / 2) plus j + 1 in this row.
          worker.ReportProgress((int)Math.Round((((Math.Pow(i, 2) + i) / 2 + j + 1.0) / calculations) * 100));
        }
      }
      e.Result = elements;
    }

    // Fills a length x (frames + 1) matrix: cell [i, j] is the correlation between the
    // requested variable rotated left by j positions and the i-th double variable.
    private void BwCalculateTimeframeCorrelation(object sender, DoWorkEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      BackgroundWorkerInfo info = (BackgroundWorkerInfo)e.Argument;
      string partition = info.Partition;
      string calc = info.Calculator;
      string variable = info.Variable;
      int frames = info.Frames;

      IList<string> doubleVariableNames = info.Dataset.DoubleVariables.ToList();
      OnlineCalculatorError error;
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, frames + 1];
      double calculations = (frames + 1) * length;

      worker.ReportProgress(0);

      if (length == 0) {
        // Nothing to correlate against; do not touch the data set at all.
        e.Result = elements;
        return;
      }

      // The values of the rotated variable do not depend on i or j; read them once.
      List<double> baseValues = GetRelevantValues(problemData, partition, variable).ToList();

      for (int i = 0; i < length; i++) {
        // Depends on i only, so it is looked up once per row.
        IEnumerable<double> var2 = GetRelevantValues(problemData, partition, doubleVariableNames[i]);
        for (int j = 0; j <= frames; j++) {
          if (worker.CancellationPending) {
            e.Cancel = true;
            return;
          }

          // Rotate left by j: the first j values are moved to the end.
          List<double> rotated = baseValues.Skip(j).ToList();
          rotated.AddRange(baseValues.Take(j));

          elements[i, j] = CalculateElementWithCalculator(calc, rotated, var2, out error);

          if (!error.Equals(OnlineCalculatorError.None)) {
            worker.ReportProgress(100);
            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
          }
          worker.ReportProgress((int)((100.0 / calculations) * (i * (frames + 1) + j + 1)));
        }
      }
      e.Result = elements;
    }

    // Returns the values of the variable restricted to the requested partition; any
    // partition name other than training/test yields all values.
    private IEnumerable<double> GetRelevantValues(IDataAnalysisProblemData problemData, string partition, string variable) {
      IEnumerable<double> var = problemData.Dataset.GetDoubleValues(variable);
      if (partition.Equals(TrainingSamples)) {
        var = var.Skip(problemData.TrainingPartition.Start).Take(problemData.TrainingPartition.End - problemData.TrainingPartition.Start);
      } else if (partition.Equals(TestSamples)) {
        var = var.Skip(problemData.TestPartition.Start).Take(problemData.TestPartition.End - problemData.TestPartition.Start);
      }
      return var;
    }

    // Evaluates the calculator selected by name; any name other than Hoeffding's,
    // Spearman's or Pearson's R squared is treated as Pearson's R.
    private double CalculateElementWithCalculator(string calc, IEnumerable<double> var1, IEnumerable<double> var2, out OnlineCalculatorError error) {
      if (calc.Equals(HoeffdingsDependence)) {
        return HoeffdingsDependenceCalculator.Calculate(var1, var2, out error);
      } else if (calc.Equals(SpearmansRank)) {
        return SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error);
      } else if (calc.Equals(PearsonsRSquared)) {
        return OnlinePearsonsRSquaredCalculator.Calculate(var1, var2, out error);
      } else {
        // Pearson's R is derived from R squared. The square root alone is never negative,
        // which would make negative correlations indistinguishable from positive ones even
        // though the value range for this calculator is set to [-1, 1]. The sign of R
        // equals the sign of the covariance, so it is restored from the data.
        double[] xs = var1.ToArray();
        double[] ys = var2.ToArray();
        double r = Math.Sqrt(OnlinePearsonsRSquaredCalculator.Calculate(xs, ys, out error));
        if (error.Equals(OnlineCalculatorError.None) && HasNegativeCovariance(xs, ys)) {
          r = -r;
        }
        return r;
      }
    }

    // True if the sample co-moment sum((x - mean(x)) * (y - mean(y))) is negative.
    // Only the sign is needed, so no normalisation is applied.
    private static bool HasNegativeCovariance(double[] xs, double[] ys) {
      int n = Math.Min(xs.Length, ys.Length);
      if (n == 0) {
        return false;
      }
      double meanX = 0.0;
      double meanY = 0.0;
      for (int k = 0; k < n; k++) {
        meanX += xs[k];
        meanY += ys[k];
      }
      meanX /= n;
      meanY /= n;

      double coMoment = 0.0;
      for (int k = 0; k < n; k++) {
        coMoment += (xs[k] - meanX) * (ys[k] - meanY);
      }
      return coMoment < 0.0;
    }

    // Publishes the result of a finished run, shows its error, or - if the run was
    // cancelled or a newer request arrived in the meantime - starts the newest request.
    private void BwRunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;

      // A request may arrive after the worker finished its work but before this handler
      // runs. CancelAsync then has no effect on the run (e.Cancelled stays false), and
      // BackgroundWorker clears CancellationPending before raising this event, so the
      // only reliable indicator is that the recorded request differs from the one the
      // worker was started with.
      // NOTE(review): assumes requests and this handler run on the same (UI) thread - confirm.
      bool superseded = e.Cancelled || worker.CancellationPending || !ReferenceEquals(bwRunningInfo, bwInfo);
      if (superseded) {
        StartWorker();
        return;
      }

      if (e.Error != null) {
        ErrorHandling.ShowErrorDialog(e.Error);
      } else {
        matrix = (double[,])e.Result;
        OnCorrelationCalculationFinished();
      }
    }
    #endregion

    #region events
    /// <summary>Raised after a calculation finished and its result was stored.</summary>
    public event EventHandler CorrelationCalculationFinished;
    protected virtual void OnCorrelationCalculationFinished() {
      EventHandler handler = CorrelationCalculationFinished;
      if (handler != null) {
        handler(this, EventArgs.Empty);
      }
    }

    public delegate void ProgressCalculationHandler(object sender, ProgressChangedEventArgs e);

    /// <summary>Forwards the worker's progress reports; the sender is the worker itself.</summary>
    public event ProgressCalculationHandler ProgressCalculation;
    protected void BwProgressChanged(object sender, ProgressChangedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      // Copy to a local first (as the other event raisers in this class do) so that the
      // null check and the invocation see the same delegate.
      ProgressCalculationHandler handler = ProgressCalculation;
      // Progress of a run that is about to be cancelled is not forwarded.
      if (!worker.CancellationPending && handler != null) {
        handler(sender, e);
      }
    }

    /// <summary>Raised after <see cref="ProblemData"/> was replaced.</summary>
    public event EventHandler ProblemDataChanged;
    protected virtual void OnProblemDataChanged() {
      EventHandler handler = ProblemDataChanged;
      if (handler != null) {
        handler(this, EventArgs.Empty);
      }
    }
    #endregion

    /// <summary>Parameters of one calculation request, passed to the worker as its argument.</summary>
    protected class BackgroundWorkerInfo {
      public Dataset Dataset { get; set; }
      public string Calculator { get; set; }
      public string Partition { get; set; }
      // Null selects the plain correlation matrix; otherwise the timeframe calculation.
      public string Variable { get; set; }
      public int Frames { get; set; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.