Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/FeatureCorrelation.cs @ 8483

Last change on this file since 8483 was 8483, checked in by sforsten, 12 years ago

#1292:

  • Renamed ExtendedHeatMap to FeatureCorrelation
  • deleted old CorrelationHeatMapView
  • added FeatureCorrelationView
File size: 8.8 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */

#endregion
22
23using System;
24using System.Collections.Generic;
25using System.ComponentModel;
26using System.Linq;
27using HeuristicLab.Analysis;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.PluginInfrastructure;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Heat map of pairwise correlation values between the double variables of a data set.
  /// The values are computed asynchronously on a <see cref="BackgroundWorker"/>; progress is
  /// published through <see cref="ProgressCalculation"/>.
  /// </summary>
  [StorableClass]
  [Item("FeatureCorrelation", "Represents the correlation of features in a data set.")]
  public class FeatureCorrelation : HeatMap {

    private const string PearsonsR = "Pearsons R";
    private const string PearsonsRSquared = "Pearsons R Squared";
    private const string HoeffdingsDependence = "Hoeffdings Dependence";
    private const string SpearmansRank = "Spearmans Rank";

    /// <summary>
    /// Names of the supported correlation calculators. The first entry is the one used by the
    /// initial calculation started from the problem-data constructor.
    /// </summary>
    public IEnumerable<string> CorrelationCalculators {
      get { return new List<string>() { PearsonsR, PearsonsRSquared, HoeffdingsDependence, SpearmansRank }; }
    }

    private const string AllSamples = "All Samples";
    private const string TrainingSamples = "Training Samples";
    private const string TestSamples = "Test Samples";

    /// <summary>
    /// Names of the supported row partitions. The first entry is the one used by the initial
    /// calculation started from the problem-data constructor.
    /// </summary>
    public IEnumerable<string> Partitions {
      get { return new List<string>() { AllSamples, TrainingSamples, TestSamples }; }
    }

    private IDataAnalysisProblemData problemData;

    /// <summary>
    /// Problem data whose double variables label the rows and columns of the heat map.
    /// Assigning a different instance replaces the row/column names and raises
    /// <see cref="ProblemDataChanged"/>; it does not start a new calculation.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// A non-null value is being replaced with <c>null</c>.
    /// </exception>
    public IDataAnalysisProblemData ProblemData {
      get { return problemData; }
      set {
        if (problemData != value) {
          // Reject null before touching any state so a failed assignment cannot leave the
          // field replaced while the row/column names still belong to the old data.
          if (value == null) throw new ArgumentNullException("value");
          problemData = value;
          columnNames = value.Dataset.DoubleVariables.ToList();
          rowNames = value.Dataset.DoubleVariables.ToList();
          OnProblemDataChanged();
        }
      }
    }

    // Worker that performs the current (or most recent) calculation; created lazily.
    private BackgroundWorker bw;

    /// <summary>
    /// Creates an instance without problem data, using "1" and "2" as placeholder
    /// row and column names.
    /// </summary>
    public FeatureCorrelation()
      : base() {
      this.Title = "Feature Correlation";
      this.columnNames = Enumerable.Range(1, 2).Select(x => x.ToString()).ToList();
      this.rowNames = Enumerable.Range(1, 2).Select(x => x.ToString()).ToList();
      sortableView = true;
    }

    /// <summary>
    /// Creates an instance for <paramref name="problemData"/> and immediately starts a
    /// calculation with the first calculator and the first partition.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    public FeatureCorrelation(IDataAnalysisProblemData problemData) {
      if (problemData == null) throw new ArgumentNullException("problemData");
      this.problemData = problemData;
      this.Title = "Feature Correlation";
      this.columnNames = problemData.Dataset.DoubleVariables.ToList();
      this.rowNames = problemData.Dataset.DoubleVariables.ToList();
      sortableView = true;

      CalculateElements(problemData.Dataset);
    }

    /// <summary>
    /// Copy constructor used by <see cref="Clone(Cloner)"/>. The problem data reference is
    /// shared with <paramref name="original"/>, not cloned.
    /// </summary>
    protected FeatureCorrelation(FeatureCorrelation original, Cloner cloner)
      : base(original, cloner) {
      this.Title = "Feature Correlation";
      this.problemData = original.problemData;
      // A default-constructed original has no problem data; in that case leave the row and
      // column names as the base copy constructor left them instead of dereferencing null.
      if (original.problemData != null) {
        this.columnNames = original.problemData.Dataset.DoubleVariables.ToList();
        this.rowNames = original.problemData.Dataset.DoubleVariables.ToList();
      }
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new FeatureCorrelation(this, cloner);
    }

    /// <summary>
    /// Starts a new asynchronous calculation on the current problem data, cancelling a
    /// calculation that is still running.
    /// </summary>
    /// <param name="calc">One of the names in <see cref="CorrelationCalculators"/>.</param>
    /// <param name="partition">One of the names in <see cref="Partitions"/>.</param>
    /// <exception cref="InvalidOperationException">No problem data has been set.</exception>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="calc"/> or <paramref name="partition"/> is null.
    /// </exception>
    public void Recalculate(string calc, string partition) {
      if (problemData == null)
        throw new InvalidOperationException("ProblemData must be set before the correlation can be calculated.");
      CalculateElements(problemData.Dataset, calc, partition);
    }

    private void CalculateElements(Dataset dataset) {
      CalculateElements(dataset, CorrelationCalculators.First(), Partitions.First());
    }

    // Captures the inputs of one calculation, sets the value range that belongs to the chosen
    // calculator and hands the work to the background worker.
    private void CalculateElements(Dataset dataset, string calc, string partition) {
      if (dataset == null) throw new ArgumentNullException("dataset");
      if (calc == null) throw new ArgumentNullException("calc");
      if (partition == null) throw new ArgumentNullException("partition");

      BackgroundWorkerInfo info = new BackgroundWorkerInfo { Dataset = dataset, Calculator = calc, Partition = partition };

      // Snapshot the partition bounds on the calling thread. The worker must not read the
      // mutable problemData field, because it may be reassigned while the calculation runs.
      if (partition.Equals(TrainingSamples)) {
        info.RestrictRows = true;
        info.FirstRow = problemData.TrainingPartition.Start;
        info.RowCount = problemData.TrainingPartition.End - problemData.TrainingPartition.Start;
      } else if (partition.Equals(TestSamples)) {
        info.RestrictRows = true;
        info.FirstRow = problemData.TestPartition.Start;
        info.RowCount = problemData.TestPartition.End - problemData.TestPartition.Start;
      }

      if (bw == null || bw.IsBusy) {
        // A busy worker cannot be restarted: request cancellation and replace it. The old
        // worker's completion is ignored because its cancellation flag is set.
        if (bw != null) {
          bw.CancelAsync();
        }
        bw = new BackgroundWorker();
        bw.WorkerReportsProgress = true;
        bw.WorkerSupportsCancellation = true;
        bw.DoWork += new DoWorkEventHandler(bw_DoWork);
        bw.ProgressChanged += new ProgressChangedEventHandler(bw_ProgressChanged);
        bw.RunWorkerCompleted += new RunWorkerCompletedEventHandler(bw_RunWorkerCompleted);
      }

      // Set the range before the worker starts. Maximum is assigned before Minimum, as in the
      // original code. Any name that is not matched explicitly is calculated as Pearson's R
      // by bw_DoWork, so the fallback range here is the one of Pearson's R as well.
      if (calc.Equals(PearsonsRSquared)) {
        Maximum = 1.0;
        Minimum = 0.0;
      } else if (calc.Equals(HoeffdingsDependence)) {
        Maximum = 1.0;
        Minimum = -0.5;
      } else {
        Maximum = 1.0;
        Minimum = -1.0;
      }

      bw.RunWorkerAsync(info);
    }

    // Returns the values of one variable, restricted to the rows captured in info (if any).
    private static IEnumerable<double> GetPartitionValues(BackgroundWorkerInfo info, string variableName) {
      IEnumerable<double> values = info.Dataset.GetDoubleValues(variableName);
      return info.RestrictRows ? values.Skip(info.FirstRow).Take(info.RowCount) : values;
    }

    // Computes the lower triangle (including the diagonal) of the correlation matrix and
    // mirrors every value into the upper triangle. Runs on the worker thread and therefore
    // only uses the data captured in the BackgroundWorkerInfo argument.
    private void bw_DoWork(object sender, DoWorkEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      BackgroundWorkerInfo bwInfo = (BackgroundWorkerInfo)e.Argument;
      string calc = bwInfo.Calculator;

      IList<string> doubleVariableNames = bwInfo.Dataset.DoubleVariables.ToList();
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, length];

      // Number of cells in the lower triangle including the diagonal.
      double calculations = (Math.Pow(length, 2) + length) / 2;

      int lastProgress = 0;
      worker.ReportProgress(lastProgress);

      for (int i = 0; i < length; i++) {
        // The first variable is the same for the whole row, so select it once per row.
        IEnumerable<double> var1 = GetPartitionValues(bwInfo, doubleVariableNames[i]);

        for (int j = 0; j < i + 1; j++) {
          if (worker.CancellationPending) {
            e.Cancel = true;
            return;
          }

          IEnumerable<double> var2 = GetPartitionValues(bwInfo, doubleVariableNames[j]);

          OnlineCalculatorError error;
          if (calc.Equals(HoeffdingsDependence)) {
            elements[i, j] = HoeffdingsDependenceCalculator.Calculate(var1, var2, out error);
          } else if (calc.Equals(SpearmansRank)) {
            elements[i, j] = SpearmansRankCorrelationCoefficientCalculator.Calculate(var1, var2, out error);
          } else if (calc.Equals(PearsonsRSquared)) {
            elements[i, j] = OnlinePearsonsRSquaredCalculator.Calculate(var1, var2, out error);
          } else {
            elements[i, j] = OnlinePearsonsRSquaredCalculator.CalculateR(var1, var2, out error);
          }
          elements[j, i] = elements[i, j];

          if (error != OnlineCalculatorError.None) {
            // Signal the end of the calculation to progress listeners; the exception is
            // delivered to bw_RunWorkerCompleted through RunWorkerCompletedEventArgs.Error.
            worker.ReportProgress(100);
            throw new ArgumentException("Calculator returned " + error + Environment.NewLine + "Maybe try another calculator.");
          }

          // Only raise a progress event when the integer percentage changes; reporting
          // every cell would raise one event per cell with mostly identical values.
          int progress = (int)Math.Round((((Math.Pow(i, 2) + i) / 2 + j + 1.0) / calculations) * 100);
          if (progress != lastProgress) {
            lastProgress = progress;
            worker.ReportProgress(progress);
          }
        }
      }
      e.Result = elements;
    }

    // Publishes the result of a finished calculation, or shows the error that aborted it.
    // Results of cancelled workers are dropped.
    private void bw_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      if (e.Cancelled || worker.CancellationPending) return;

      if (e.Error != null) {
        ErrorHandling.ShowErrorDialog(e.Error);
        return;
      }

      matrix = (double[,])e.Result;
      OnReset();
    }

    #region events
    public delegate void ProgressCalculationHandler(object sender, ProgressChangedEventArgs e);

    /// <summary>
    /// Raised with the progress percentage of the running calculation. The sender is the
    /// background worker that performs the calculation.
    /// </summary>
    public event ProgressCalculationHandler ProgressCalculation;

    // Forwards worker progress to ProgressCalculation unless the worker has been cancelled.
    protected void bw_ProgressChanged(object sender, ProgressChangedEventArgs e) {
      BackgroundWorker worker = (BackgroundWorker)sender;
      // Copy the delegate so that an unsubscribe between the null check and the call
      // cannot cause a NullReferenceException.
      ProgressCalculationHandler handler = ProgressCalculation;
      if (!worker.CancellationPending && handler != null) {
        handler(sender, e);
      }
    }

    /// <summary>Raised after <see cref="ProblemData"/> has been replaced.</summary>
    public event EventHandler ProblemDataChanged;
    protected virtual void OnProblemDataChanged() {
      EventHandler handler = ProblemDataChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    #endregion

    /// <summary>
    /// Immutable-by-convention snapshot of the inputs of one calculation, passed to the
    /// background worker as its argument.
    /// </summary>
    protected class BackgroundWorkerInfo {
      public Dataset Dataset { get; set; }
      public string Calculator { get; set; }
      public string Partition { get; set; }

      /// <summary>True if only the rows described by FirstRow and RowCount are used.</summary>
      public bool RestrictRows { get; set; }
      /// <summary>Number of leading rows to skip when RestrictRows is set.</summary>
      public int FirstRow { get; set; }
      /// <summary>Number of rows to take after FirstRow when RestrictRows is set.</summary>
      public int RowCount { get; set; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.