Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs @ 8038

Last change on this file since 8038 was 8038, checked in by sforsten, 12 years ago

#1292:

  • completed branch creation
  • first simple implementation of a HeatMap, which shows the correlation of the dataset features
File size: 8.1 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Analysis;
26using HeuristicLab.Collections;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Abstract base class for the problem data of data analysis problems. It stores, as fixed-value
  /// parameters, a dataset, the checked list of its double variables (checked = allowed as input),
  /// a training and a test partition, and a heat map of the pairwise correlation of the dataset's
  /// double variables. Changes of the dataset parameter, the checked input variables or either
  /// partition are forwarded through the <see cref="Changed"/> event.
  /// </summary>
  [StorableClass]
  public abstract class DataAnalysisProblemData : ParameterizedNamedItem, IDataAnalysisProblemData {
    protected const string DatasetParameterName = "Dataset";
    protected const string InputVariablesParameterName = "InputVariables";
    protected const string TrainingPartitionParameterName = "TrainingPartition";
    protected const string TestPartitionParameterName = "TestPartition";
    protected const string DatasetHeatMapParameterName = "DatasetCorrelationHeatMap";

    #region parameter properties
    /// <summary>Parameter holding the dataset.</summary>
    public IFixedValueParameter<Dataset> DatasetParameter {
      get { return (IFixedValueParameter<Dataset>)Parameters[DatasetParameterName]; }
    }
    /// <summary>Parameter holding the read-only checked list of input variable names.</summary>
    public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> InputVariablesParameter {
      get { return (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[InputVariablesParameterName]; }
    }
    /// <summary>Parameter holding the training partition range.</summary>
    public IFixedValueParameter<IntRange> TrainingPartitionParameter {
      get { return (IFixedValueParameter<IntRange>)Parameters[TrainingPartitionParameterName]; }
    }
    /// <summary>Parameter holding the test partition range.</summary>
    public IFixedValueParameter<IntRange> TestPartitionParameter {
      get { return (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName]; }
    }
    /// <summary>Parameter holding the correlation heat map of the dataset's double variables.</summary>
    public IFixedValueParameter<HeatMap> DatasetHeatMapParameter {
      get { return (IFixedValueParameter<HeatMap>)Parameters[DatasetHeatMapParameterName]; }
    }
    #endregion

    #region properties
    // Backing field of IsEmpty; never set in this class apart from being copied on cloning.
    protected bool isEmpty = false;
    /// <summary>Gets the value of the <c>isEmpty</c> flag.</summary>
    public bool IsEmpty {
      get { return isEmpty; }
    }
    /// <summary>Gets the dataset stored in <see cref="DatasetParameter"/>.</summary>
    public Dataset Dataset {
      get { return DatasetParameter.Value; }
    }
    /// <summary>Gets the checked list of input variables stored in <see cref="InputVariablesParameter"/>.</summary>
    public ICheckedItemList<StringValue> InputVariables {
      get { return InputVariablesParameter.Value; }
    }
    /// <summary>Gets the names of all input variables that are currently checked.</summary>
    public IEnumerable<string> AllowedInputVariables {
      get { return InputVariables.CheckedItems.Select(x => x.Value.Value); }
    }

    /// <summary>Gets the training partition; <c>Start</c> is treated as inclusive, <c>End</c> as exclusive.</summary>
    public IntRange TrainingPartition {
      get { return TrainingPartitionParameter.Value; }
    }
    /// <summary>Gets the test partition; <c>Start</c> is treated as inclusive, <c>End</c> as exclusive.</summary>
    public IntRange TestPartition {
      get { return TestPartitionParameter.Value; }
    }
    /// <summary>Gets the heat map stored in <see cref="DatasetHeatMapParameter"/>.</summary>
    public HeatMap DatasetHeatMap {
      get { return DatasetHeatMapParameter.Value; }
    }

    /// <summary>
    /// Gets the row indices of the training partition that satisfy <see cref="IsTrainingSample"/>.
    /// The sequence is empty if the partition's end does not exceed its start.
    /// </summary>
    public virtual IEnumerable<int> TrainingIndizes {
      get {
        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
                         .Where(IsTrainingSample);
      }
    }
    /// <summary>
    /// Gets the row indices of the test partition that satisfy <see cref="IsTestSample"/>.
    /// The sequence is empty if the partition's end does not exceed its start.
    /// </summary>
    public virtual IEnumerable<int> TestIndizes {
      get {
        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
                         .Where(IsTestSample);
      }
    }

    /// <summary>
    /// Determines whether a row is a training sample: the index must be a valid dataset row,
    /// lie inside the training partition and lie outside the test partition (rows in the
    /// overlap of both partitions count as test samples only).
    /// </summary>
    /// <param name="index">The row index to check.</param>
    /// <returns><c>true</c> if the row is a training sample; otherwise <c>false</c>.</returns>
    public virtual bool IsTrainingSample(int index) {
      return index >= 0 && index < Dataset.Rows &&
             TrainingPartition.Start <= index && index < TrainingPartition.End &&
             (index < TestPartition.Start || TestPartition.End <= index);
    }

    /// <summary>
    /// Determines whether a row is a test sample: the index must be a valid dataset row and
    /// lie inside the test partition.
    /// </summary>
    /// <param name="index">The row index to check.</param>
    /// <returns><c>true</c> if the row is a test sample; otherwise <c>false</c>.</returns>
    public virtual bool IsTestSample(int index) {
      return index >= 0 && index < Dataset.Rows &&
             TestPartition.Start <= index && index < TestPartition.End;
    }
    #endregion

    /// <summary>
    /// Cloning constructor. Copies the <c>isEmpty</c> flag and registers the event handlers
    /// on the cloned parameters.
    /// </summary>
    /// <param name="original">The instance to clone.</param>
    /// <param name="cloner">The cloner passed on to the base class.</param>
    protected DataAnalysisProblemData(DataAnalysisProblemData original, Cloner cloner)
      : base(original, cloner) {
      isEmpty = original.isEmpty;
      RegisterEventHandlers();
    }
    /// <summary>Constructor used by the persistence framework during deserialization.</summary>
    [StorableConstructor]
    protected DataAnalysisProblemData(bool deserializing) : base(deserializing) { }
    // Event handlers are not persisted, so they are attached again after deserialization.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEventHandlers();
    }

    /// <summary>
    /// Creates problem data for the given dataset. All double variables of the dataset are listed
    /// as input variables; those contained in <paramref name="allowedInputVariables"/> are checked.
    /// The first half of the rows becomes the training partition, the second half the test partition.
    /// </summary>
    /// <param name="dataset">The dataset to wrap.</param>
    /// <param name="allowedInputVariables">Names of the double variables that are allowed as inputs.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dataset"/> or <paramref name="allowedInputVariables"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// An allowed input variable is not a double variable of the dataset, or the correlation
    /// calculator reported an error while the heat map was built.
    /// </exception>
    protected DataAnalysisProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables) {
      // The two-argument overload is required: the single-string overload interprets its
      // argument as the parameter name, not as the message.
      if (dataset == null) throw new ArgumentNullException("dataset", "The dataset must not be null.");
      if (allowedInputVariables == null) throw new ArgumentNullException("allowedInputVariables", "The allowedInputVariables must not be null.");

      // Materialize once: the sequence may be lazy, and it is needed both for validation
      // and for one membership test per dataset variable.
      var allowedVariables = new HashSet<string>(allowedInputVariables);
      if (allowedVariables.Except(dataset.DoubleVariables).Any())
        throw new ArgumentException("All allowed input variables must be present in the dataset and of type double.");

      var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x)));
      foreach (StringValue x in inputVariables)
        inputVariables.SetItemCheckedState(x, allowedVariables.Contains(x.Value));

      // Default split: rows [0, Rows/2) for training, rows [Rows/2, Rows) for testing.
      int trainingPartitionStart = 0;
      int trainingPartitionEnd = dataset.Rows / 2;
      int testPartitionStart = dataset.Rows / 2;
      int testPartitionEnd = dataset.Rows;

      Parameters.Add(new FixedValueParameter<Dataset>(DatasetParameterName, "", dataset));
      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>(InputVariablesParameterName, "", inputVariables.AsReadOnly()));
      Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
      Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd)));
      Parameters.Add(new FixedValueParameter<HeatMap>(DatasetHeatMapParameterName, "", CalculateHeatMap(dataset)));

      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
      RegisterEventHandlers();
    }

    /// <summary>
    /// Builds a heat map of the pairwise squared Pearson correlation of all double variables of
    /// the dataset. Rows are stored in reversed variable order: cell <c>[r, c]</c> holds the value
    /// for the variables with index <c>length - 1 - r</c> and <c>c</c>.
    /// </summary>
    /// <param name="dataset">The dataset whose double variables are correlated.</param>
    /// <returns>The heat map containing one cell per ordered pair of double variables.</returns>
    /// <exception cref="ArgumentException">The calculator reported an error for a pair of variables.</exception>
    private HeatMap CalculateHeatMap(Dataset dataset) {
      IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
      int length = doubleVariableNames.Count;
      double[,] elements = new double[length, length];

      // The measure is symmetric, so every unordered pair (i, j) with j <= i is computed once
      // and written to both cells that represent it.
      for (int i = 0; i < length; i++) {
        for (int j = 0; j <= i; j++) {
          OnlineCalculatorError error;
          double rSquared = OnlinePearsonsRSquaredCalculator.Calculate(dataset.GetDoubleValues(doubleVariableNames[i]), dataset.GetDoubleValues(doubleVariableNames[j]), out error);
          if (error != OnlineCalculatorError.None) {
            throw new ArgumentException("Calculator returned " + error);
          }
          // Because the row index is flipped, the counterpart of a cell is NOT its transpose:
          // the pair (i, j) lives at [length - 1 - i, j] and the pair (j, i) at [length - 1 - j, i].
          elements[length - 1 - i, j] = rSquared;
          elements[length - 1 - j, i] = rSquared;
        }
      }
      return new HeatMap(elements, "Pearsons R Squared");
    }

    // Forwards changes of the dataset parameter, the checked input variables and both partitions
    // to the Changed event.
    private void RegisterEventHandlers() {
      DatasetParameter.ValueChanged += new EventHandler(Parameter_ValueChanged);
      InputVariables.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
      TrainingPartition.ValueChanged += new EventHandler(Parameter_ValueChanged);
      TestPartition.ValueChanged += new EventHandler(Parameter_ValueChanged);
    }

    private void InputVariables_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      OnChanged();
    }

    private void Parameter_ValueChanged(object sender, EventArgs e) {
      OnChanged();
    }

    /// <summary>
    /// Raised when the dataset parameter, the checked input variables or one of the partitions changes.
    /// </summary>
    public event EventHandler Changed;
    /// <summary>Raises the <see cref="Changed"/> event.</summary>
    protected virtual void OnChanged() {
      // Copy the delegate first so a concurrent unsubscribe cannot null it between check and call.
      var listeners = Changed;
      if (listeners != null) listeners(this, EventArgs.Empty);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.