Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs @ 14381

Last change on this file since 14381 was 14381, checked in by pfleck, 7 years ago

#2698

  • Refactored CheckedVariablesView out of the ChartView to allow reuse of the checked variables list.
    • The new list visualizes the non-input/target variables in gray.
    • Added context menu to quickly (un)check all variables or only the inputs+target variables.
  • In the Multi-Scatterplot
    • New structure and layout of the individual charts to support fixed header rows and columns (for the variable names). In exchange, the legend of each plot was removed to make better use of the plot area.
    • Adapted the new CheckedVariablesView (but hidden until (un)checking is implemented).
File size: 8.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Problems.DataAnalysis;
30
31namespace HeuristicLab.DataPreprocessing {
32
33  [Item("PreprocessingData", "Represents data used for preprocessing.")]
34  public abstract class PreprocessingData : NamedItem, IPreprocessingData {
35
36    public IntRange TrainingPartition { get; set; }
37    public IntRange TestPartition { get; set; }
38
39    protected IList<ITransformation> transformations;
40    public IList<ITransformation> Transformations {
41      get { return transformations; }
42    }
43
44    protected IList<IList> variableValues;
45    protected IList<string> variableNames;
46
47    public IEnumerable<string> VariableNames {
48      get { return variableNames; }
49    }
50
51    public IEnumerable<string> GetDoubleVariableNames() {
52      var doubleVariableNames = new List<string>();
53      for (int i = 0; i < Columns; ++i) {
54        if (VariableHasType<double>(i)) {
55          doubleVariableNames.Add(variableNames[i]);
56        }
57      }
58      return doubleVariableNames;
59    }
60
61    public IList<string> InputVariables { get; private set; }
62    public string TargetVariable { get; private set; } // optional
63
64    public int Columns {
65      get { return variableNames.Count; }
66    }
67
68    public int Rows {
69      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
70    }
71
72    protected IDictionary<int, IList<int>> selection;
73    public IDictionary<int, IList<int>> Selection {
74      get { return selection; }
75      set {
76        selection = value;
77        OnSelectionChanged();
78      }
79    }
80
81    protected PreprocessingData(PreprocessingData original, Cloner cloner)
82      : base(original, cloner) {
83      variableValues = CopyVariableValues(original.variableValues);
84      variableNames = new List<string>(original.variableNames);
85      TrainingPartition = (IntRange)original.TrainingPartition.Clone(cloner);
86      TestPartition = (IntRange)original.TestPartition.Clone(cloner);
87      transformations = new List<ITransformation>(original.transformations.Select(cloner.Clone));
88
89      InputVariables = new List<string>(original.InputVariables);
90      TargetVariable = original.TargetVariable;
91
92      RegisterEventHandler();
93    }
94
95    protected PreprocessingData(IDataAnalysisProblemData problemData)
96      : base() {
97      Name = "Preprocessing Data";
98
99      transformations = new List<ITransformation>();
100      selection = new Dictionary<int, IList<int>>();
101
102      Import(problemData);
103
104      RegisterEventHandler();
105    }
106
107    public void Import(IDataAnalysisProblemData problemData) {
108      Dataset dataset = (Dataset)problemData.Dataset;
109      variableNames = new List<string>(problemData.Dataset.VariableNames);
110      InputVariables = new List<string>(problemData.AllowedInputVariables);
111      TargetVariable = (problemData is IRegressionProblemData) ? ((IRegressionProblemData)problemData).TargetVariable
112        : (problemData is IClassificationProblemData) ? ((IClassificationProblemData)problemData).TargetVariable
113        : null;
114
115      int columnIndex = 0;
116      variableValues = new List<IList>();
117      foreach (var variableName in problemData.Dataset.VariableNames) {
118        if (dataset.VariableHasType<double>(variableName)) {
119          variableValues.Insert(columnIndex, dataset.GetDoubleValues(variableName).ToList());
120        } else if (dataset.VariableHasType<string>(variableName)) {
121          variableValues.Insert(columnIndex, dataset.GetStringValues(variableName).ToList());
122        } else if (dataset.VariableHasType<DateTime>(variableName)) {
123          variableValues.Insert(columnIndex, dataset.GetDateTimeValues(variableName).ToList());
124        } else {
125          throw new ArgumentException("The datatype of column " + variableName + " must be of type double, string or DateTime");
126        }
127        ++columnIndex;
128      }
129
130      TrainingPartition = new IntRange(problemData.TrainingPartition.Start, problemData.TrainingPartition.End);
131      TestPartition = new IntRange(problemData.TestPartition.Start, problemData.TestPartition.End);
132    }
133
134    private void RegisterEventHandler() {
135      Changed += (s, e) => {
136        switch (e.Type) {
137          case DataPreprocessingChangedEventType.DeleteRow:
138            CheckPartitionRanges();
139            break;
140          case DataPreprocessingChangedEventType.Any:
141            CheckPartitionRanges();
142            break;
143          case DataPreprocessingChangedEventType.Transformation:
144            CheckPartitionRanges();
145            break;
146        }
147      };
148    }
149
150    private void CheckPartitionRanges() {
151      int maxRowIndex = Math.Max(0, Rows - 1);
152      TrainingPartition.Start = Math.Min(TrainingPartition.Start, maxRowIndex);
153      TrainingPartition.End = Math.Min(TrainingPartition.End, maxRowIndex);
154      TestPartition.Start = Math.Min(TestPartition.Start, maxRowIndex);
155      TestPartition.End = Math.Min(TestPartition.End, maxRowIndex);
156    }
157
158    protected IList<IList> CopyVariableValues(IList<IList> original) {
159      var copy = new List<IList>(original);
160      for (int i = 0; i < original.Count; ++i) {
161        copy[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
162      }
163      return copy;
164    }
165
166
167    #region IPreprocessingData Members
168
169    public abstract T GetCell<T>(int columnIndex, int rowIndex);
170
171    public abstract void SetCell<T>(int columnIndex, int rowIndex, T value);
172
173    public abstract string GetCellAsString(int columnIndex, int rowIndex);
174
175    public abstract string GetVariableName(int columnIndex);
176
177    public abstract int GetColumnIndex(string variableName);
178
179    public abstract bool VariableHasType<T>(int columnIndex);
180
181    [Obsolete("use the index based variant, is faster")]
182    public abstract IList<T> GetValues<T>(string variableName, bool considerSelection);
183
184    public abstract IList<T> GetValues<T>(int columnIndex, bool considerSelection);
185
186    public abstract void SetValues<T>(int columnIndex, IList<T> values);
187
188    public abstract bool SetValue(string value, int columnIndex, int rowIndex);
189
190    public abstract bool Validate(string value, out string errorMessage, int columnIndex);
191
192    public abstract bool AreAllStringColumns(IEnumerable<int> columnIndices);
193
194    public abstract void DeleteRowsWithIndices(IEnumerable<int> rows);
195
196    public abstract void InsertRow(int rowIndex);
197
198    public abstract void DeleteRow(int rowIndex);
199
200    public abstract void InsertColumn<T>(string variableName, int columnIndex);
201
202    public abstract void DeleteColumn(int columnIndex);
203
204    public abstract void RenameColumn(int columnIndex, string name);
205    public abstract void RenameColumns(IList<string> list);
206
207    public abstract Dataset ExportToDataset();
208
209    public abstract void ClearSelection();
210
211    public abstract event EventHandler SelectionChanged;
212    protected abstract void OnSelectionChanged();
213
214    public event DataPreprocessingChangedEventHandler Changed;
215    protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
216      var listeners = Changed;
217      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
218    }
219    #endregion
220  }
221}
Note: See TracBrowser for help on using the repository browser.