Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs @ 10551

Last change on this file since 10551 was 10550, checked in by tsteinre, 11 years ago

implemented Undo-feature of PreprocessingData

File size: 10.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Problems.DataAnalysis;
30
31namespace HeuristicLab.DataPreprocessing {
32
33  internal class PDSnapshot {
34    public IDictionary<int, IList> VariableValues { get; set; }
35
36    public IList<string> VariableNames { get; set; }
37
38    public double TrainingToTestRatio { get; set; }
39
40    public DataPreprocessingChangedEventType ChangedType { get; set; }
41
42    public int ChangedColumn { get; set; }
43
44    public int ChangedRow { get; set; }
45  }
46
47  [Item("PreprocessingData", "Represents data used for preprocessing.")]
48  public class PreprocessingData : NamedItem, IPreprocessingData {
49
50    private const int MAX_UNDO_DEPTH = 5;
51
52    private IDictionary<int, IList> variableValues;
53
54    private IList<string> variableNames;
55
56    private double trainingToTestRatio;
57
58    private IList<PDSnapshot> undoHistory;
59
60    private PreprocessingData(PreprocessingData original, Cloner cloner)
61      : base(original, cloner) {
62      variableValues = CopyVariableValues(original.variableValues);
63      variableNames = new List<string>(original.variableNames);
64      trainingToTestRatio = original.trainingToTestRatio;
65      undoHistory = new List<PDSnapshot>();
66    }
67
68    public PreprocessingData(IDataAnalysisProblemData problemData)
69      : base() {
70      Name = "-";
71
72      variableNames = new List<string>(problemData.Dataset.VariableNames);
73      // create dictionary from variable name to index
74
75      int columnIndex = 0;
76      variableValues = new Dictionary<int, IList>();
77      foreach (var variableName in problemData.Dataset.VariableNames) {
78        if (problemData.Dataset.IsType<double>(variableName)) {
79          variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
80        } else if (problemData.Dataset.IsType<string>(variableName)) {
81          variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
82        } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
83          variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
84        } else {
85          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
86        }
87        ++columnIndex;
88      }
89
90      trainingToTestRatio = (double)problemData.TrainingPartition.Size / Math.Max(problemData.Dataset.Rows, double.Epsilon);
91      undoHistory = new List<PDSnapshot>();
92    }
93
94    private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
95      var list = new List<T>(ds.Rows);
96      for (int row = 0; row < ds.Rows; ++row) {
97        list.Add(selector(ds.GetValue(row, column)));
98      }
99      return list;
100    }
101
102    private IDictionary<int, IList> CopyVariableValues(IDictionary<int, IList> original) {
103      var copy = new Dictionary<int, IList>(variableValues);
104      for (int i = 0; i < original.Count; i++) {
105        if (variableValues[i] is IList<double>) {
106          copy[i] = new List<double>((IList<double>)variableValues[i]);
107        } else if (variableValues[i] is IList<DateTime>) {
108          copy[i] = new List<DateTime>((IList<DateTime>)variableValues[i]);
109        } else if (variableValues[i] is IList<string>) {
110          copy[i] = new List<string>((IList<string>)variableValues[i]);
111        } else {
112          throw new NotImplementedException("The Type is not Supported");
113        }
114      }
115      return copy;
116    }
117
118    private void SaveSnapshot(DataPreprocessingChangedEventType changedType, int column, int row) {
119      PDSnapshot currentSnapshot = new PDSnapshot();
120      currentSnapshot.VariableValues = CopyVariableValues(variableValues);
121      currentSnapshot.VariableNames = new List<string>(variableNames);
122      currentSnapshot.TrainingToTestRatio = trainingToTestRatio;
123      currentSnapshot.ChangedType = changedType;
124      currentSnapshot.ChangedColumn = column;
125      currentSnapshot.ChangedRow = row;
126
127      if (undoHistory.Count >= MAX_UNDO_DEPTH)
128        undoHistory.RemoveAt(0);
129
130      undoHistory.Add(currentSnapshot);
131    }
132
133    #region NamedItem abstract Member Implementations
134
135    public override IDeepCloneable Clone(Cloner cloner) {
136      return new PreprocessingData(this, cloner);
137    }
138
139    #endregion
140
141    #region IPreprocessingData Members
142
143    public T GetCell<T>(int columnIndex, int rowIndex) {
144      return (T)variableValues[columnIndex][rowIndex];
145    }
146
147
148    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
149      SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
150      variableValues[columnIndex][rowIndex] = value;
151      OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
152    }
153
154
155    public string GetCellAsString(int columnIndex, int rowIndex) {
156      return variableValues[columnIndex][rowIndex].ToString();
157    }
158
159
160    [Obsolete("use the index based variant, is faster")]
161    public IList<T> GetValues<T>(string variableName) {
162      return GetValues<T>(GetColumnIndex(variableName));
163    }
164
165    public IList<T> GetValues<T>(int columnIndex) {
166      return (IList<T>)variableValues[columnIndex];
167    }
168
169    public void SetValues<T>(int columnIndex, IList<T> values) {
170      if (IsType<T>(columnIndex)) {
171        SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
172        variableValues[columnIndex] = (IList)values;
173      } else {
174        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
175      }
176      OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
177    }
178
179    public void InsertRow(int rowIndex) {
180      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
181      foreach (IList column in variableValues.Values) {
182        Type type = column.GetType().GetGenericArguments()[0];
183        column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
184      }
185      OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
186    }
187
188    public void DeleteRow(int rowIndex) {
189      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
190      foreach (IList column in variableValues.Values) {
191        column.RemoveAt(rowIndex);
192      }
193      OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
194    }
195
196    public void InsertColumn<T>(string variableName, int columnIndex) {
197      SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
198      variableValues.Add(columnIndex, new List<T>(Rows));
199      variableNames.Insert(columnIndex, variableName);
200      OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
201    }
202
203    public void DeleteColumn(int columnIndex) {
204      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
205      variableValues.Remove(columnIndex);
206      variableNames.RemoveAt(columnIndex);
207      OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
208    }
209
210
211    public IntRange TrainingPartition {
212      get { return new IntRange(0, (int)(Rows * trainingToTestRatio)); }
213    }
214
215    public IntRange TestPartition {
216      get { return new IntRange((int)(Rows * trainingToTestRatio), Rows); }
217    }
218
219    public string GetVariableName(int columnIndex) {
220      return variableNames[columnIndex];
221    }
222
223    public IEnumerable<string> VariableNames {
224      get { return variableNames; }
225    }
226
227    public int GetColumnIndex(string variableName) {
228      return variableNames.IndexOf(variableName);
229    }
230
231    public bool IsType<T>(int columnIndex) {
232      return variableValues[columnIndex] is List<T>;
233    }
234
235    public int Columns {
236      get { return variableNames.Count; }
237    }
238
239    public int Rows {
240      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
241    }
242
243    public Dataset ExportToDataset() {
244      IList<IList> values = new List<IList>();
245
246      for (int i = 0; i < Columns; ++i) {
247        values.Add(variableValues[i]);
248      }
249
250      var dataset = new Dataset(variableNames, values);
251      return dataset;
252    }
253
254    public event DataPreprocessingChangedEventHandler Changed;
255    protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
256      var listeners = Changed;
257      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
258    }
259
260    public bool IsUndoAvailable {
261      get { return undoHistory.Count > 0; }
262    }
263
264    public void Undo() {
265      if (IsUndoAvailable) {
266        PDSnapshot previousSnapshot = undoHistory[undoHistory.Count - 1];
267        variableValues = previousSnapshot.VariableValues;
268        variableNames = previousSnapshot.VariableNames;
269        trainingToTestRatio = previousSnapshot.TrainingToTestRatio;
270        undoHistory.Remove(previousSnapshot);
271        OnChanged(previousSnapshot.ChangedType,
272          previousSnapshot.ChangedColumn,
273          previousSnapshot.ChangedRow);
274      }
275    }
276
277    #endregion
278  }
279}
Note: See TracBrowser for help on using the repository browser.