Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs @ 10581

Last change on this file since 10581 was 10581, checked in by tsteinre, 10 years ago
  • removed index workaround in PreprocessingData Undo-Feature event handling.
File size: 10.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Problems.DataAnalysis;
30
31namespace HeuristicLab.DataPreprocessing {
32
33  internal class PDSnapshot {
34    public IDictionary<int, IList> VariableValues { get; set; }
35
36    public IList<string> VariableNames { get; set; }
37
38    public double TrainingToTestRatio { get; set; }
39
40    public DataPreprocessingChangedEventType ChangedType { get; set; }
41
42    public int ChangedColumn { get; set; }
43
44    public int ChangedRow { get; set; }
45  }
46
47  [Item("PreprocessingData", "Represents data used for preprocessing.")]
48  public class PreprocessingData : NamedItem, IPreprocessingData {
49
50    private const int MAX_UNDO_DEPTH = 5;
51
52    private IDictionary<int, IList> variableValues;
53
54    private IList<string> variableNames;
55
56    private double trainingToTestRatio;
57
58    private IList<PDSnapshot> undoHistory;
59
60    //TODO: refactor extract Transaction logic in a own class
61    private int transactionDepth = 0;
62
63    private PreprocessingData(PreprocessingData original, Cloner cloner)
64      : base(original, cloner) {
65      variableValues = CopyVariableValues(original.variableValues);
66      variableNames = new List<string>(original.variableNames);
67      trainingToTestRatio = original.trainingToTestRatio;
68      undoHistory = new List<PDSnapshot>();
69    }
70
71    public PreprocessingData(IDataAnalysisProblemData problemData)
72      : base() {
73      Name = "-";
74
75      variableNames = new List<string>(problemData.Dataset.VariableNames);
76      // create dictionary from variable name to index
77
78      int columnIndex = 0;
79      variableValues = new Dictionary<int, IList>();
80      foreach (var variableName in problemData.Dataset.VariableNames) {
81        if (problemData.Dataset.IsType<double>(variableName)) {
82          variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
83        } else if (problemData.Dataset.IsType<string>(variableName)) {
84          variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
85        } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
86          variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
87        } else {
88          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
89        }
90        ++columnIndex;
91      }
92
93      trainingToTestRatio = (double)problemData.TrainingPartition.Size / Math.Max(problemData.Dataset.Rows, double.Epsilon);
94      undoHistory = new List<PDSnapshot>();
95    }
96
97    private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
98      var list = new List<T>(ds.Rows);
99      for (int row = 0; row < ds.Rows; ++row) {
100        list.Add(selector(ds.GetValue(row, column)));
101      }
102      return list;
103    }
104
105    private IDictionary<int, IList> CopyVariableValues(IDictionary<int, IList> original) {
106      var copy = new Dictionary<int, IList>(variableValues);
107      for (int i = 0; i < original.Count; i++) {
108        variableValues[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
109      }
110      return copy;
111    }
112
113    private void SaveSnapshot(DataPreprocessingChangedEventType changedType, int column, int row) {
114      if (transactionDepth > 0) return;
115
116      PDSnapshot currentSnapshot = new PDSnapshot();
117      currentSnapshot.VariableValues = CopyVariableValues(variableValues);
118      currentSnapshot.VariableNames = new List<string>(variableNames);
119      currentSnapshot.TrainingToTestRatio = trainingToTestRatio;
120      currentSnapshot.ChangedType = changedType;
121      currentSnapshot.ChangedColumn = column;
122      currentSnapshot.ChangedRow = row;
123
124      if (undoHistory.Count >= MAX_UNDO_DEPTH)
125        undoHistory.RemoveAt(0);
126
127      undoHistory.Add(currentSnapshot);
128    }
129
130    #region NamedItem abstract Member Implementations
131
132    public override IDeepCloneable Clone(Cloner cloner) {
133      return new PreprocessingData(this, cloner);
134    }
135
136    #endregion
137
138    #region IPreprocessingData Members
139
140    public T GetCell<T>(int columnIndex, int rowIndex) {
141      return (T)variableValues[columnIndex][rowIndex];
142    }
143
144
145    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
146      SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
147      variableValues[columnIndex][rowIndex] = value;
148      if (transactionDepth <= 0)
149        OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
150    }
151
152
153    public string GetCellAsString(int columnIndex, int rowIndex) {
154      return variableValues[columnIndex][rowIndex].ToString();
155    }
156
157
158    [Obsolete("use the index based variant, is faster")]
159    public IList<T> GetValues<T>(string variableName) {
160      return GetValues<T>(GetColumnIndex(variableName));
161    }
162
163    public IList<T> GetValues<T>(int columnIndex) {
164      return (IList<T>)variableValues[columnIndex];
165    }
166
167    public void SetValues<T>(int columnIndex, IList<T> values) {
168      if (IsType<T>(columnIndex)) {
169        SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
170        variableValues[columnIndex] = (IList)values;
171      } else {
172        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
173      }
174      if (transactionDepth <= 0)
175        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
176    }
177
178    public void InsertRow(int rowIndex) {
179      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
180      foreach (IList column in variableValues.Values) {
181        Type type = column.GetType().GetGenericArguments()[0];
182        column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
183      }
184      if (transactionDepth <= 0)
185        OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
186    }
187
188    public void DeleteRow(int rowIndex) {
189      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
190      foreach (IList column in variableValues.Values) {
191        column.RemoveAt(rowIndex);
192      }
193      if (transactionDepth <= 0)
194        OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
195    }
196
197    public void InsertColumn<T>(string variableName, int columnIndex) {
198      SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
199      variableValues.Add(columnIndex, new List<T>(Rows));
200      variableNames.Insert(columnIndex, variableName);
201      if (transactionDepth <= 0)
202        OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
203    }
204
205    public void DeleteColumn(int columnIndex) {
206      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
207      variableValues.Remove(columnIndex);
208      variableNames.RemoveAt(columnIndex);
209      if (transactionDepth <= 0)
210        OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
211    }
212
213    public IntRange TrainingPartition {
214      get { return new IntRange(0, (int)(Rows * trainingToTestRatio)); }
215    }
216
217    public IntRange TestPartition {
218      get { return new IntRange((int)(Rows * trainingToTestRatio), Rows); }
219    }
220
221    public string GetVariableName(int columnIndex) {
222      return variableNames[columnIndex];
223    }
224
225    public IEnumerable<string> VariableNames {
226      get { return variableNames; }
227    }
228
229    public int GetColumnIndex(string variableName) {
230      return variableNames.IndexOf(variableName);
231    }
232
233    public bool IsType<T>(int columnIndex) {
234      return variableValues[columnIndex] is List<T>;
235    }
236
237    public int Columns {
238      get { return variableNames.Count; }
239    }
240
241    public int Rows {
242      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
243    }
244
245    public Dataset ExportToDataset() {
246      IList<IList> values = new List<IList>();
247
248      for (int i = 0; i < Columns; ++i) {
249        values.Add(variableValues[i]);
250      }
251
252      var dataset = new Dataset(variableNames, values);
253      return dataset;
254    }
255
256    public event DataPreprocessingChangedEventHandler Changed;
257    protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
258      var listeners = Changed;
259      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
260    }
261
262    public bool IsUndoAvailable {
263      get { return undoHistory.Count > 0; }
264    }
265
266    public void Undo() {
267      if (IsUndoAvailable) {
268        PDSnapshot previousSnapshot = undoHistory[undoHistory.Count - 1];
269        variableValues = previousSnapshot.VariableValues;
270        variableNames = previousSnapshot.VariableNames;
271        trainingToTestRatio = previousSnapshot.TrainingToTestRatio;
272        undoHistory.Remove(previousSnapshot);
273        OnChanged(previousSnapshot.ChangedType,
274          previousSnapshot.ChangedColumn,
275          previousSnapshot.ChangedRow);
276      }
277    }
278
279    public void BeginTransaction() {
280      SaveSnapshot(DataPreprocessingChangedEventType.Any, -1, -1);
281      transactionDepth++;
282    }
283
284    public void EndTransaction() {
285      transactionDepth--;
286      if (transactionDepth < 0)
287        throw new InvalidOperationException("There is no open transaction that can be ended.");
288      if (transactionDepth == 0)
289        OnChanged(DataPreprocessingChangedEventType.Any, -1, -1);
290    }
291
292    #endregion
293  }
294}
Note: See TracBrowser for help on using the repository browser.