Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs @ 10573

Last change on this file since 10573 was 10554, checked in by pfleck, 11 years ago
  • removed resx reference
  • improved variable values copying
File size: 9.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Problems.DataAnalysis;
30
31namespace HeuristicLab.DataPreprocessing {
32
33  internal class PDSnapshot {
34    public IDictionary<int, IList> VariableValues { get; set; }
35
36    public IList<string> VariableNames { get; set; }
37
38    public double TrainingToTestRatio { get; set; }
39
40    public DataPreprocessingChangedEventType ChangedType { get; set; }
41
42    public int ChangedColumn { get; set; }
43
44    public int ChangedRow { get; set; }
45  }
46
47  [Item("PreprocessingData", "Represents data used for preprocessing.")]
48  public class PreprocessingData : NamedItem, IPreprocessingData {
49
50    private const int MAX_UNDO_DEPTH = 5;
51
52    private IDictionary<int, IList> variableValues;
53
54    private IList<string> variableNames;
55
56    private double trainingToTestRatio;
57
58    private IList<PDSnapshot> undoHistory;
59
60    private PreprocessingData(PreprocessingData original, Cloner cloner)
61      : base(original, cloner) {
62      variableValues = CopyVariableValues(original.variableValues);
63      variableNames = new List<string>(original.variableNames);
64      trainingToTestRatio = original.trainingToTestRatio;
65      undoHistory = new List<PDSnapshot>();
66    }
67
68    public PreprocessingData(IDataAnalysisProblemData problemData)
69      : base() {
70      Name = "-";
71
72      variableNames = new List<string>(problemData.Dataset.VariableNames);
73      // create dictionary from variable name to index
74
75      int columnIndex = 0;
76      variableValues = new Dictionary<int, IList>();
77      foreach (var variableName in problemData.Dataset.VariableNames) {
78        if (problemData.Dataset.IsType<double>(variableName)) {
79          variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
80        } else if (problemData.Dataset.IsType<string>(variableName)) {
81          variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
82        } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
83          variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
84        } else {
85          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
86        }
87        ++columnIndex;
88      }
89
90      trainingToTestRatio = (double)problemData.TrainingPartition.Size / Math.Max(problemData.Dataset.Rows, double.Epsilon);
91      undoHistory = new List<PDSnapshot>();
92    }
93
94    private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
95      var list = new List<T>(ds.Rows);
96      for (int row = 0; row < ds.Rows; ++row) {
97        list.Add(selector(ds.GetValue(row, column)));
98      }
99      return list;
100    }
101
102    private IDictionary<int, IList> CopyVariableValues(IDictionary<int, IList> original) {
103      var copy = new Dictionary<int, IList>(variableValues);
104      for (int i = 0; i < original.Count; i++) {
105        variableValues[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
106      }
107      return copy;
108    }
109
110    private void SaveSnapshot(DataPreprocessingChangedEventType changedType, int column, int row) {
111      PDSnapshot currentSnapshot = new PDSnapshot();
112      currentSnapshot.VariableValues = CopyVariableValues(variableValues);
113      currentSnapshot.VariableNames = new List<string>(variableNames);
114      currentSnapshot.TrainingToTestRatio = trainingToTestRatio;
115      currentSnapshot.ChangedType = changedType;
116      currentSnapshot.ChangedColumn = column;
117      currentSnapshot.ChangedRow = row;
118
119      if (undoHistory.Count >= MAX_UNDO_DEPTH)
120        undoHistory.RemoveAt(0);
121
122      undoHistory.Add(currentSnapshot);
123    }
124
125    #region NamedItem abstract Member Implementations
126
127    public override IDeepCloneable Clone(Cloner cloner) {
128      return new PreprocessingData(this, cloner);
129    }
130
131    #endregion
132
133    #region IPreprocessingData Members
134
135    public T GetCell<T>(int columnIndex, int rowIndex) {
136      return (T)variableValues[columnIndex][rowIndex];
137    }
138
139
140    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
141      SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
142      variableValues[columnIndex][rowIndex] = value;
143      OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
144    }
145
146
147    public string GetCellAsString(int columnIndex, int rowIndex) {
148      return variableValues[columnIndex][rowIndex].ToString();
149    }
150
151
152    [Obsolete("use the index based variant, is faster")]
153    public IList<T> GetValues<T>(string variableName) {
154      return GetValues<T>(GetColumnIndex(variableName));
155    }
156
157    public IList<T> GetValues<T>(int columnIndex) {
158      return (IList<T>)variableValues[columnIndex];
159    }
160
161    public void SetValues<T>(int columnIndex, IList<T> values) {
162      if (IsType<T>(columnIndex)) {
163        SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
164        variableValues[columnIndex] = (IList)values;
165      } else {
166        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
167      }
168      OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
169    }
170
171    public void InsertRow(int rowIndex) {
172      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
173      foreach (IList column in variableValues.Values) {
174        Type type = column.GetType().GetGenericArguments()[0];
175        column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
176      }
177      OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
178    }
179
180    public void DeleteRow(int rowIndex) {
181      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
182      foreach (IList column in variableValues.Values) {
183        column.RemoveAt(rowIndex);
184      }
185      OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
186    }
187
188    public void InsertColumn<T>(string variableName, int columnIndex) {
189      SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
190      variableValues.Add(columnIndex, new List<T>(Rows));
191      variableNames.Insert(columnIndex, variableName);
192      OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
193    }
194
195    public void DeleteColumn(int columnIndex) {
196      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
197      variableValues.Remove(columnIndex);
198      variableNames.RemoveAt(columnIndex);
199      OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
200    }
201
202
203    public IntRange TrainingPartition {
204      get { return new IntRange(0, (int)(Rows * trainingToTestRatio)); }
205    }
206
207    public IntRange TestPartition {
208      get { return new IntRange((int)(Rows * trainingToTestRatio), Rows); }
209    }
210
211    public string GetVariableName(int columnIndex) {
212      return variableNames[columnIndex];
213    }
214
215    public IEnumerable<string> VariableNames {
216      get { return variableNames; }
217    }
218
219    public int GetColumnIndex(string variableName) {
220      return variableNames.IndexOf(variableName);
221    }
222
223    public bool IsType<T>(int columnIndex) {
224      return variableValues[columnIndex] is List<T>;
225    }
226
227    public int Columns {
228      get { return variableNames.Count; }
229    }
230
231    public int Rows {
232      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
233    }
234
235    public Dataset ExportToDataset() {
236      IList<IList> values = new List<IList>();
237
238      for (int i = 0; i < Columns; ++i) {
239        values.Add(variableValues[i]);
240      }
241
242      var dataset = new Dataset(variableNames, values);
243      return dataset;
244    }
245
246    public event DataPreprocessingChangedEventHandler Changed;
247    protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
248      var listeners = Changed;
249      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
250    }
251
252    public bool IsUndoAvailable {
253      get { return undoHistory.Count > 0; }
254    }
255
256    public void Undo() {
257      if (IsUndoAvailable) {
258        PDSnapshot previousSnapshot = undoHistory[undoHistory.Count - 1];
259        variableValues = previousSnapshot.VariableValues;
260        variableNames = previousSnapshot.VariableNames;
261        trainingToTestRatio = previousSnapshot.TrainingToTestRatio;
262        undoHistory.Remove(previousSnapshot);
263        OnChanged(previousSnapshot.ChangedType,
264          previousSnapshot.ChangedColumn,
265          previousSnapshot.ChangedRow);
266      }
267    }
268
269    #endregion
270  }
271}
Note: See TracBrowser for help on using the repository browser.