Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs @ 10581

Last change on this file since 10581 was 10581, checked in by tsteinre, 10 years ago
  • removed index workaround in PreprocessingData Undo-Feature event handling.
File size: 10.5 KB
RevLine 
[10163]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[10168]23using System.Collections;
[10163]24using System.Collections.Generic;
[10185]25using System.Linq;
26using HeuristicLab.Common;
[10163]27using HeuristicLab.Core;
[10220]28using HeuristicLab.Data;
[10163]29using HeuristicLab.Problems.DataAnalysis;
30
[10182]31namespace HeuristicLab.DataPreprocessing {
[10550]32
33  internal class PDSnapshot {
34    public IDictionary<int, IList> VariableValues { get; set; }
35
36    public IList<string> VariableNames { get; set; }
37
38    public double TrainingToTestRatio { get; set; }
39
40    public DataPreprocessingChangedEventType ChangedType { get; set; }
41
42    public int ChangedColumn { get; set; }
43
44    public int ChangedRow { get; set; }
45  }
46
[10163]47  [Item("PreprocessingData", "Represents data used for preprocessing.")]
48  public class PreprocessingData : NamedItem, IPreprocessingData {
49
[10550]50    private const int MAX_UNDO_DEPTH = 5;
51
[10367]52    private IDictionary<int, IList> variableValues;
[10168]53
[10186]54    private IList<string> variableNames;
55
[10185]56    private double trainingToTestRatio;
[10220]57
[10550]58    private IList<PDSnapshot> undoHistory;
59
[10580]60    //TODO: refactor extract Transaction logic in a own class
61    private int transactionDepth = 0;
62
[10185]63    private PreprocessingData(PreprocessingData original, Cloner cloner)
64      : base(original, cloner) {
[10550]65      variableValues = CopyVariableValues(original.variableValues);
[10548]66      variableNames = new List<string>(original.variableNames);
67      trainingToTestRatio = original.trainingToTestRatio;
[10550]68      undoHistory = new List<PDSnapshot>();
[10185]69    }
[10187]70
[10168]71    public PreprocessingData(IDataAnalysisProblemData problemData)
72      : base() {
73      Name = "-";
74
[10187]75      variableNames = new List<string>(problemData.Dataset.VariableNames);
[10185]76      // create dictionary from variable name to index
[10187]77
[10367]78      int columnIndex = 0;
79      variableValues = new Dictionary<int, IList>();
[10185]80      foreach (var variableName in problemData.Dataset.VariableNames) {
81        if (problemData.Dataset.IsType<double>(variableName)) {
[10367]82          variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
[10185]83        } else if (problemData.Dataset.IsType<string>(variableName)) {
[10367]84          variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
[10185]85        } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
[10367]86          variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
[10168]87        } else {
[10185]88          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
[10168]89        }
[10367]90        ++columnIndex;
[10168]91      }
[10185]92
[10235]93      trainingToTestRatio = (double)problemData.TrainingPartition.Size / Math.Max(problemData.Dataset.Rows, double.Epsilon);
[10550]94      undoHistory = new List<PDSnapshot>();
[10163]95    }
96
[10185]97    private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
98      var list = new List<T>(ds.Rows);
[10341]99      for (int row = 0; row < ds.Rows; ++row) {
[10367]100        list.Add(selector(ds.GetValue(row, column)));
[10185]101      }
102      return list;
103    }
104
[10550]105    private IDictionary<int, IList> CopyVariableValues(IDictionary<int, IList> original) {
106      var copy = new Dictionary<int, IList>(variableValues);
107      for (int i = 0; i < original.Count; i++) {
[10554]108        variableValues[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
[10550]109      }
110      return copy;
111    }
112
113    private void SaveSnapshot(DataPreprocessingChangedEventType changedType, int column, int row) {
[10580]114      if (transactionDepth > 0) return;
115
[10550]116      PDSnapshot currentSnapshot = new PDSnapshot();
117      currentSnapshot.VariableValues = CopyVariableValues(variableValues);
118      currentSnapshot.VariableNames = new List<string>(variableNames);
119      currentSnapshot.TrainingToTestRatio = trainingToTestRatio;
120      currentSnapshot.ChangedType = changedType;
121      currentSnapshot.ChangedColumn = column;
122      currentSnapshot.ChangedRow = row;
123
124      if (undoHistory.Count >= MAX_UNDO_DEPTH)
125        undoHistory.RemoveAt(0);
126
127      undoHistory.Add(currentSnapshot);
128    }
129
[10163]130    #region NamedItem abstract Member Implementations
131
[10185]132    public override IDeepCloneable Clone(Cloner cloner) {
133      return new PreprocessingData(this, cloner);
[10163]134    }
135
136    #endregion
137
138    #region IPreprocessingData Members
139
[10367]140    public T GetCell<T>(int columnIndex, int rowIndex) {
141      return (T)variableValues[columnIndex][rowIndex];
[10181]142    }
143
[10236]144
[10550]145    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
146      SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
[10367]147      variableValues[columnIndex][rowIndex] = value;
[10580]148      if (transactionDepth <= 0)
149        OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
[10367]150    }
151
152
153    public string GetCellAsString(int columnIndex, int rowIndex) {
154      return variableValues[columnIndex][rowIndex].ToString();
155    }
156
[10547]157
[10367]158    [Obsolete("use the index based variant, is faster")]
[10243]159    public IList<T> GetValues<T>(string variableName) {
[10367]160      return GetValues<T>(GetColumnIndex(variableName));
[10181]161    }
162
[10367]163    public IList<T> GetValues<T>(int columnIndex) {
164      return (IList<T>)variableValues[columnIndex];
165    }
166
[10550]167    public void SetValues<T>(int columnIndex, IList<T> values) {
[10367]168      if (IsType<T>(columnIndex)) {
[10550]169        SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
[10367]170        variableValues[columnIndex] = (IList)values;
171      } else {
172        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
[10311]173      }
[10580]174      if (transactionDepth <= 0)
175        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
[10181]176    }
177
[10550]178    public void InsertRow(int rowIndex) {
179      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
[10194]180      foreach (IList column in variableValues.Values) {
181        Type type = column.GetType().GetGenericArguments()[0];
182        column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
183      }
[10580]184      if (transactionDepth <= 0)
185        OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
[10163]186    }
187
[10550]188    public void DeleteRow(int rowIndex) {
189      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
[10194]190      foreach (IList column in variableValues.Values) {
191        column.RemoveAt(rowIndex);
192      }
[10580]193      if (transactionDepth <= 0)
194        OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
[10163]195    }
196
[10550]197    public void InsertColumn<T>(string variableName, int columnIndex) {
198      SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
[10367]199      variableValues.Add(columnIndex, new List<T>(Rows));
[10547]200      variableNames.Insert(columnIndex, variableName);
[10580]201      if (transactionDepth <= 0)
202        OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
[10163]203    }
204
[10550]205    public void DeleteColumn(int columnIndex) {
206      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
[10367]207      variableValues.Remove(columnIndex);
208      variableNames.RemoveAt(columnIndex);
[10580]209      if (transactionDepth <= 0)
210        OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
[10367]211    }
212
[10221]213    public IntRange TrainingPartition {
214      get { return new IntRange(0, (int)(Rows * trainingToTestRatio)); }
215    }
216
217    public IntRange TestPartition {
[10235]218      get { return new IntRange((int)(Rows * trainingToTestRatio), Rows); }
[10221]219    }
220
[10547]221    public string GetVariableName(int columnIndex) {
222      return variableNames[columnIndex];
223    }
224
[10243]225    public IEnumerable<string> VariableNames {
[10188]226      get { return variableNames; }
[10163]227    }
228
[10367]229    public int GetColumnIndex(string variableName) {
230      return variableNames.IndexOf(variableName);
231    }
[10243]232
[10367]233    public bool IsType<T>(int columnIndex) {
234      return variableValues[columnIndex] is List<T>;
235    }
[10181]236
[10163]237    public int Columns {
[10194]238      get { return variableNames.Count; }
[10163]239    }
240
241    public int Rows {
[10367]242      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
[10163]243    }
[10189]244
[10220]245    public Dataset ExportToDataset() {
246      IList<IList> values = new List<IList>();
[10367]247
248      for (int i = 0; i < Columns; ++i) {
249        values.Add(variableValues[i]);
[10220]250      }
251
252      var dataset = new Dataset(variableNames, values);
253      return dataset;
254    }
255
[10544]256    public event DataPreprocessingChangedEventHandler Changed;
257    protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
258      var listeners = Changed;
259      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
260    }
261
[10547]262    public bool IsUndoAvailable {
[10550]263      get { return undoHistory.Count > 0; }
[10547]264    }
265
266    public void Undo() {
[10550]267      if (IsUndoAvailable) {
268        PDSnapshot previousSnapshot = undoHistory[undoHistory.Count - 1];
269        variableValues = previousSnapshot.VariableValues;
270        variableNames = previousSnapshot.VariableNames;
271        trainingToTestRatio = previousSnapshot.TrainingToTestRatio;
272        undoHistory.Remove(previousSnapshot);
273        OnChanged(previousSnapshot.ChangedType,
274          previousSnapshot.ChangedColumn,
275          previousSnapshot.ChangedRow);
276      }
[10547]277    }
278
[10580]279    public void BeginTransaction() {
[10581]280      SaveSnapshot(DataPreprocessingChangedEventType.Any, -1, -1);
[10580]281      transactionDepth++;
282    }
283
284    public void EndTransaction() {
285      transactionDepth--;
286      if (transactionDepth < 0)
287        throw new InvalidOperationException("There is no open transaction that can be ended.");
288      if (transactionDepth == 0)
[10581]289        OnChanged(DataPreprocessingChangedEventType.Any, -1, -1);
[10580]290    }
291
[10220]292    #endregion
[10163]293  }
294}
Note: See TracBrowser for help on using the repository browser.