#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// State of a <see cref="PreprocessingData"/> instance captured before a modification,
/// together with the change event that is raised when the state is restored by an undo.
/// </summary>
internal class PDSnapshot {
  /// <summary>Copied column values, keyed by zero-based column index.</summary>
  public IDictionary<int, IList> VariableValues { get; set; }

  /// <summary>Copied variable (column) names in column order.</summary>
  public IList<string> VariableNames { get; set; }

  /// <summary>Training-to-test ratio at the time the snapshot was taken.</summary>
  public double TrainingToTestRatio { get; set; }

  /// <summary>Event type reported to listeners when this snapshot is restored.</summary>
  public DataPreprocessingChangedEventType ChangedType { get; set; }

  /// <summary>Column affected by the recorded change, or -1 if the change was not column specific.</summary>
  public int ChangedColumn { get; set; }

  /// <summary>Row affected by the recorded change, or -1 if the change was not row specific.</summary>
  public int ChangedRow { get; set; }
}
/// <summary>
/// Editable, column-oriented copy of a data analysis problem's dataset.
/// Every modifying operation stores a snapshot first so that it can be reverted with
/// <see cref="Undo"/>; at most <c>MAX_UNDO_DEPTH</c> snapshots are kept.
/// </summary>
[Item("PreprocessingData", "Represents data used for preprocessing.")]
public class PreprocessingData : NamedItem, IPreprocessingData {
  // Maximum number of snapshots retained; the oldest snapshot is discarded first.
  private const int MAX_UNDO_DEPTH = 5;

  // Column values keyed by zero-based column index. The keys are kept contiguous
  // (0 .. Columns - 1) so that they always line up with the positions in variableNames.
  private IDictionary<int, IList> variableValues;
  private IList<string> variableNames;
  private double trainingToTestRatio;
  private IList<PDSnapshot> undoHistory;

  /// <summary>
  /// Cloning constructor: copies the column data, the names and the partition ratio.
  /// The undo history is not cloned; the clone starts with an empty history.
  /// </summary>
  private PreprocessingData(PreprocessingData original, Cloner cloner)
    : base(original, cloner) {
    variableValues = CopyVariableValues(original.variableValues);
    variableNames = new List<string>(original.variableNames);
    trainingToTestRatio = original.trainingToTestRatio;
    undoHistory = new List<PDSnapshot>();
  }

  /// <summary>
  /// Creates the preprocessing data from the dataset of <paramref name="problemData"/>.
  /// </summary>
  /// <param name="problemData">Problem data whose dataset and training partition are read.</param>
  /// <exception cref="ArgumentException">
  /// Thrown if a column is neither a double, string nor DateTime column.
  /// </exception>
  public PreprocessingData(IDataAnalysisProblemData problemData)
    : base() {
    Name = "-";

    variableNames = new List<string>(problemData.Dataset.VariableNames);

    // create dictionary from column index to column values
    int columnIndex = 0;
    variableValues = new Dictionary<int, IList>();
    foreach (var variableName in problemData.Dataset.VariableNames) {
      if (problemData.Dataset.IsType<double>(variableName)) {
        variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
      } else if (problemData.Dataset.IsType<string>(variableName)) {
        variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
      } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
        variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
      } else {
        throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
      }
      ++columnIndex;
    }

    // double.Epsilon as lower bound avoids a division by zero for an empty dataset
    trainingToTestRatio = (double)problemData.TrainingPartition.Size / Math.Max(problemData.Dataset.Rows, double.Epsilon);
    undoHistory = new List<PDSnapshot>();
  }

  /// <summary>
  /// Builds a typed column by converting the textual value of every row of the given
  /// dataset column with <paramref name="selector"/>.
  /// </summary>
  private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
    var list = new List<T>(ds.Rows);
    for (int row = 0; row < ds.Rows; ++row) {
      list.Add(selector(ds.GetValue(row, column)));
    }
    return list;
  }

  /// <summary>
  /// Returns a new dictionary in which every column of <paramref name="original"/> is
  /// replaced by a copy of the same runtime type; <paramref name="original"/> is not modified.
  /// </summary>
  /// <remarks>
  /// The copy is created through the column type's constructor that accepts the source
  /// collection, so columns are expected to be <c>List&lt;T&gt;</c> instances.
  /// </remarks>
  private static Dictionary<int, IList> CopyVariableValues(IDictionary<int, IList> original) {
    var copy = new Dictionary<int, IList>(original.Count);
    foreach (var column in original) {
      copy[column.Key] = (IList)Activator.CreateInstance(column.Value.GetType(), column.Value);
    }
    return copy;
  }

  /// <summary>
  /// Pushes a copy of the current state onto the undo history. The given event data is
  /// what <see cref="Undo"/> reports when this snapshot is restored, which is why callers
  /// pass the inverse of the operation they are about to perform.
  /// </summary>
  private void SaveSnapshot(DataPreprocessingChangedEventType changedType, int column, int row) {
    PDSnapshot currentSnapshot = new PDSnapshot();
    currentSnapshot.VariableValues = CopyVariableValues(variableValues);
    currentSnapshot.VariableNames = new List<string>(variableNames);
    currentSnapshot.TrainingToTestRatio = trainingToTestRatio;
    currentSnapshot.ChangedType = changedType;
    currentSnapshot.ChangedColumn = column;
    currentSnapshot.ChangedRow = row;

    // keep the history bounded by dropping the oldest entry
    if (undoHistory.Count >= MAX_UNDO_DEPTH)
      undoHistory.RemoveAt(0);

    undoHistory.Add(currentSnapshot);
  }

  #region NamedItem abstract Member Implementations

  public override IDeepCloneable Clone(Cloner cloner) {
    return new PreprocessingData(this, cloner);
  }

  #endregion

  #region IPreprocessingData Members

  /// <summary>Returns the cell at the given position cast to <typeparamref name="T"/>.</summary>
  public T GetCell<T>(int columnIndex, int rowIndex) {
    return (T)variableValues[columnIndex][rowIndex];
  }

  /// <summary>Overwrites a single cell and raises <see cref="Changed"/> with ChangeItem.</summary>
  public void SetCell<T>(int columnIndex, int rowIndex, T value) {
    SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
    variableValues[columnIndex][rowIndex] = value;
    OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
  }

  /// <summary>
  /// Returns the textual representation of a cell. Empty cells (null, e.g. rows inserted
  /// into a string column) yield an empty string.
  /// </summary>
  public string GetCellAsString(int columnIndex, int rowIndex) {
    object value = variableValues[columnIndex][rowIndex];
    return value == null ? string.Empty : value.ToString();
  }

  [Obsolete("use the index based variant, is faster")]
  public IList<T> GetValues<T>(string variableName) {
    return GetValues<T>(GetColumnIndex(variableName));
  }

  /// <summary>Returns the live (not copied) value list of the given column.</summary>
  public IList<T> GetValues<T>(int columnIndex) {
    return (IList<T>)variableValues[columnIndex];
  }

  /// <summary>Replaces the values of a whole column.</summary>
  /// <exception cref="ArgumentException">
  /// Thrown if the existing column does not store elements of type <typeparamref name="T"/>.
  /// </exception>
  public void SetValues<T>(int columnIndex, IList<T> values) {
    if (!IsType<T>(columnIndex)) {
      throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
    }
    SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    variableValues[columnIndex] = (IList)values;
    OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
  }

  /// <summary>
  /// Inserts a row at <paramref name="rowIndex"/>; each column receives the default
  /// value of its element type (null for reference types).
  /// </summary>
  public void InsertRow(int rowIndex) {
    // the snapshot carries the inverse event (DeleteRow), which Undo raises
    SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
    foreach (IList column in variableValues.Values) {
      Type type = column.GetType().GetGenericArguments()[0];
      column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
    }
    OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
  }

  /// <summary>Removes the row at <paramref name="rowIndex"/> from every column.</summary>
  public void DeleteRow(int rowIndex) {
    // the snapshot carries the inverse event (AddRow), which Undo raises
    SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    foreach (IList column in variableValues.Values) {
      column.RemoveAt(rowIndex);
    }
    OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
  }

  /// <summary>
  /// Inserts a new column of element type <typeparamref name="T"/> at
  /// <paramref name="columnIndex"/>, filled with <c>default(T)</c> for every existing row.
  /// Columns at or after the insertion point move one position to the right.
  /// </summary>
  /// <exception cref="ArgumentOutOfRangeException">
  /// Thrown if <paramref name="columnIndex"/> is negative or greater than <see cref="Columns"/>.
  /// </exception>
  public void InsertColumn<T>(string variableName, int columnIndex) {
    // validate first so that a failing call does not leave a snapshot behind
    if (columnIndex < 0 || columnIndex > Columns)
      throw new ArgumentOutOfRangeException("columnIndex");

    SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);

    // read the row count before the keys are moved
    int rows = Rows;
    // shift from the highest key downwards so that no column is overwritten
    for (int i = variableValues.Count - 1; i >= columnIndex; --i) {
      variableValues[i + 1] = variableValues[i];
    }
    variableValues[columnIndex] = new List<T>(Enumerable.Repeat(default(T), rows));
    variableNames.Insert(columnIndex, variableName);

    OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
  }

  /// <summary>
  /// Removes the column at <paramref name="columnIndex"/>; the following columns move
  /// one position to the left.
  /// </summary>
  /// <exception cref="ArgumentOutOfRangeException">
  /// Thrown if <paramref name="columnIndex"/> does not address an existing column.
  /// </exception>
  public void DeleteColumn(int columnIndex) {
    // validate first so that a failing call does not leave a snapshot behind
    if (columnIndex < 0 || columnIndex >= Columns)
      throw new ArgumentOutOfRangeException("columnIndex");

    SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);

    // close the gap so that the keys stay aligned with the positions in variableNames
    int lastIndex = variableValues.Count - 1;
    for (int i = columnIndex; i < lastIndex; ++i) {
      variableValues[i] = variableValues[i + 1];
    }
    variableValues.Remove(lastIndex);
    variableNames.RemoveAt(columnIndex);

    OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
  }

  /// <summary>Rows from 0 up to the (truncated) training share of all rows.</summary>
  public IntRange TrainingPartition {
    get { return new IntRange(0, (int)(Rows * trainingToTestRatio)); }
  }

  /// <summary>Rows from the end of the training partition up to the last row.</summary>
  public IntRange TestPartition {
    get { return new IntRange((int)(Rows * trainingToTestRatio), Rows); }
  }

  public string GetVariableName(int columnIndex) {
    return variableNames[columnIndex];
  }

  public IEnumerable<string> VariableNames {
    get { return variableNames; }
  }

  /// <summary>Returns the index of the named column, or -1 if there is no such column.</summary>
  public int GetColumnIndex(string variableName) {
    return variableNames.IndexOf(variableName);
  }

  /// <summary>Returns whether the column is stored as a <c>List&lt;T&gt;</c>.</summary>
  public bool IsType<T>(int columnIndex) {
    return variableValues[columnIndex] is List<T>;
  }

  public int Columns {
    get { return variableNames.Count; }
  }

  /// <summary>Number of rows, taken from the first column; 0 if there are no columns.</summary>
  public int Rows {
    get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
  }

  /// <summary>Creates a dataset from the current variable names and columns (in column order).</summary>
  public Dataset ExportToDataset() {
    IList<IList> values = new List<IList>();
    for (int i = 0; i < Columns; ++i) {
      values.Add(variableValues[i]);
    }

    var dataset = new Dataset(variableNames, values);
    return dataset;
  }

  public event DataPreprocessingChangedEventHandler Changed;

  protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
    // copy to a local so that a concurrent unsubscribe cannot null the delegate between check and call
    var listeners = Changed;
    if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
  }

  public bool IsUndoAvailable {
    get { return undoHistory.Count > 0; }
  }

  /// <summary>
  /// Restores the most recent snapshot, removes it from the history and raises
  /// <see cref="Changed"/> with the event data stored in that snapshot.
  /// Does nothing if no snapshot is available.
  /// </summary>
  public void Undo() {
    if (!IsUndoAvailable) return;

    int lastIndex = undoHistory.Count - 1;
    PDSnapshot previousSnapshot = undoHistory[lastIndex];

    // explicit casts keep these assignments valid whether the snapshot exposes its
    // collections through the generic or the non-generic collection interfaces
    variableValues = (IDictionary<int, IList>)previousSnapshot.VariableValues;
    variableNames = (IList<string>)previousSnapshot.VariableNames;
    trainingToTestRatio = previousSnapshot.TrainingToTestRatio;
    undoHistory.RemoveAt(lastIndex);

    OnChanged(previousSnapshot.ChangedType,
      previousSnapshot.ChangedColumn,
      previousSnapshot.ChangedRow);
  }

  #endregion
}
}