#region License Information
/* HeuristicLab
* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Abstract base class for the data container used during preprocessing.
/// It stores the variable names, one value list per column, the training and
/// test partitions, a list of transformations and the current selection.
/// Cell-level access and structural edits are left to concrete subclasses.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments in this class were reconstructed
/// from how the members are used (e.g. <c>variableValues[0].Count</c>, the
/// typed <c>Get*Values</c> calls in <see cref="Import"/>). Verify them against
/// <c>IPreprocessingData</c>.
/// </remarks>
[Item("PreprocessingData", "Represents data used for preprocessing.")]
public abstract class PreprocessingData : NamedItem, IPreprocessingData {
  /// <summary>Training row range; its bounds are clamped when the data changes (see <see cref="CheckPartitionRanges"/>).</summary>
  public IntRange TrainingPartition { get; set; }

  /// <summary>Test row range; its bounds are clamped when the data changes (see <see cref="CheckPartitionRanges"/>).</summary>
  public IntRange TestPartition { get; set; }

  protected IList<ITransformation> transformations;

  /// <summary>Gets the list of transformations held by this instance.</summary>
  public IList<ITransformation> Transformations {
    get { return transformations; }
  }

  // One list per column; variableValues[i] belongs to variableNames[i].
  protected IList<IList> variableValues;
  protected IList<string> variableNames;

  /// <summary>Gets the names of all columns in column order.</summary>
  public IEnumerable<string> VariableNames {
    get { return variableNames; }
  }

  /// <summary>
  /// Collects the names of all columns for which
  /// <c>VariableHasType&lt;double&gt;</c> returns true.
  /// </summary>
  /// <returns>A new list with the matching names in column order.</returns>
  public IEnumerable<string> GetDoubleVariableNames() {
    var doubleVariableNames = new List<string>();
    for (int i = 0; i < Columns; ++i) {
      if (VariableHasType<double>(i)) {
        doubleVariableNames.Add(variableNames[i]);
      }
    }
    return doubleVariableNames;
  }

  /// <summary>Gets the input variables taken over from the imported problem data.</summary>
  public IList<string> InputVariables { get; private set; }

  /// <summary>
  /// Gets the target variable of the imported problem data, or null when the
  /// problem data is neither a regression nor a classification problem.
  /// </summary>
  public string TargetVariable { get; private set; } // optional

  /// <summary>Gets the number of columns (the number of variable names).</summary>
  public int Columns {
    get { return variableNames.Count; }
  }

  /// <summary>Gets the number of entries in the first column, or 0 when there are no columns.</summary>
  public int Rows {
    get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
  }

  // presumably maps a column index to the selected row indices of that column; verify against IPreprocessingData
  protected IDictionary<int, IList<int>> selection;

  /// <summary>
  /// Gets or sets the current selection. Setting it always raises the
  /// selection-changed notification via <see cref="OnSelectionChanged"/>.
  /// </summary>
  public IDictionary<int, IList<int>> Selection {
    get { return selection; }
    set {
      selection = value;
      OnSelectionChanged();
    }
  }

  /// <summary>
  /// Cloning constructor. Copies names, values, partitions, transformations,
  /// input/target variables and the selection of <paramref name="original"/>.
  /// </summary>
  /// <param name="original">The instance to copy from.</param>
  /// <param name="cloner">The cloner used for the partitions and transformations.</param>
  protected PreprocessingData(PreprocessingData original, Cloner cloner)
    : base(original, cloner) {
    variableValues = CopyVariableValues(original.variableValues);
    variableNames = new List<string>(original.variableNames);
    TrainingPartition = (IntRange)original.TrainingPartition.Clone(cloner);
    TestPartition = (IntRange)original.TestPartition.Clone(cloner);
    transformations = new List<ITransformation>(original.transformations.Select(t => cloner.Clone(t)));
    InputVariables = new List<string>(original.InputVariables);
    TargetVariable = original.TargetVariable;
    // The field is assigned directly (not through the property) so that no
    // selection-changed notification is raised while the clone is being built.
    // Previously the clone was left with a null selection.
    selection = CopySelection(original.selection);
    RegisterEventHandler();
  }

  /// <summary>
  /// Creates preprocessing data from the given problem data.
  /// </summary>
  /// <param name="problemData">The problem data to import.</param>
  protected PreprocessingData(IDataAnalysisProblemData problemData)
    : base() {
    Name = "Preprocessing Data";
    transformations = new List<ITransformation>();
    selection = new Dictionary<int, IList<int>>();
    Import(problemData);
    RegisterEventHandler();
  }

  /// <summary>
  /// Replaces names, values, input/target variables and partitions with the
  /// content of <paramref name="problemData"/>. All new state is built first
  /// and assigned only when the whole import succeeded, so a failing import
  /// leaves this instance untouched.
  /// </summary>
  /// <param name="problemData">The problem data to import; its dataset is cast to <c>Dataset</c>.</param>
  /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
  /// <exception cref="ArgumentException">A column is neither of type double, string nor DateTime.</exception>
  public void Import(IDataAnalysisProblemData problemData) {
    if (problemData == null) throw new ArgumentNullException("problemData");

    Dataset dataset = (Dataset)problemData.Dataset;
    var importedNames = new List<string>(problemData.Dataset.VariableNames);

    // Copy every column into its own list; the type checks run in the order double, string, DateTime.
    var importedValues = new List<IList>(importedNames.Count);
    foreach (var variableName in importedNames) {
      if (dataset.VariableHasType<double>(variableName)) {
        importedValues.Add(dataset.GetDoubleValues(variableName).ToList());
      } else if (dataset.VariableHasType<string>(variableName)) {
        importedValues.Add(dataset.GetStringValues(variableName).ToList());
      } else if (dataset.VariableHasType<DateTime>(variableName)) {
        importedValues.Add(dataset.GetDateTimeValues(variableName).ToList());
      } else {
        throw new ArgumentException("The datatype of column " + variableName + " must be of type double, string or DateTime");
      }
    }

    var importedInputVariables = new List<string>(problemData.AllowedInputVariables);
    string importedTargetVariable = (problemData is IRegressionProblemData) ? ((IRegressionProblemData)problemData).TargetVariable
      : (problemData is IClassificationProblemData) ? ((IClassificationProblemData)problemData).TargetVariable
      : null;
    var importedTrainingPartition = new IntRange(problemData.TrainingPartition.Start, problemData.TrainingPartition.End);
    var importedTestPartition = new IntRange(problemData.TestPartition.Start, problemData.TestPartition.End);

    // Everything above succeeded: publish the new state.
    variableNames = importedNames;
    variableValues = importedValues;
    InputVariables = importedInputVariables;
    TargetVariable = importedTargetVariable;
    TrainingPartition = importedTrainingPartition;
    TestPartition = importedTestPartition;
  }

  /// <summary>
  /// Subscribes to <see cref="Changed"/> so that the partitions are re-clamped
  /// after DeleteRow, Any and Transformation changes.
  /// </summary>
  private void RegisterEventHandler() {
    Changed += (s, e) => {
      switch (e.Type) {
        case DataPreprocessingChangedEventType.DeleteRow:
        case DataPreprocessingChangedEventType.Any:
        case DataPreprocessingChangedEventType.Transformation:
          CheckPartitionRanges();
          break;
      }
    };
  }

  /// <summary>
  /// Clamps the start and end of both partitions to the last row index
  /// (<c>Rows - 1</c>, but never below 0).
  /// </summary>
  private void CheckPartitionRanges() {
    // NOTE(review): if IntRange.End is an exclusive bound, clamping it to
    // Rows - 1 drops the last row from a partition that used to cover it.
    // Confirm the End convention before changing this.
    int maxRowIndex = Math.Max(0, Rows - 1);
    TrainingPartition.Start = Math.Min(TrainingPartition.Start, maxRowIndex);
    TrainingPartition.End = Math.Min(TrainingPartition.End, maxRowIndex);
    TestPartition.Start = Math.Min(TestPartition.Start, maxRowIndex);
    TestPartition.End = Math.Min(TestPartition.End, maxRowIndex);
  }

  /// <summary>
  /// Creates a copy of the column lists: every column is re-created as a new
  /// instance of its own runtime type, constructed from the original column.
  /// </summary>
  /// <param name="original">The column lists to copy.</param>
  /// <returns>A new outer list holding a new list per column.</returns>
  protected IList<IList> CopyVariableValues(IList<IList> original) {
    var copy = new List<IList>(original.Count);
    for (int i = 0; i < original.Count; ++i) {
      // Relies on the column type having a constructor that accepts the original column.
      copy.Add((IList)Activator.CreateInstance(original[i].GetType(), original[i]));
    }
    return copy;
  }

  /// <summary>
  /// Copies a selection dictionary including its index lists. A null
  /// dictionary yields an empty one; a null index list stays null.
  /// </summary>
  private static IDictionary<int, IList<int>> CopySelection(IDictionary<int, IList<int>> original) {
    var copy = new Dictionary<int, IList<int>>();
    if (original == null) return copy;
    foreach (var entry in original) {
      copy.Add(entry.Key, entry.Value == null ? null : new List<int>(entry.Value));
    }
    return copy;
  }

  #region IPreprocessingData Members
  // Cell and column access, implemented by concrete subclasses.
  public abstract T GetCell<T>(int columnIndex, int rowIndex);
  public abstract void SetCell<T>(int columnIndex, int rowIndex, T value);
  public abstract string GetCellAsString(int columnIndex, int rowIndex);
  public abstract string GetVariableName(int columnIndex);
  public abstract int GetColumnIndex(string variableName);
  public abstract bool VariableHasType<T>(int columnIndex);

  [Obsolete("use the index based variant, is faster")]
  public abstract IList<T> GetValues<T>(string variableName, bool considerSelection);
  public abstract IList<T> GetValues<T>(int columnIndex, bool considerSelection);
  public abstract void SetValues<T>(int columnIndex, IList<T> values);
  public abstract bool SetValue(string value, int columnIndex, int rowIndex);
  public abstract bool Validate(string value, out string errorMessage, int columnIndex);
  public abstract bool AreAllStringColumns(IEnumerable<int> columnIndices);

  // Structural edits, implemented by concrete subclasses.
  public abstract void DeleteRowsWithIndices(IEnumerable<int> rows);
  public abstract void InsertRow(int rowIndex);
  public abstract void DeleteRow(int rowIndex);
  public abstract void InsertColumn(string variableName, int columnIndex);
  public abstract void DeleteColumn(int columnIndex);
  public abstract void RenameColumn(int columnIndex, string name);
  public abstract void RenameColumns(IList<string> list);

  public abstract Dataset ExportToDataset();

  public abstract void ClearSelection();
  public abstract event EventHandler SelectionChanged;
  protected abstract void OnSelectionChanged();

  /// <summary>Raised by <see cref="OnChanged"/> to report a change of the given type at a column/row.</summary>
  public event DataPreprocessingChangedEventHandler Changed;

  /// <summary>
  /// Raises <see cref="Changed"/> with the given change type and position.
  /// </summary>
  /// <param name="type">The kind of change that happened.</param>
  /// <param name="column">The column passed on to the event arguments.</param>
  /// <param name="row">The row passed on to the event arguments.</param>
  protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
    // Copy the delegate first so a concurrent unsubscribe cannot null it between check and call.
    var listeners = Changed;
    if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
  }
  #endregion
}
}