#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Abstract base for the column-oriented data container used during preprocessing.
  /// Holds one value list per variable, the variable names, the training/test
  /// partitions, the applied transformations and the current cell selection.
  /// Cell/row/column manipulation is left to derived classes.
  /// </summary>
  [Item("PreprocessingData", "Represents data used for preprocessing.")]
  public abstract class PreprocessingData : NamedItem, IPreprocessingData {
    /// <summary>Row range used for training; clamped to the row count after certain changes.</summary>
    public IntRange TrainingPartition { get; set; }

    /// <summary>Row range used for testing; clamped to the row count after certain changes.</summary>
    public IntRange TestPartition { get; set; }

    /// <summary>Transformations associated with this data.</summary>
    public IList<ITransformation> Transformations { get; protected set; }

    // One list per column; the element type of each inner list depends on the column
    // (Import creates lists of double, string or DateTime).
    protected IList<IList> variableValues;

    // Column names, index-aligned with variableValues.
    protected IList<string> variableNames;

    /// <summary>Names of all columns, in column order.</summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
    }

    /// <summary>
    /// Collects the names of all columns for which <c>VariableHasType&lt;double&gt;</c> is true.
    /// </summary>
    /// <returns>A newly created list of the matching column names, in column order.</returns>
    public IEnumerable<string> GetDoubleVariableNames() {
      var doubleVariableNames = new List<string>();
      for (int i = 0; i < Columns; ++i) {
        if (VariableHasType<double>(i)) {
          doubleVariableNames.Add(variableNames[i]);
        }
      }
      return doubleVariableNames;
    }

    /// <summary>Names of the variables allowed as inputs, as taken from the imported problem data.</summary>
    public IList<string> InputVariables { get; private set; }

    /// <summary>
    /// Target variable of the imported problem data; null when the problem data is
    /// neither a regression nor a classification problem.
    /// </summary>
    public string TargetVariable { get; private set; } // optional

    /// <summary>Number of columns (variables).</summary>
    public int Columns {
      get { return variableNames.Count; }
    }

    /// <summary>Number of rows, read from the first column; 0 when there are no columns.</summary>
    public int Rows {
      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
    }

    protected IDictionary<int, IList<int>> selection;

    /// <summary>
    /// Current selection. Assigning a new dictionary raises the selection-changed
    /// notification via <see cref="OnSelectionChanged"/>.
    /// </summary>
    public IDictionary<int, IList<int>> Selection {
      get { return selection; }
      set {
        selection = value;
        OnSelectionChanged();
      }
    }

    /// <summary>
    /// Cloning constructor. Copies the column values and names, clones the partitions
    /// and transformations through <paramref name="cloner"/>, and copies the selection.
    /// </summary>
    /// <param name="original">Instance to copy from.</param>
    /// <param name="cloner">Cloner used for the cloneable members.</param>
    protected PreprocessingData(PreprocessingData original, Cloner cloner)
      : base(original, cloner) {
      variableValues = CopyVariableValues(original.variableValues);
      variableNames = new List<string>(original.variableNames);
      TrainingPartition = (IntRange)original.TrainingPartition.Clone(cloner);
      TestPartition = (IntRange)original.TestPartition.Clone(cloner);
      Transformations = new List<ITransformation>(original.Transformations.Select(cloner.Clone));

      InputVariables = new List<string>(original.InputVariables);
      TargetVariable = original.TargetVariable;

      // Previously the selection was not copied, leaving Selection null on every clone.
      // The field is assigned directly so that the abstract OnSelectionChanged is not
      // invoked while the derived instance is still under construction.
      selection = new Dictionary<int, IList<int>>();
      if (original.selection != null) {
        foreach (var entry in original.selection) {
          selection.Add(entry.Key, entry.Value == null ? null : new List<int>(entry.Value));
        }
      }

      RegisterEventHandler();
    }

    /// <summary>
    /// Creates preprocessing data from the given problem data (see <see cref="Import"/>).
    /// </summary>
    /// <param name="problemData">Problem data to import; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException">A column is not of type double, string or DateTime.</exception>
    protected PreprocessingData(IDataAnalysisProblemData problemData)
      : base() {
      Name = "Preprocessing Data";

      Transformations = new List<ITransformation>();
      selection = new Dictionary<int, IList<int>>();

      Import(problemData);

      RegisterEventHandler();
    }

    /// <summary>
    /// Replaces the variable names, column values, input variables, target variable
    /// and partitions with those of <paramref name="problemData"/>.
    /// </summary>
    /// <param name="problemData">
    /// Problem data to import. Its dataset is cast to <c>Dataset</c>, so other dataset
    /// implementations cause an <see cref="InvalidCastException"/>.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException">A column is not of type double, string or DateTime.</exception>
    public void Import(IDataAnalysisProblemData problemData) {
      if (problemData == null) throw new ArgumentNullException("problemData");

      Dataset dataset = (Dataset)problemData.Dataset;
      variableNames = new List<string>(problemData.Dataset.VariableNames);
      InputVariables = new List<string>(problemData.AllowedInputVariables);

      // Only regression and classification problems carry a target variable.
      var regressionData = problemData as IRegressionProblemData;
      var classificationData = problemData as IClassificationProblemData;
      if (regressionData != null) {
        TargetVariable = regressionData.TargetVariable;
      } else if (classificationData != null) {
        TargetVariable = classificationData.TargetVariable;
      } else {
        TargetVariable = null;
      }

      // Columns are appended in dataset order, so list index == column index.
      variableValues = new List<IList>();
      foreach (var variableName in problemData.Dataset.VariableNames) {
        if (dataset.VariableHasType<double>(variableName)) {
          variableValues.Add(dataset.GetDoubleValues(variableName).ToList());
        } else if (dataset.VariableHasType<string>(variableName)) {
          variableValues.Add(dataset.GetStringValues(variableName).ToList());
        } else if (dataset.VariableHasType<DateTime>(variableName)) {
          variableValues.Add(dataset.GetDateTimeValues(variableName).ToList());
        } else {
          throw new ArgumentException("The datatype of column " + variableName + " must be of type double, string or DateTime");
        }
      }

      TrainingPartition = new IntRange(problemData.TrainingPartition.Start, problemData.TrainingPartition.End);
      TestPartition = new IntRange(problemData.TestPartition.Start, problemData.TestPartition.End);
    }

    // Subscribes to Changed so that the partitions are re-clamped after change types
    // handled below (row deletion, transformation, or an unspecified "Any" change).
    private void RegisterEventHandler() {
      Changed += (s, e) => {
        switch (e.Type) {
          case DataPreprocessingChangedEventType.DeleteRow:
          case DataPreprocessingChangedEventType.Any:
          case DataPreprocessingChangedEventType.Transformation:
            CheckPartitionRanges();
            break;
        }
      };
    }

    // Clamps the start and end of both partitions so none exceeds the current row count.
    private void CheckPartitionRanges() {
      int rowCount = Rows; // never negative: either a list's Count or 0
      TrainingPartition.Start = Math.Min(TrainingPartition.Start, rowCount);
      TrainingPartition.End = Math.Min(TrainingPartition.End, rowCount);
      TestPartition.Start = Math.Min(TestPartition.Start, rowCount);
      TestPartition.End = Math.Min(TestPartition.End, rowCount);
    }

    /// <summary>
    /// Creates a copy of the given columns in which every column is a new list instance
    /// of the same runtime type as the source column.
    /// </summary>
    /// <param name="original">Columns to copy.</param>
    /// <returns>A new outer list holding one copied list per source column.</returns>
    protected IList<IList> CopyVariableValues(IList<IList> original) {
      var copy = new List<IList>(original.Count);
      for (int i = 0; i < original.Count; ++i) {
        // Invokes the constructor of the column's concrete list type that accepts the
        // source list as its single argument.
        copy.Add((IList)Activator.CreateInstance(original[i].GetType(), original[i]));
      }
      return copy;
    }

    #region IPreprocessingData Members

    public abstract T GetCell<T>(int columnIndex, int rowIndex);

    public abstract void SetCell<T>(int columnIndex, int rowIndex, T value);

    public abstract string GetCellAsString(int columnIndex, int rowIndex);

    public abstract string GetVariableName(int columnIndex);

    public abstract int GetColumnIndex(string variableName);

    public abstract bool VariableHasType<T>(int columnIndex);

    [Obsolete("use the index based variant, is faster")]
    public abstract IList<T> GetValues<T>(string variableName, bool considerSelection);

    public abstract IList<T> GetValues<T>(int columnIndex, bool considerSelection);

    public abstract void SetValues<T>(int columnIndex, IList<T> values);

    public abstract bool SetValue(string value, int columnIndex, int rowIndex);

    public abstract bool Validate(string value, out string errorMessage, int columnIndex);

    public abstract bool AreAllStringColumns(IEnumerable<int> columnIndices);

    public abstract void DeleteRowsWithIndices(IEnumerable<int> rows);

    public abstract void InsertRow(int rowIndex);

    public abstract void DeleteRow(int rowIndex);

    public abstract void InsertColumn<T>(string variableName, int columnIndex);

    public abstract void DeleteColumn(int columnIndex);

    public abstract void RenameColumn(int columnIndex, string name);

    public abstract void RenameColumns(IList<string> list);

    public abstract Dataset ExportToDataset();

    public abstract void ClearSelection();

    public abstract event EventHandler SelectionChanged;

    protected abstract void OnSelectionChanged();

    public event DataPreprocessingChangedEventHandler Changed;

    /// <summary>Raises <see cref="Changed"/> with the given change type and cell coordinates.</summary>
    /// <param name="type">Kind of change that occurred.</param>
    /// <param name="column">Column index passed on to the event arguments.</param>
    /// <param name="row">Row index passed on to the event arguments.</param>
    protected virtual void OnChanged(DataPreprocessingChangedEventType type, int column, int row) {
      // Snapshot the delegate so the null check and the invocation use the same value.
      var listeners = Changed;
      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
    }

    #endregion
  }
}