#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.DataPreprocessing {
  // NOTE(review): the generic type parameter/argument lists in this file were reconstructed after
  // they had been stripped from the text. The element types double/string/DateTime are inferred
  // from the Double/String/DateTime column class names - confirm against version control.

  /// <summary>
  /// Untyped base class of a named preprocessing data column. Offers type-independent access
  /// (length, validity, string handling, basic statistics) and helpers that dispatch on the
  /// concrete column type.
  /// </summary>
  [Item("PreprocessingDataColumn", "")]
  [StorableClass]
  public abstract class PreprocessingDataColumn : NamedItem {
    #region Constructor, Cloning & Persistence
    protected PreprocessingDataColumn() { }

    protected PreprocessingDataColumn(string name)
      : base(name) { }

    protected PreprocessingDataColumn(PreprocessingDataColumn original, Cloner cloner)
      : base(original, cloner) { }

    [StorableConstructor]
    protected PreprocessingDataColumn(bool deserializing)
      : base(deserializing) { }
    #endregion

    /// <summary>Number of entries in the column.</summary>
    public abstract int Length { get; }

    /// <summary>Returns the element type stored in the column.</summary>
    public abstract Type GetValueType();

    /// <summary>
    /// Returns true if the column's value type is exactly <typeparamref name="T"/>
    /// (compared with ==, so derived or convertible types do not match).
    /// </summary>
    public virtual bool IsType<T>() {
      return GetValueType() == typeof(T);
    }

    /// <summary>Returns whether the entry at <paramref name="index"/> is a valid value.</summary>
    public abstract bool IsValidValue(int index);

    #region Column Type Switches
    /// <summary>
    /// Invokes the action that matches the concrete column type. Nothing happens if the matching
    /// action is null or if the column is none of the three known column types.
    /// </summary>
    internal void TypeSwitch(
      Action<DoublePreprocessingDataColumn> doubleAction,
      Action<StringPreprocessingDataColumn> stringAction = null,
      Action<DateTimePreprocessingDataColumn> dateTimeAction = null) {
      var doubleColumn = this as DoublePreprocessingDataColumn;
      if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn);

      var stringColumn = this as StringPreprocessingDataColumn;
      if (stringColumn != null && stringAction != null) stringAction(stringColumn);

      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
      if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn);
    }

    /// <summary>
    /// Invokes the action that matches the concrete column type and passes
    /// <paramref name="value"/> cast to that column's element type.
    /// The cast goes through object, so the runtime type of <paramref name="value"/> has to match
    /// the element type of the column; otherwise the cast fails at runtime.
    /// Nothing happens if the matching action is null or the column type is unknown.
    /// </summary>
    internal void TypeSwitch<TIn>(
      TIn value,
      Action<DoublePreprocessingDataColumn, double> doubleAction,
      Action<StringPreprocessingDataColumn, string> stringAction = null,
      Action<DateTimePreprocessingDataColumn, DateTime> dateTimeAction = null) {
      var doubleColumn = this as DoublePreprocessingDataColumn;
      if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn, Convert<double>(value));

      var stringColumn = this as StringPreprocessingDataColumn;
      if (stringColumn != null && stringAction != null) stringAction(stringColumn, Convert<string>(value));

      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
      if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn, Convert<DateTime>(value));
    }

    /// <summary>
    /// Evaluates the function that matches the concrete column type and casts its result
    /// (of the column's element type) to <typeparamref name="TOut"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No function was evaluated, i.e. the matching function is null or the column type is unknown.
    /// </exception>
    internal TOut TypeSwitch<TOut>(
      Func<DoublePreprocessingDataColumn, double> doubleFunc,
      Func<StringPreprocessingDataColumn, string> stringFunc = null,
      Func<DateTimePreprocessingDataColumn, DateTime> dateTimeFunc = null) {
      var doubleColumn = this as DoublePreprocessingDataColumn;
      if (doubleColumn != null && doubleFunc != null) return Convert<TOut>(doubleFunc(doubleColumn));

      var stringColumn = this as StringPreprocessingDataColumn;
      if (stringColumn != null && stringFunc != null) return Convert<TOut>(stringFunc(stringColumn));

      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<TOut>(dateTimeFunc(dateTimeColumn));

      throw new InvalidOperationException("Invalid data column type.");
    }

    /// <summary>
    /// Evaluates the function that matches the concrete column type and returns its result unchanged.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No function was evaluated, i.e. the matching function is null or the column type is unknown.
    /// </exception>
    internal TOut TypeSwitch<TOut>(
      Func<DoublePreprocessingDataColumn, TOut> doubleFunc,
      Func<StringPreprocessingDataColumn, TOut> stringFunc = null,
      Func<DateTimePreprocessingDataColumn, TOut> dateTimeFunc = null) {
      var doubleColumn = this as DoublePreprocessingDataColumn;
      if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn);

      var stringColumn = this as StringPreprocessingDataColumn;
      if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn);

      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
      if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn);

      throw new InvalidOperationException("Invalid data column type.");
    }

    /// <summary>
    /// Evaluates the function that matches the concrete column type, passing
    /// <paramref name="value"/> cast to that column's element type, and returns its result unchanged.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No function was evaluated, i.e. the matching function is null or the column type is unknown.
    /// </exception>
    internal TOut TypeSwitch<TIn, TOut>(
      TIn value,
      Func<DoublePreprocessingDataColumn, double, TOut> doubleFunc,
      Func<StringPreprocessingDataColumn, string, TOut> stringFunc = null,
      Func<DateTimePreprocessingDataColumn, DateTime, TOut> dateTimeFunc = null) {
      var doubleColumn = this as DoublePreprocessingDataColumn;
      if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn, Convert<double>(value));

      var stringColumn = this as StringPreprocessingDataColumn;
      if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn, Convert<string>(value));

      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
      if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn, Convert<DateTime>(value));

      throw new InvalidOperationException("Invalid data column type.");
    }

    /// <summary>
    /// Evaluates the sequence-returning function that matches the concrete column type and casts
    /// the resulting sequence to a sequence of <typeparamref name="TOut"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No function was evaluated, i.e. the matching function is null or the column type is unknown.
    /// </exception>
    internal IEnumerable<TOut> TypeSwitch<TOut>(
      Func<DoublePreprocessingDataColumn, IEnumerable<double>> doubleFunc,
      Func<StringPreprocessingDataColumn, IEnumerable<string>> stringFunc = null,
      Func<DateTimePreprocessingDataColumn, IEnumerable<DateTime>> dateTimeFunc = null) {
      var doubleColumn = this as DoublePreprocessingDataColumn;
      if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn));

      var stringColumn = this as StringPreprocessingDataColumn;
      if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn));

      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn));

      throw new InvalidOperationException("Invalid data column type.");
    }

    /// <summary>
    /// Evaluates the sequence-returning function that matches the concrete column type, passing
    /// <paramref name="value"/> cast to that column's element type, and casts the resulting
    /// sequence to a sequence of <typeparamref name="TOut"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No function was evaluated, i.e. the matching function is null or the column type is unknown.
    /// </exception>
    internal IEnumerable<TOut> TypeSwitch<TIn, TOut>(
      TIn value,
      Func<DoublePreprocessingDataColumn, double, IEnumerable<double>> doubleFunc,
      Func<StringPreprocessingDataColumn, string, IEnumerable<string>> stringFunc = null,
      Func<DateTimePreprocessingDataColumn, DateTime, IEnumerable<DateTime>> dateTimeFunc = null) {
      var doubleColumn = this as DoublePreprocessingDataColumn;
      if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn, Convert<double>(value)));

      var stringColumn = this as StringPreprocessingDataColumn;
      if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn, Convert<string>(value)));

      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn, Convert<DateTime>(value)));

      throw new InvalidOperationException("Invalid data column type.");
    }

    /// <summary>
    /// Plain cast from object to <typeparamref name="T"/>. Callers pass values of generic type,
    /// which cannot be cast to a concrete type directly; routing through object makes the cast legal.
    /// </summary>
    private static T Convert<T>(object obj) {
      return (T)obj;
    }
    #endregion

    #region Statistics
    /// <summary>Number of distinct values among the given indices (all entries if null).</summary>
    public abstract int GetDistinctValues(IEnumerable<int> indices = null);

    /// <summary>Number of missing values among the given indices (all entries if null).</summary>
    public abstract int GetNumberOfMissingValues(IEnumerable<int> indices = null);
    #endregion

    #region String Handling
    /// <summary>Checks a string representation of a value; reports a message via <paramref name="errorMessage"/>.</summary>
    public abstract bool Validate(string value, out string errorMessage);

    /// <summary>Returns the string representation of the entry at <paramref name="index"/>.</summary>
    public abstract string GetValue(int index);

    /// <summary>Sets the entry at <paramref name="index"/> from a string; the bool result reports the outcome.</summary>
    public abstract bool SetValue(string value, int index);
    #endregion
  }

  /// <summary>
  /// Typed preprocessing data column that keeps its entries in a list of <typeparamref name="T"/>
  /// and implements the type-independent statistics on top of it.
  /// </summary>
  // NOTE(review): the constraint was restored as IComparable<T>; the mangled text only showed
  // "IComparable". Confirm it matches the constraint of the Quantile extension method used below.
  [Item("PreprocessingDataColumn", "")]
  [StorableClass]
  public abstract class PreprocessingDataColumn<T> : PreprocessingDataColumn
    where T : IComparable<T> {
    #region Constructor, Cloning & Persistence
    protected PreprocessingDataColumn()
      : this(string.Empty, Enumerable.Empty<T>()) { }

    /// <summary>Creates a column named <paramref name="name"/> holding a copy of <paramref name="values"/>.</summary>
    protected PreprocessingDataColumn(string name, IEnumerable<T> values)
      : base(name) {
      Values = new List<T>(values);
    }

    /// <summary>Cloning constructor; the value list is copied, the elements themselves are not cloned.</summary>
    protected PreprocessingDataColumn(PreprocessingDataColumn<T> original, Cloner cloner)
      : base(original, cloner) {
      Values = new List<T>(original.Values);
    }

    // Values is not assigned here; presumably the persistence framework restores it through the
    // [Storable] property - TODO confirm.
    [StorableConstructor]
    protected PreprocessingDataColumn(bool deserializing)
      : base(deserializing) { }
    #endregion

    /// <summary>Backing list of the column's entries.</summary>
    [Storable]
    internal List<T> Values { get; private set; }

    /// <summary>
    /// Returns the entries at <paramref name="indices"/> in the given order. If
    /// <paramref name="indices"/> is null, the backing list itself is returned (no copy).
    /// </summary>
    public IEnumerable<T> GetValues(IEnumerable<int> indices = null) {
      return indices == null
        ? Values
        : indices.Select(index => Values[index]);
    }

    /// <summary>
    /// Like <see cref="GetValues"/>, but only yields entries for which IsValidValue returns true.
    /// </summary>
    public IEnumerable<T> GetValidValues(IEnumerable<int> indices = null) {
      return indices == null
        ? Values.Where(IsValidValue)
        : indices.Select(index => Values[index]).Where(IsValidValue);
    }

    /// <summary>Value reported by the statistics when there is no valid value to evaluate.</summary>
    protected abstract T DefaultValue { get; }

    public override Type GetValueType() {
      return typeof(T);
    }

    public override int Length {
      get { return Values.Count; }
    }

    /// <summary>Gets or sets the entry at <paramref name="index"/> in the backing list.</summary>
    public T this[int index] {
      get { return Values[index]; }
      set { Values[index] = value; }
    }

    /// <summary>Returns whether <paramref name="value"/> is valid; this base implementation accepts everything.</summary>
    public virtual bool IsValidValue(T value) {
      return true;
    }

    public override bool IsValidValue(int index) {
      return IsValidValue(Values[index]);
    }

    #region Statistics
    // All statistics below operate on the valid values only and substitute DefaultValue
    // when no valid value is selected, so they never fail on an empty selection.

    /// <summary>Smallest valid value, or <see cref="DefaultValue"/> if there is none.</summary>
    public virtual T GetMin(IEnumerable<int> indices = null) {
      return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Min();
    }

    /// <summary>Largest valid value, or <see cref="DefaultValue"/> if there is none.</summary>
    public virtual T GetMax(IEnumerable<int> indices = null) {
      return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Max();
    }

    /// <summary>0.5-quantile of the valid values (of <see cref="DefaultValue"/> if there is none).</summary>
    public virtual T GetMedian(IEnumerable<int> indices = null) {
      return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(0.5);
    }

    /// <summary>
    /// Most frequent valid value, or <see cref="DefaultValue"/> if there is none.
    /// Among equally frequent values the one encountered first is returned (stable ordering).
    /// </summary>
    public virtual T GetMode(IEnumerable<int> indices = null) {
      return GetValidValues(indices)
        .DefaultIfEmpty(DefaultValue)
        .GroupBy(x => x)
        .OrderByDescending(g => g.Count())
        .Select(g => g.Key)
        .First();
    }

    /// <summary><paramref name="alpha"/>-quantile of the valid values (of <see cref="DefaultValue"/> if there is none).</summary>
    public virtual T GetQuantile(double alpha, IEnumerable<int> indices = null) {
      return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(alpha);
    }

    /// <summary>Number of distinct valid values among the given indices (all entries if null).</summary>
    public override int GetDistinctValues(IEnumerable<int> indices = null) {
      return GetValidValues(indices).Distinct().Count();
    }

    /// <summary>Number of entries among the given indices (all entries if null) that are not valid.</summary>
    public override int GetNumberOfMissingValues(IEnumerable<int> indices = null) {
      return GetValues(indices).Count(x => !IsValidValue(x));
    }
    #endregion
  }
}