Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/PreprocessingDataColumn.cs @ 15377

Last change on this file since 15377 was 15309, checked in by pfleck, 7 years ago

#2809 Worked on type-safe PreprocessingDataColumns.

File size: 10.9 KB
RevLine 
[15291]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28
29namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Untyped abstract base class of a preprocessing data column.
/// It exposes the members that do not depend on the element type (length, validity,
/// statistics counts, string-based access) and a family of <c>TypeSwitch</c> helpers
/// that dispatch to a handler based on the concrete column class
/// (<c>DoublePreprocessingDataColumn</c>, <c>StringPreprocessingDataColumn</c> or
/// <c>DateTimePreprocessingDataColumn</c>).
/// </summary>
[Item("PreprocessingDataColumn", "")]
[StorableClass]
public abstract class PreprocessingDataColumn : NamedItem {

  #region Constructor, Cloning & Persistence
  /// <summary>Creates a column using the base class' default name.</summary>
  protected PreprocessingDataColumn() {
  }

  /// <summary>Creates a column with the given name.</summary>
  /// <param name="name">Name forwarded to the base class constructor.</param>
  protected PreprocessingDataColumn(string name)
    : base(name) {
  }

  /// <summary>Cloning constructor; forwards to the base class cloning constructor.</summary>
  protected PreprocessingDataColumn(PreprocessingDataColumn original, Cloner cloner)
    : base(original, cloner) {
  }

  /// <summary>Constructor used during deserialization.</summary>
  [StorableConstructor]
  protected PreprocessingDataColumn(bool deserializing)
    : base(deserializing) { }
  #endregion

  /// <summary>Number of entries in the column.</summary>
  public abstract int Length { get; }

  /// <summary>Returns the element type of the column.</summary>
  public abstract Type GetValueType();

  /// <summary>
  /// Returns true if <see cref="GetValueType"/> is exactly <typeparamref name="T"/>
  /// (type equality, not assignability).
  /// </summary>
  public virtual bool IsType<T>() { return GetValueType() == typeof(T); }

  /// <summary>Returns whether the value stored at <paramref name="index"/> is valid.</summary>
  public abstract bool IsValidValue(int index);

  #region Column Type Switches
  /// <summary>
  /// Invokes the action that matches the concrete column type.
  /// Nothing happens (and nothing is thrown) if the matching action is null or the
  /// column is none of the three known column types.
  /// </summary>
  internal void TypeSwitch(Action<DoublePreprocessingDataColumn> doubleAction, Action<StringPreprocessingDataColumn> stringAction = null, Action<DateTimePreprocessingDataColumn> dateTimeAction = null) {
    var doubleColumn = this as DoublePreprocessingDataColumn;
    if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn);
    var stringColumn = this as StringPreprocessingDataColumn;
    if (stringColumn != null && stringAction != null) stringAction(stringColumn);
    var dateTimeColumn = this as DateTimePreprocessingDataColumn;
    if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn);
  }

  /// <summary>
  /// Invokes the action that matches the concrete column type and passes
  /// <paramref name="value"/> cast to the element type of that column.
  /// The cast is only performed for the branch that is actually taken.
  /// Nothing happens if the matching action is null or the column type is unknown.
  /// </summary>
  /// <exception cref="InvalidCastException">
  /// <paramref name="value"/> cannot be cast to the element type of the matching column.
  /// </exception>
  internal void TypeSwitch<TIn>(TIn value, Action<DoublePreprocessingDataColumn, double> doubleAction, Action<StringPreprocessingDataColumn, string> stringAction = null, Action<DateTimePreprocessingDataColumn, DateTime> dateTimeAction = null) {
    var doubleColumn = this as DoublePreprocessingDataColumn;
    if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn, Convert<double>(value));
    var stringColumn = this as StringPreprocessingDataColumn;
    if (stringColumn != null && stringAction != null) stringAction(stringColumn, Convert<string>(value));
    var dateTimeColumn = this as DateTimePreprocessingDataColumn;
    if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn, Convert<DateTime>(value));
  }

  /// <summary>
  /// Evaluates the function that matches the concrete column type and casts its
  /// element-typed result to <typeparamref name="TOut"/>.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// No function was evaluated, i.e. the column is none of the known types or the
  /// matching function is null.
  /// </exception>
  /// <exception cref="InvalidCastException">
  /// The result cannot be cast to <typeparamref name="TOut"/>.
  /// </exception>
  internal TOut TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, double> doubleFunc, Func<StringPreprocessingDataColumn, string> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime> dateTimeFunc = null) {
    var doubleColumn = this as DoublePreprocessingDataColumn;
    if (doubleColumn != null && doubleFunc != null) return Convert<TOut>(doubleFunc(doubleColumn));
    var stringColumn = this as StringPreprocessingDataColumn;
    if (stringColumn != null && stringFunc != null) return Convert<TOut>(stringFunc(stringColumn));
    var dateTimeColumn = this as DateTimePreprocessingDataColumn;
    if (dateTimeColumn != null && dateTimeFunc != null) return Convert<TOut>(dateTimeFunc(dateTimeColumn));
    throw new InvalidOperationException("Invalid data column type.");
  }

  /// <summary>
  /// Evaluates the function that matches the concrete column type and returns its
  /// result unchanged.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// No function was evaluated, i.e. the column is none of the known types or the
  /// matching function is null.
  /// </exception>
  internal TOut TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, TOut> doubleFunc, Func<StringPreprocessingDataColumn, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, TOut> dateTimeFunc = null) {
    var doubleColumn = this as DoublePreprocessingDataColumn;
    if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn);
    var stringColumn = this as StringPreprocessingDataColumn;
    if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn);
    var dateTimeColumn = this as DateTimePreprocessingDataColumn;
    if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn);
    throw new InvalidOperationException("Invalid data column type.");
  }

  /// <summary>
  /// Evaluates the function that matches the concrete column type, passing
  /// <paramref name="value"/> cast to the element type of that column, and returns
  /// the function's result unchanged.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// No function was evaluated, i.e. the column is none of the known types or the
  /// matching function is null.
  /// </exception>
  /// <exception cref="InvalidCastException">
  /// <paramref name="value"/> cannot be cast to the element type of the matching column.
  /// </exception>
  internal TOut TypeSwitch<TIn, TOut>(TIn value, Func<DoublePreprocessingDataColumn, double, TOut> doubleFunc, Func<StringPreprocessingDataColumn, string, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime, TOut> dateTimeFunc = null) {
    var doubleColumn = this as DoublePreprocessingDataColumn;
    if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn, Convert<double>(value));
    var stringColumn = this as StringPreprocessingDataColumn;
    if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn, Convert<string>(value));
    var dateTimeColumn = this as DateTimePreprocessingDataColumn;
    if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn, Convert<DateTime>(value));
    throw new InvalidOperationException("Invalid data column type.");
  }

  /// <summary>
  /// Evaluates the sequence-returning function that matches the concrete column type
  /// and casts the resulting sequence to <c>IEnumerable&lt;TOut&gt;</c>.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// No function was evaluated, i.e. the column is none of the known types or the
  /// matching function is null.
  /// </exception>
  /// <exception cref="InvalidCastException">
  /// The resulting sequence cannot be cast to <c>IEnumerable&lt;TOut&gt;</c>.
  /// </exception>
  internal IEnumerable<TOut> TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, IEnumerable<double>> doubleFunc, Func<StringPreprocessingDataColumn, IEnumerable<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, IEnumerable<DateTime>> dateTimeFunc = null) {
    var doubleColumn = this as DoublePreprocessingDataColumn;
    if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn));
    var stringColumn = this as StringPreprocessingDataColumn;
    if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn));
    var dateTimeColumn = this as DateTimePreprocessingDataColumn;
    if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn));
    throw new InvalidOperationException("Invalid data column type.");
  }

  /// <summary>
  /// Evaluates the sequence-returning function that matches the concrete column type,
  /// passing <paramref name="value"/> cast to the element type of that column, and
  /// casts the resulting sequence to <c>IEnumerable&lt;TOut&gt;</c>.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// No function was evaluated, i.e. the column is none of the known types or the
  /// matching function is null.
  /// </exception>
  /// <exception cref="InvalidCastException">
  /// <paramref name="value"/> or the resulting sequence cannot be cast to the required type.
  /// </exception>
  internal IEnumerable<TOut> TypeSwitch<TOut, TIn>(TIn value, Func<DoublePreprocessingDataColumn, double, IEnumerable<double>> doubleFunc, Func<StringPreprocessingDataColumn, string, IEnumerable<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime, IEnumerable<DateTime>> dateTimeFunc = null) {
    var doubleColumn = this as DoublePreprocessingDataColumn;
    if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn, Convert<double>(value)));
    var stringColumn = this as StringPreprocessingDataColumn;
    if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn, Convert<string>(value)));
    var dateTimeColumn = this as DateTimePreprocessingDataColumn;
    if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn, Convert<DateTime>(value)));
    throw new InvalidOperationException("Invalid data column type.");
  }

  // Plain cast through object: for value types this is an unboxing cast, so the
  // runtime type must match T exactly; no numeric or string conversion is performed.
  private static T Convert<T>(object obj) { return (T)obj; }
  #endregion

  #region Statistics
  /// <summary>Returns the number of distinct values, optionally restricted to <paramref name="indices"/>.</summary>
  public abstract int GetDistinctValues(IEnumerable<int> indices = null);

  /// <summary>Returns the number of missing values, optionally restricted to <paramref name="indices"/>.</summary>
  public abstract int GetNumberOfMissingValues(IEnumerable<int> indices = null);
  #endregion

  #region String Handling
  /// <summary>Checks a string value; <paramref name="errorMessage"/> receives the reason on failure.</summary>
  public abstract bool Validate(string value, out string errorMessage);

  /// <summary>Returns the string representation of the value at <paramref name="index"/>.</summary>
  public abstract string GetValue(int index);

  /// <summary>Sets the value at <paramref name="index"/> from a string; the bool result is defined by the implementation.</summary>
  public abstract bool SetValue(string value, int index);
  #endregion
}
134
/// <summary>
/// Abstract preprocessing data column with elements of type <typeparamref name="T"/>,
/// backed by a <see cref="List{T}"/>. Provides typed access, validity filtering and
/// basic statistics over all values or over a subset selected by row indices.
/// </summary>
/// <typeparam name="T">Element type of the column.</typeparam>
[Item("PreprocessingDataColumn", "")]
[StorableClass]
public abstract class PreprocessingDataColumn<T> : PreprocessingDataColumn
  where T : IComparable<T> {

  #region Constructor, Cloning & Persistence
  /// <summary>Creates an empty column with an empty name.</summary>
  protected PreprocessingDataColumn()
    : this(string.Empty, Enumerable.Empty<T>()) { }

  /// <summary>Creates a named column holding a copy of <paramref name="values"/>.</summary>
  /// <param name="name">Name of the column.</param>
  /// <param name="values">Initial values; copied into a new list.</param>
  protected PreprocessingDataColumn(string name, IEnumerable<T> values)
    : base(name) {
    Values = new List<T>(values);
  }

  /// <summary>
  /// Cloning constructor. The value list is copied into a new list; the elements
  /// themselves are not passed through the cloner.
  /// </summary>
  protected PreprocessingDataColumn(PreprocessingDataColumn<T> original, Cloner cloner)
    : base(original, cloner) {
    Values = new List<T>(original.Values);
  }

  /// <summary>
  /// Constructor used during deserialization. It does not assign <see cref="Values"/>;
  /// presumably the persistence framework restores it afterwards (confirm).
  /// </summary>
  [StorableConstructor]
  protected PreprocessingDataColumn(bool deserializing)
    : base(deserializing) { }
  #endregion

  /// <summary>Backing list of the column's values.</summary>
  [Storable]
  internal List<T> Values { get; private set; }

  /// <summary>
  /// Returns the values at <paramref name="indices"/>, in the order of the indices.
  /// If <paramref name="indices"/> is null the backing list itself is returned (no copy).
  /// </summary>
  public IEnumerable<T> GetValues(IEnumerable<int> indices = null) {
    return indices == null
      ? Values
      : indices.Select(index => Values[index]);
  }

  /// <summary>
  /// Like <see cref="GetValues"/>, but only yields values for which
  /// <see cref="IsValidValue(T)"/> returns true.
  /// </summary>
  public IEnumerable<T> GetValidValues(IEnumerable<int> indices = null) {
    return indices == null
      ? Values.Where(IsValidValue)
      : indices.Select(index => Values[index]).Where(IsValidValue);
  }

  /// <summary>Value the statistics fall back to when there are no valid values.</summary>
  protected abstract T DefaultValue { get; }

  /// <summary>Returns <c>typeof(T)</c>.</summary>
  public override Type GetValueType() {
    return typeof(T);
  }

  /// <summary>Number of values stored in the column.</summary>
  public override int Length {
    get { return Values.Count; }
  }

  /// <summary>Gets or sets the value at <paramref name="index"/> in the backing list.</summary>
  public T this[int index] {
    get { return Values[index]; }
    set { Values[index] = value; }
  }

  /// <summary>
  /// Returns whether <paramref name="value"/> is valid. This base implementation
  /// accepts every value; subclasses may override it.
  /// </summary>
  public virtual bool IsValidValue(T value) { return true; }

  /// <summary>Returns whether the value stored at <paramref name="index"/> is valid.</summary>
  public override bool IsValidValue(int index) {
    return IsValidValue(Values[index]);
  }

  #region Statistics

  // All value statistics below operate on the valid values only and substitute a
  // single DefaultValue element when there are no valid values, so they never
  // run on an empty sequence.

  /// <summary>Smallest valid value, or <see cref="DefaultValue"/> if there is none.</summary>
  public virtual T GetMin(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Min(); }

  /// <summary>Largest valid value, or <see cref="DefaultValue"/> if there is none.</summary>
  public virtual T GetMax(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Max(); }

  /// <summary>0.5-quantile of the valid values (of <see cref="DefaultValue"/> alone if there is none).</summary>
  public virtual T GetMedian(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(0.5); }

  /// <summary>
  /// Most frequent valid value, or <see cref="DefaultValue"/> if there is none.
  /// </summary>
  public virtual T GetMode(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); }

  /// <summary><paramref name="alpha"/>-quantile of the valid values (of <see cref="DefaultValue"/> alone if there is none).</summary>
  public virtual T GetQuantile(double alpha, IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(alpha); }

  /// <summary>Number of distinct valid values (default equality comparer); 0 if there are none.</summary>
  public override int GetDistinctValues(IEnumerable<int> indices = null) { return GetValidValues(indices).Distinct().Count(); }

  /// <summary>Number of values for which <see cref="IsValidValue(T)"/> returns false.</summary>
  public override int GetNumberOfMissingValues(IEnumerable<int> indices = null) { return GetValues(indices).Count(x => !IsValidValue(x)); }
  #endregion
}
202}
Note: See TracBrowser for help on using the repository browser.