Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/PreprocessingDataColumn.cs @ 15291

Last change on this file since 15291 was 15291, checked in by pfleck, 7 years ago

#2809: Added (Double/String/DateTime)PreprocessingDataColumn. (experimental state)

File size: 6.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28
29namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Non-generic, abstract base of a preprocessing data column. It exposes the
/// column length, the type of the stored values, a per-index validity check
/// and string-based access to the individual entries.
/// </summary>
[Item("PreprocessingDataColumn", "")]
[StorableClass]
public abstract class PreprocessingDataColumn : NamedItem {

  #region Constructor, Cloning & Persistence
  /// <summary>Constructor used by the persistence mechanism.</summary>
  [StorableConstructor]
  protected PreprocessingDataColumn(bool deserializing)
    : base(deserializing) {
  }

  /// <summary>Cloning constructor; forwards to the base cloning constructor.</summary>
  protected PreprocessingDataColumn(PreprocessingDataColumn original, Cloner cloner)
    : base(original, cloner) {
  }

  /// <summary>Creates a column using the default settings of the base class.</summary>
  protected PreprocessingDataColumn()
    : base() {
  }

  /// <summary>Creates a column with the given name.</summary>
  /// <param name="name">The name passed on to the base class.</param>
  protected PreprocessingDataColumn(string name)
    : base(name) {
  }
  #endregion

  /// <summary>Number of entries in this column.</summary>
  public abstract int Length { get; }

  /// <summary>Returns the type of the values stored in this column.</summary>
  public abstract Type GetValueType();

  /// <summary>
  /// Checks whether the value type of this column is exactly <typeparamref name="T"/>.
  /// </summary>
  /// <returns><c>true</c> if <see cref="GetValueType"/> equals <c>typeof(T)</c>.</returns>
  public virtual bool IsType<T>() {
    Type columnType = GetValueType();
    return columnType == typeof(T);
  }

  /// <summary>Tells whether the entry at <paramref name="index"/> is considered valid.</summary>
  public abstract bool IsValidValue(int index);


  #region String Handling
  // NOTE(review): the exact contracts of the string-based members are defined by the
  // concrete subclasses, which are not part of this file - confirm before relying on them.

  /// <summary>Validates a string representation of a value.</summary>
  /// <param name="value">The string to validate.</param>
  /// <param name="errorMessage">Receives a message set by the implementation.</param>
  public abstract bool Validate(string value, out string errorMessage);

  /// <summary>Returns the entry at <paramref name="index"/> as a string.</summary>
  public abstract string GetValue(int index);

  /// <summary>Sets the entry at <paramref name="index"/> from a string.</summary>
  public abstract bool SetValue(string value, int index);
  #endregion
}
63
/// <summary>
/// Abstract column holding reference-type values of type <typeparamref name="TValue"/>.
/// <typeparamref name="TDistance"/> is the type used for difference-like statistics
/// (range, standard deviation, variance).
/// </summary>
[Item("PreprocessingDataColumn", "")]
[StorableClass]
public abstract class PreprocessingDataColumn<TValue, TDistance> : PreprocessingDataColumn
  where TValue : class, IComparable<TValue> {

  #region Constructor, Cloning & Persistence
  /// <summary>Creates an unnamed, empty column.</summary>
  protected PreprocessingDataColumn()
    : this(string.Empty, Enumerable.Empty<TValue>()) { }

  /// <summary>Creates a named column containing a copy of <paramref name="values"/>.</summary>
  /// <param name="name">The column name.</param>
  /// <param name="values">The initial values; they are copied into a new list.</param>
  protected PreprocessingDataColumn(string name, IEnumerable<TValue> values)
    : base(name) {
    Values = new List<TValue>(values);
  }

  /// <summary>
  /// Cloning constructor. The value list itself is duplicated; the contained
  /// references are shared with <paramref name="original"/>.
  /// </summary>
  protected PreprocessingDataColumn(PreprocessingDataColumn<TValue, TDistance> original, Cloner cloner)
    : base(original, cloner) {
    Values = new List<TValue>(original.Values);
  }

  /// <summary>Constructor used by the persistence mechanism.</summary>
  // Values is not assigned here; presumably it is restored through its [Storable] attribute.
  [StorableConstructor]
  protected PreprocessingDataColumn(bool deserializing)
    : base(deserializing) { }
  #endregion

  /// <summary>The stored values of this column.</summary>
  [Storable]
  public List<TValue> Values { get; private set; }

  /// <summary>All stored values for which <see cref="IsValidValue(TValue)"/> is true (lazily evaluated).</summary>
  public IEnumerable<TValue> ValidValues {
    get { return Values.Where(IsValidValue); }
  }

  /// <summary>Returns <c>typeof(TValue)</c>.</summary>
  public override Type GetValueType() {
    return typeof(TValue);
  }

  /// <summary>Number of stored values.</summary>
  public override int Length {
    get { return Values.Count; }
  }

  /// <summary>Gets or sets the value at <paramref name="index"/>.</summary>
  public TValue this[int index] {
    get { return Values[index]; }
    set { Values[index] = value; }
  }

  /// <summary>
  /// Tells whether <paramref name="value"/> is a valid entry. The default
  /// implementation accepts every value; subclasses may override it.
  /// </summary>
  public virtual bool IsValidValue(TValue value) { return true; }

  /// <summary>Tells whether the value stored at <paramref name="index"/> is valid.</summary>
  public override bool IsValidValue(int index) {
    return IsValidValue(Values[index]);
  }

  #region Statistics
  // NOTE(review): unlike the nullable column, the statistics below operate on all
  // Values rather than ValidValues - confirm whether this is intended.
  // Quantile is an extension method that is not defined in this file.

  /// <summary>Smallest stored value.</summary>
  public virtual TValue GetMin() { return Values.Min(); }

  /// <summary>Largest stored value.</summary>
  public virtual TValue GetMax() { return Values.Max(); }

  public abstract TDistance GetRange();
  public abstract TValue GetMean();

  /// <summary>The 0.5 quantile of the stored values.</summary>
  public virtual TValue GetMedian() { return Values.Quantile(0.5); }

  /// <summary>
  /// The most frequent stored value. Throws <see cref="InvalidOperationException"/>
  /// (from <c>First()</c>) when the column is empty.
  /// </summary>
  public virtual TValue GetMode() {
    return Values
      .GroupBy(x => x)
      .OrderByDescending(g => g.Count())
      .Select(g => g.Key)
      .First();
  }

  public abstract TDistance GetStandardDeviation();
  public abstract TDistance GetVariance();

  /// <summary>The <paramref name="alpha"/> quantile of the stored values.</summary>
  public virtual TValue GetQuantile(double alpha) { return Values.Quantile(alpha); }

  /// <summary>Number of distinct stored values (default equality).</summary>
  public virtual int GetDistinctValues() { return Values.Distinct().Count(); }

  /// <summary>
  /// Number of stored values that are NOT valid according to
  /// <see cref="IsValidValue(TValue)"/>.
  /// </summary>
  public virtual int GetNumberOfMissingValues() {
    // The predicate must be negated: counting IsValidValue directly would yield
    // the number of valid entries instead of the number of missing ones.
    return Values.Count(x => !IsValidValue(x));
  }
  #endregion
}
125
/// <summary>
/// Abstract column holding value-type entries of type <typeparamref name="TValue"/>,
/// stored as nullable values. An entry without a value is never valid.
/// <typeparamref name="TDistance"/> is the type used for difference-like statistics.
/// </summary>
[Item("NullablePreprocessingDataColumn", "")]
[StorableClass]
public abstract class NullablePreprocessingDataColumn<TValue, TDistance> : PreprocessingDataColumn
  where TValue : struct, IComparable<TValue> {

  #region Constructor, Cloning & Persistence
  /// <summary>Creates an unnamed, empty column.</summary>
  protected NullablePreprocessingDataColumn()
    : this(string.Empty, Enumerable.Empty<TValue?>()) {
  }

  /// <summary>Creates a named column from non-nullable values.</summary>
  protected NullablePreprocessingDataColumn(string name, IEnumerable<TValue> values)
    : this(name, values.Select(item => (TValue?)item)) {
  }

  /// <summary>Creates a named column containing a copy of <paramref name="values"/>.</summary>
  protected NullablePreprocessingDataColumn(string name, IEnumerable<TValue?> values)
    : base(name) {
    Values = new List<TValue?>(values);
  }

  /// <summary>Cloning constructor; the value list of <paramref name="original"/> is copied.</summary>
  protected NullablePreprocessingDataColumn(NullablePreprocessingDataColumn<TValue, TDistance> original, Cloner cloner)
    : base(original, cloner) {
    Values = new List<TValue?>(original.Values);
  }

  /// <summary>Constructor used by the persistence mechanism.</summary>
  [StorableConstructor]
  protected NullablePreprocessingDataColumn(bool deserializing)
    : base(deserializing) {
  }
  #endregion

  /// <summary>The stored (possibly missing) values of this column.</summary>
  [Storable]
  internal List<TValue?> Values { get; private set; }

  /// <summary>
  /// The unwrapped entries that have a value and pass
  /// <see cref="IsValidValue(TValue)"/> (lazily evaluated).
  /// </summary>
  protected IEnumerable<TValue> ValidValues {
    get {
      return from entry in Values
             where entry.HasValue && IsValidValue(entry.Value)
             select entry.Value;
    }
  }

  /// <summary>Returns <c>typeof(TValue)</c> (the non-nullable type).</summary>
  public override Type GetValueType() {
    return typeof(TValue);
  }

  /// <summary>Number of stored entries, including missing ones.</summary>
  public override int Length {
    get { return Values.Count; }
  }

  /// <summary>Gets or sets the entry at <paramref name="index"/>.</summary>
  public TValue? this[int index] {
    get { return Values[index]; }
    set { Values[index] = value; }
  }

  /// <summary>
  /// Tells whether <paramref name="value"/> is a valid entry. The default
  /// implementation accepts every value; subclasses may override it.
  /// </summary>
  public virtual bool IsValidValue(TValue value) {
    return true;
  }

  /// <summary>
  /// An entry is valid when it has a value and that value passes
  /// <see cref="IsValidValue(TValue)"/>.
  /// </summary>
  public override bool IsValidValue(int index) {
    TValue? entry = Values[index];
    if (!entry.HasValue) return false;
    return IsValidValue(entry.Value);
  }

  #region Statistics
  // All statistics below are computed on ValidValues only.
  // Quantile is an extension method that is not defined in this file.

  /// <summary>Smallest valid value.</summary>
  public virtual TValue GetMin() {
    return ValidValues.Min();
  }

  /// <summary>Largest valid value.</summary>
  public virtual TValue GetMax() {
    return ValidValues.Max();
  }

  public abstract TDistance GetRange();
  public abstract TValue GetMean();

  /// <summary>The 0.5 quantile of the valid values.</summary>
  public virtual TValue GetMedian() {
    return ValidValues.Quantile(0.5);
  }

  /// <summary>The most frequent valid value; <c>First()</c> throws when there is none.</summary>
  public virtual TValue GetMode() {
    var byFrequency = ValidValues
      .GroupBy(v => v)
      .OrderByDescending(grp => grp.Count());
    return byFrequency.First().Key;
  }

  public abstract TDistance GetStandardDeviation();
  public abstract TDistance GetVariance();

  /// <summary>The <paramref name="alpha"/> quantile of the valid values.</summary>
  public virtual TValue GetQuantile(double alpha) {
    return ValidValues.Quantile(alpha);
  }

  /// <summary>Number of distinct valid values (default equality).</summary>
  public virtual int GetDistinctValues() {
    return ValidValues.Distinct().Count();
  }

  /// <summary>Number of entries that are missing or not valid.</summary>
  public virtual int GetNumberOfMissingValues() {
    int total = Values.Count;
    int valid = ValidValues.Count();
    return total - valid;
  }
  #endregion
}
190}
Note: See TracBrowser for help on using the repository browser.