Changeset 15309 for branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns
- Timestamp:
- 08/07/17 09:43:58 (7 years ago)
- Location:
- branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/DateTimePreprocessingDataColumn.cs
r15291 r15309 29 29 namespace HeuristicLab.DataPreprocessing { 30 30 [Item("DateTimePreprocessingDataColumn", "")] 31 public class DateTimePreprocessingDataColumn : NullablePreprocessingDataColumn<DateTime, TimeSpan> {31 public class DateTimePreprocessingDataColumn : PreprocessingDataColumn<DateTime> { 32 32 33 33 #region Constructor, Cloning & Persistence … … 35 35 : base() { } 36 36 public DateTimePreprocessingDataColumn(string name, IEnumerable<DateTime> values) 37 : base(name, values) {38 }39 public DateTimePreprocessingDataColumn(string name, IEnumerable<DateTime?> values)40 37 : base(name, values) { 41 38 } … … 53 50 #endregion 54 51 52 protected override DateTime DefaultValue { get { return DateTime.MinValue; } } 53 55 54 #region Statistics 56 public override TimeSpan GetRange() { return ValidValues.Max() - ValidValues.Min(); }57 public override DateTime GetMean() { return AggregateAsDouble(ValidValues, Enumerable.Average); }58 public override TimeSpan GetStandardDeviation() { return AggregateDistanceAsDouble(ValidValues, EnumerableStatisticExtensions.StandardDeviation); }59 public override TimeSpan GetVariance() { return AggregateDistanceAsDouble(ValidValues, EnumerableStatisticExtensions.Variance); }55 public TimeSpan GetRange(IEnumerable<int> indices = null) { return GetMax(indices) - GetMin(indices); } 56 public DateTime GetMean(IEnumerable<int> indices = null) { return AggregateAsDouble(GetValidValues(indices), Enumerable.Average); } 57 public TimeSpan GetStandardDeviation(IEnumerable<int> indices = null) { return AggregateDistanceAsDouble(GetValidValues(indices), EnumerableStatisticExtensions.StandardDeviation); } 58 public TimeSpan GetVariance(IEnumerable<int> indices = null) { return AggregateDistanceAsDouble(GetValidValues(indices), EnumerableStatisticExtensions.Variance); } 60 59 #endregion 61 60 … … 69 68 public override string GetValue(int index) { 70 69 var value = Values[index]; 71 return value.HasValue ? value.Value.ToString("o") : string.Empty;70 return IsValidValue(value) ? Values[index].ToString("o") : string.Empty; // format "s" sortable or "o" roundtrip 72 71 } 73 72 public override bool SetValue(string value, int index) { … … 77 76 return true; 78 77 } else if (string.IsNullOrEmpty(value)) { 79 Values[index] = null;78 Values[index] = DateTime.MinValue; 80 79 return true; 81 80 } else { … … 86 85 87 86 private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) { 88 return new DateTime((long)func(values.Select(x => (double)x.Ticks)));87 return values.Any() ? new DateTime((long)func(values.Select(x => (double)x.Ticks))) : DateTime.MinValue; 89 88 } 90 89 private static TimeSpan AggregateDistanceAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) { 91 return new TimeSpan((long)func(values.Select(x => (double)x.Ticks)));90 return values.Any() ? new TimeSpan((long)func(values.Select(x => (double)x.Ticks))) : TimeSpan.Zero; 92 91 } 93 92 } -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/DoublePreprocessingDataColumn.cs
r15291 r15309 31 31 [Item("DoublePreprocessingDataColumn", "")] 32 32 [StorableClass] 33 public sealed class DoublePreprocessingDataColumn : NullablePreprocessingDataColumn<double,double> {33 public sealed class DoublePreprocessingDataColumn : PreprocessingDataColumn<double> { 34 34 35 35 #region Constructor, Cloning & Persistence 36 36 37 public DoublePreprocessingDataColumn() 37 38 : base() { } 39 38 40 public DoublePreprocessingDataColumn(string name, IEnumerable<double> values) 39 : base(name, values) { 40 } 41 public DoublePreprocessingDataColumn(string name, IEnumerable<double?> values) 42 : base(name, values) { 43 } 41 : base(name, values) { } 44 42 45 43 private DoublePreprocessingDataColumn(DoublePreprocessingDataColumn original, Cloner cloner) 46 : base(original, cloner) { 47 } 44 : base(original, cloner) { } 45 48 46 public override IDeepCloneable Clone(Cloner cloner) { 49 47 return new DoublePreprocessingDataColumn(this, cloner); … … 53 51 private DoublePreprocessingDataColumn(bool deserializing) 54 52 : base(deserializing) { } 53 55 54 #endregion 56 55 … … 59 58 } 60 59 60 protected override double DefaultValue { get { return double.NaN; } } 61 61 62 #region Statistics 62 public override double GetRange() { return ValidValues.Max() - ValidValues.Min(); }63 public override double GetMean() { return ValidValues.Average(); }64 public override double GetMedian( ) { return ValidValues.Quantile(0.5); } // IEnumerable<doube> version is faster65 public override double GetStandardDeviation() { return ValidValues.StandardDeviation(); }66 public override double GetVariance() { return ValidValues.Variance(); }67 public override double GetQuantile(double alpha ) { return ValidValues.Quantile(alpha); } // IEnumerable<doube> version is faster63 public double GetRange(IEnumerable<int> indices = null) { return GetMax(indices) - GetMin(indices); } 64 public double GetMean(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Average(); } 65 public override double GetMedian(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Median(); } // IEnumerable<doube> version is faster 66 public double GetStandardDeviation(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).StandardDeviation(); } 67 public double GetVariance(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Variance(); } 68 public override double GetQuantile(double alpha, IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(double.NaN).Quantile(alpha); } // IEnumerable<doube> version is faster 68 69 #endregion 69 70 … … 83 84 } 84 85 public override string GetValue(int index) { 85 var value = Values[index]; 86 return value.HasValue ? value.Value.ToString("r") : string.Empty; 86 return Values[index].ToString("r"); 87 87 } 88 88 public override bool SetValue(string value, int index) { … … 92 92 return true; 93 93 } else if (string.IsNullOrEmpty(value)) { 94 Values[index] = null;94 Values[index] = double.NaN; 95 95 return true; 96 96 } else { -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/PreprocessingDataColumn.cs
r15291 r15309 54 54 public abstract bool IsValidValue(int index); 55 55 56 #region Column Type Switches 57 internal void TypeSwitch(Action<DoublePreprocessingDataColumn> doubleAction, Action<StringPreprocessingDataColumn> stringAction = null, Action<DateTimePreprocessingDataColumn> dateTimeAction = null) { 58 var doubleColumn = this as DoublePreprocessingDataColumn; 59 if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn); 60 var stringColumn = this as StringPreprocessingDataColumn; 61 if (stringColumn != null && stringAction != null) stringAction(stringColumn); 62 var dateTimeColumn = this as DateTimePreprocessingDataColumn; 63 if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn); 64 } 65 internal void TypeSwitch<TIn>(TIn value, Action<DoublePreprocessingDataColumn, double> doubleAction, Action<StringPreprocessingDataColumn, string> stringAction = null, Action<DateTimePreprocessingDataColumn, DateTime> dateTimeAction = null) { 66 var doubleColumn = this as DoublePreprocessingDataColumn; 67 if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn, Convert<double>(value)); 68 var stringColumn = this as StringPreprocessingDataColumn; 69 if (stringColumn != null && stringAction != null) stringAction(stringColumn, Convert<string>(value)); 70 var dateTimeColumn = this as DateTimePreprocessingDataColumn; 71 if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn, Convert<DateTime>(value)); 72 } 73 74 internal TOut TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, double> doubleFunc, Func<StringPreprocessingDataColumn, string> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime> dateTimeFunc = null) { 75 var doubleColumn = this as DoublePreprocessingDataColumn; 76 if (doubleColumn != null && doubleFunc != null) return Convert<TOut>(doubleFunc(doubleColumn)); 77 var stringColumn = this as StringPreprocessingDataColumn; 78 if (stringColumn != null && stringFunc != null) return Convert<TOut>(stringFunc(stringColumn)); 79 var dateTimeColumn = this as DateTimePreprocessingDataColumn; 80 if (dateTimeColumn != null && dateTimeFunc != null) return Convert<TOut>(dateTimeFunc(dateTimeColumn)); 81 throw new InvalidOperationException("Invalid data column type."); 82 } 83 internal TOut TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, TOut> doubleFunc, Func<StringPreprocessingDataColumn, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, TOut> dateTimeFunc = null) { 84 var doubleColumn = this as DoublePreprocessingDataColumn; 85 if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn); 86 var stringColumn = this as StringPreprocessingDataColumn; 87 if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn); 88 var dateTimeColumn = this as DateTimePreprocessingDataColumn; 89 if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn); 90 throw new InvalidOperationException("Invalid data column type."); 91 } 92 internal TOut TypeSwitch<TIn, TOut>(TIn value, Func<DoublePreprocessingDataColumn, double, TOut> doubleFunc, Func<StringPreprocessingDataColumn, string, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime, TOut> dateTimeFunc = null) { 93 var doubleColumn = this as DoublePreprocessingDataColumn; 94 if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn, Convert<double>(value)); 95 var stringColumn = this as StringPreprocessingDataColumn; 96 if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn, Convert<string>(value)); 97 var dateTimeColumn = this as DateTimePreprocessingDataColumn; 98 if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn, Convert<DateTime>(value)); 99 throw new InvalidOperationException("Invalid data column type."); 100 } 101 internal IEnumerable<TOut> TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, IEnumerable<double>> doubleFunc, Func<StringPreprocessingDataColumn, IEnumerable<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, IEnumerable<DateTime>> dateTimeFunc = null) { 102 var doubleColumn = this as DoublePreprocessingDataColumn; 103 if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn)); 104 var stringColumn = this as StringPreprocessingDataColumn; 105 if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn)); 106 var dateTimeColumn = this as DateTimePreprocessingDataColumn; 107 if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn)); 108 throw new InvalidOperationException("Invalid data column type."); 109 } 110 internal IEnumerable<TOut> TypeSwitch<TOut, TIn>(TIn value, Func<DoublePreprocessingDataColumn, double, IEnumerable<double>> doubleFunc, Func<StringPreprocessingDataColumn, string, IEnumerable<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime, IEnumerable<DateTime>> dateTimeFunc = null) { 111 var doubleColumn = this as DoublePreprocessingDataColumn; 112 if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn, Convert<double>(value))); 113 var stringColumn = this as StringPreprocessingDataColumn; 114 if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn, Convert<string>(value))); 115 var dateTimeColumn = this as DateTimePreprocessingDataColumn; 116 if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn, Convert<DateTime>(value))); 117 throw new InvalidOperationException("Invalid data column type."); 118 } 119 120 private static T Convert<T>(object obj) { return (T)obj; } 121 #endregion 122 123 #region Statistics 124 public abstract int GetDistinctValues(IEnumerable<int> indices = null); 125 public abstract int GetNumberOfMissingValues(IEnumerable<int> indices = null); 126 #endregion 56 127 57 128 #region String Handling … … 64 135 [Item("PreprocessingDataColumn", "")] 65 136 [StorableClass] 66 public abstract class PreprocessingDataColumn<T Value, TDistance> : PreprocessingDataColumn67 where T Value : class, IComparable<TValue> {137 public abstract class PreprocessingDataColumn<T> : PreprocessingDataColumn 138 where T : IComparable<T> { 68 139 69 140 #region Constructor, Cloning & Persistence 70 141 protected PreprocessingDataColumn() 71 : this(string.Empty, Enumerable.Empty<T Value>()) { }72 protected PreprocessingDataColumn(string name, IEnumerable<T Value> values)142 : this(string.Empty, Enumerable.Empty<T>()) { } 143 protected PreprocessingDataColumn(string name, IEnumerable<T> values) 73 144 : base(name) { 74 Values = new List<T Value>(values);75 } 76 77 protected PreprocessingDataColumn(PreprocessingDataColumn<T Value, TDistance> original, Cloner cloner)145 Values = new List<T>(values); 146 } 147 148 protected PreprocessingDataColumn(PreprocessingDataColumn<T> original, Cloner cloner) 78 149 : base(original, cloner) { 79 Values = new List<T Value>(original.Values);150 Values = new List<T>(original.Values); 80 151 } 81 152 … … 86 157 87 158 [Storable] 88 public List<TValue> Values { get; private set; } 89 public IEnumerable<TValue> ValidValues { 90 get { return Values.Where(IsValidValue); } 91 } 159 internal List<T> Values { get; private set; } 160 public IEnumerable<T> GetValues(IEnumerable<int> indices = null) { 161 return indices == null 162 ? Values 163 : indices.Select(index => Values[index]); 164 } 165 public IEnumerable<T> GetValidValues(IEnumerable<int> indices = null) { 166 return indices == null 167 ? Values.Where(IsValidValue) 168 : indices.Select(index => Values[index]).Where(IsValidValue); 169 } 170 171 protected abstract T DefaultValue { get; } 92 172 93 173 public override Type GetValueType() { 94 return typeof(T Value);174 return typeof(T); 95 175 } 96 176 … … 99 179 } 100 180 101 public T Valuethis[int index] {181 public T this[int index] { 102 182 get { return Values[index]; } 103 183 set { Values[index] = value; } 104 184 } 105 185 106 public virtual bool IsValidValue(T Valuevalue) { return true; }186 public virtual bool IsValidValue(T value) { return true; } 107 187 public override bool IsValidValue(int index) { 108 188 return IsValidValue(Values[index]); … … 110 190 111 191 #region Statistics 112 public virtual TValue GetMin() { return Values.Min(); } 113 public virtual TValue GetMax() { return Values.Max(); } 114 public abstract TDistance GetRange(); 115 public abstract TValue GetMean(); 116 public virtual TValue GetMedian() { return Values.Quantile(0.5); } 117 public virtual TValue GetMode() { return Values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); } 118 public abstract TDistance GetStandardDeviation(); 119 public abstract TDistance GetVariance(); 120 public virtual TValue GetQuantile(double alpha) { return Values.Quantile(alpha); } 121 public virtual int GetDistinctValues() { return Values.GroupBy(x => x).Count(); } 122 public virtual int GetNumberOfMissingValues() { return Values.Count(IsValidValue); } 123 #endregion 124 } 125 126 [Item("NullablePreprocessingDataColumn", "")] 127 [StorableClass] 128 public abstract class NullablePreprocessingDataColumn<TValue, TDistance> : PreprocessingDataColumn 129 where TValue : struct, IComparable<TValue> { 130 131 #region Constructor, Cloning & Persistence 132 protected NullablePreprocessingDataColumn() 133 : this(string.Empty, Enumerable.Empty<TValue?>()) { } 134 protected NullablePreprocessingDataColumn(string name, IEnumerable<TValue> values) 135 : this(name, values.Select(x => (TValue?)x)) { } 136 protected NullablePreprocessingDataColumn(string name, IEnumerable<TValue?> values) 137 : base(name) { 138 Values = new List<TValue?>(values); 139 } 140 141 protected NullablePreprocessingDataColumn(NullablePreprocessingDataColumn<TValue, TDistance> original, Cloner cloner) 142 : base(original, cloner) { 143 Values = new List<TValue?>(original.Values); 144 } 145 146 [StorableConstructor] 147 protected NullablePreprocessingDataColumn(bool deserializing) 148 : base(deserializing) { } 149 #endregion 150 151 [Storable] 152 internal List<TValue?> Values { get; private set; } 153 protected IEnumerable<TValue> ValidValues { 154 get { return Values.Where(x => x.HasValue && IsValidValue(x.Value)).Select(x => x.Value); } 155 } 156 157 public override Type GetValueType() { 158 return typeof(TValue); 159 } 160 161 public override int Length { 162 get { return Values.Count; } 163 } 164 165 public TValue? this[int index] { 166 get { return Values[index]; } 167 set { Values[index] = value; } 168 } 169 170 public virtual bool IsValidValue(TValue value) { return true; } 171 public override bool IsValidValue(int index) { 172 var value = Values[index]; 173 return value.HasValue && IsValidValue(value.Value); 174 } 175 176 #region Statistics 177 public virtual TValue GetMin() { return ValidValues.Min(); } 178 public virtual TValue GetMax() { return ValidValues.Max(); } 179 public abstract TDistance GetRange(); 180 public abstract TValue GetMean(); 181 public virtual TValue GetMedian() { return ValidValues.Quantile(0.5); } 182 public virtual TValue GetMode() { return ValidValues.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); } 183 public abstract TDistance GetStandardDeviation(); 184 public abstract TDistance GetVariance(); 185 public virtual TValue GetQuantile(double alpha) { return ValidValues.Quantile(alpha); } 186 public virtual int GetDistinctValues() { return ValidValues.GroupBy(x => x).Count(); } 187 public virtual int GetNumberOfMissingValues() { return Values.Count - ValidValues.Count(); } 192 193 public virtual T GetMin(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Min(); } 194 public virtual T GetMax(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Max(); } 195 public virtual T GetMedian(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(0.5); } 196 public virtual T GetMode(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); } 197 public virtual T GetQuantile(double alpha, IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(alpha); } 198 public override int GetDistinctValues(IEnumerable<int> indices = null) { return GetValidValues(indices).GroupBy(x => x).Count(); } 199 public override int GetNumberOfMissingValues(IEnumerable<int> indices = null) { return GetValues(indices).Count(x => !IsValidValue(x)); } 188 200 #endregion 189 201 } -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/StringPreprocessingDataColumn.cs
r15291 r15309 28 28 [Item("StringPreprocessingDataColumn", "")] 29 29 [StorableClass] 30 public sealed class StringPreprocessingDataColumn : PreprocessingDataColumn<string , string> {30 public sealed class StringPreprocessingDataColumn : PreprocessingDataColumn<string> { 31 31 32 32 #region Constructor, Cloning & Persistence … … 53 53 } 54 54 55 #region Statistics 56 public override string GetRange() { return string.Empty; } 57 public override string GetMean() { return string.Empty; } 58 public override string GetStandardDeviation() { return string.Empty; } 59 public override string GetVariance() { return string.Empty; } 60 #endregion 55 protected override string DefaultValue { get { return string.Empty; } } 61 56 62 57 #region IStringConvertibleColumn 63 58 public override bool Validate(string value, out string errorMessage) { 64 if (value == null) { 65 errorMessage = "Invalid Value (string must not be null)"; 66 return false; 67 } else { 68 errorMessage = string.Empty; 69 return true; 70 } 59 errorMessage = string.Empty; 60 return true; 71 61 } 72 62 public override string GetValue(int index) {
Note: See TracChangeset
for help on using the changeset viewer.