Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/07/17 09:43:58 (7 years ago)
Author:
pfleck
Message:

#2809 Worked on type-safe PreprocessingDataColumns.

Location:
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/DateTimePreprocessingDataColumn.cs

    r15291 r15309  
    2929namespace HeuristicLab.DataPreprocessing {
    3030  [Item("DateTimePreprocessingDataColumn", "")]
    31   public class DateTimePreprocessingDataColumn : NullablePreprocessingDataColumn<DateTime, TimeSpan> {
     31  public class DateTimePreprocessingDataColumn : PreprocessingDataColumn<DateTime> {
    3232
    3333    #region Constructor, Cloning & Persistence
     
    3535      : base() { }
    3636    public DateTimePreprocessingDataColumn(string name, IEnumerable<DateTime> values)
    37       : base(name, values) {
    38     }
    39     public DateTimePreprocessingDataColumn(string name, IEnumerable<DateTime?> values)
    4037      : base(name, values) {
    4138    }
     
    5350    #endregion
    5451
     52    protected override DateTime DefaultValue { get { return DateTime.MinValue; } }
     53
    5554    #region Statistics
    56     public override TimeSpan GetRange() { return ValidValues.Max() - ValidValues.Min(); }
    57     public override DateTime GetMean() { return AggregateAsDouble(ValidValues, Enumerable.Average); }
    58     public override TimeSpan GetStandardDeviation() { return AggregateDistanceAsDouble(ValidValues, EnumerableStatisticExtensions.StandardDeviation); }
    59     public override TimeSpan GetVariance() { return AggregateDistanceAsDouble(ValidValues, EnumerableStatisticExtensions.Variance); }
     55    public TimeSpan GetRange(IEnumerable<int> indices = null) { return GetMax(indices) - GetMin(indices); }
     56    public DateTime GetMean(IEnumerable<int> indices = null) { return AggregateAsDouble(GetValidValues(indices), Enumerable.Average); }
     57    public TimeSpan GetStandardDeviation(IEnumerable<int> indices = null) { return AggregateDistanceAsDouble(GetValidValues(indices), EnumerableStatisticExtensions.StandardDeviation); }
     58    public TimeSpan GetVariance(IEnumerable<int> indices = null) { return AggregateDistanceAsDouble(GetValidValues(indices), EnumerableStatisticExtensions.Variance); }
    6059    #endregion
    6160
     
    6968    public override string GetValue(int index) {
    7069      var value = Values[index];
    71       return value.HasValue ? value.Value.ToString("o") : string.Empty;
     70      return IsValidValue(value) ? Values[index].ToString("o") : string.Empty;   // format "s" sortable or "o" roundtrip
    7271    }
    7372    public override bool SetValue(string value, int index) {
     
    7776        return true;
    7877      } else if (string.IsNullOrEmpty(value)) {
    79         Values[index] = null;
     78        Values[index] = DateTime.MinValue;
    8079        return true;
    8180      } else {
     
    8685
    8786    private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) {
    88       return new DateTime((long)func(values.Select(x => (double)x.Ticks)));
     87      return values.Any() ? new DateTime((long)func(values.Select(x => (double)x.Ticks))) : DateTime.MinValue;
    8988    }
    9089    private static TimeSpan AggregateDistanceAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) {
    91       return new TimeSpan((long)func(values.Select(x => (double)x.Ticks)));
     90      return values.Any() ? new TimeSpan((long)func(values.Select(x => (double)x.Ticks))) : TimeSpan.Zero;
    9291    }
    9392  }
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/DoublePreprocessingDataColumn.cs

    r15291 r15309  
    3131  [Item("DoublePreprocessingDataColumn", "")]
    3232  [StorableClass]
    33   public sealed class DoublePreprocessingDataColumn : NullablePreprocessingDataColumn<double, double> {
     33  public sealed class DoublePreprocessingDataColumn : PreprocessingDataColumn<double> {
    3434
    3535    #region Constructor, Cloning & Persistence
     36
    3637    public DoublePreprocessingDataColumn()
    3738      : base() { }
     39
    3840    public DoublePreprocessingDataColumn(string name, IEnumerable<double> values)
    39       : base(name, values) {
    40     }
    41     public DoublePreprocessingDataColumn(string name, IEnumerable<double?> values)
    42       : base(name, values) {
    43     }
     41      : base(name, values) { }
    4442
    4543    private DoublePreprocessingDataColumn(DoublePreprocessingDataColumn original, Cloner cloner)
    46       : base(original, cloner) {
    47     }
     44      : base(original, cloner) { }
     45
    4846    public override IDeepCloneable Clone(Cloner cloner) {
    4947      return new DoublePreprocessingDataColumn(this, cloner);
     
    5351    private DoublePreprocessingDataColumn(bool deserializing)
    5452      : base(deserializing) { }
     53
    5554    #endregion
    5655
     
    5958    }
    6059
     60    protected override double DefaultValue { get { return double.NaN; } }
     61
    6162    #region Statistics
    62     public override double GetRange() { return ValidValues.Max() - ValidValues.Min(); }
    63     public override double GetMean() { return ValidValues.Average(); }
    64     public override double GetMedian() { return ValidValues.Quantile(0.5); } // IEnumerable<doube> version is faster
    65     public override double GetStandardDeviation() { return ValidValues.StandardDeviation(); }
    66     public override double GetVariance() { return ValidValues.Variance(); }
    67     public override double GetQuantile(double alpha) { return ValidValues.Quantile(alpha); } // IEnumerable<doube> version is faster
     63    public double GetRange(IEnumerable<int> indices = null) { return GetMax(indices) - GetMin(indices); }
     64    public double GetMean(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Average(); }
     65    public override double GetMedian(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Median(); } // IEnumerable<doube> version is faster
     66    public double GetStandardDeviation(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).StandardDeviation(); }
     67    public double GetVariance(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Variance(); }
     68    public override double GetQuantile(double alpha, IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(double.NaN).Quantile(alpha); } // IEnumerable<doube> version is faster
    6869    #endregion
    6970
     
    8384    }
    8485    public override string GetValue(int index) {
    85       var value = Values[index];
    86       return value.HasValue ? value.Value.ToString("r") : string.Empty;
     86      return Values[index].ToString("r");
    8787    }
    8888    public override bool SetValue(string value, int index) {
     
    9292        return true;
    9393      } else if (string.IsNullOrEmpty(value)) {
    94         Values[index] = null;
     94        Values[index] = double.NaN;
    9595        return true;
    9696      } else {
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/PreprocessingDataColumn.cs

    r15291 r15309  
    5454    public abstract bool IsValidValue(int index);
    5555
     56    #region Column Type Switches
     57    internal void TypeSwitch(Action<DoublePreprocessingDataColumn> doubleAction, Action<StringPreprocessingDataColumn> stringAction = null, Action<DateTimePreprocessingDataColumn> dateTimeAction = null) {
     58      var doubleColumn = this as DoublePreprocessingDataColumn;
     59      if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn);
     60      var stringColumn = this as StringPreprocessingDataColumn;
     61      if (stringColumn != null && stringAction != null) stringAction(stringColumn);
     62      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
     63      if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn);
     64    }
     65    internal void TypeSwitch<TIn>(TIn value, Action<DoublePreprocessingDataColumn, double> doubleAction, Action<StringPreprocessingDataColumn, string> stringAction = null, Action<DateTimePreprocessingDataColumn, DateTime> dateTimeAction = null) {
     66      var doubleColumn = this as DoublePreprocessingDataColumn;
     67      if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn, Convert<double>(value));
     68      var stringColumn = this as StringPreprocessingDataColumn;
     69      if (stringColumn != null && stringAction != null) stringAction(stringColumn, Convert<string>(value));
     70      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
     71      if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn, Convert<DateTime>(value));
     72    }
     73
     74    internal TOut TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, double> doubleFunc, Func<StringPreprocessingDataColumn, string> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime> dateTimeFunc = null) {
     75      var doubleColumn = this as DoublePreprocessingDataColumn;
     76      if (doubleColumn != null && doubleFunc != null) return Convert<TOut>(doubleFunc(doubleColumn));
     77      var stringColumn = this as StringPreprocessingDataColumn;
     78      if (stringColumn != null && stringFunc != null) return Convert<TOut>(stringFunc(stringColumn));
     79      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
     80      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<TOut>(dateTimeFunc(dateTimeColumn));
     81      throw new InvalidOperationException("Invalid data column type.");
     82    }
     83    internal TOut TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, TOut> doubleFunc, Func<StringPreprocessingDataColumn, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, TOut> dateTimeFunc = null) {
     84      var doubleColumn = this as DoublePreprocessingDataColumn;
     85      if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn);
     86      var stringColumn = this as StringPreprocessingDataColumn;
     87      if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn);
     88      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
     89      if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn);
     90      throw new InvalidOperationException("Invalid data column type.");
     91    }
     92    internal TOut TypeSwitch<TIn, TOut>(TIn value, Func<DoublePreprocessingDataColumn, double, TOut> doubleFunc, Func<StringPreprocessingDataColumn, string, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime, TOut> dateTimeFunc = null) {
     93      var doubleColumn = this as DoublePreprocessingDataColumn;
     94      if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn, Convert<double>(value));
     95      var stringColumn = this as StringPreprocessingDataColumn;
     96      if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn, Convert<string>(value));
     97      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
     98      if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn, Convert<DateTime>(value));
     99      throw new InvalidOperationException("Invalid data column type.");
     100    }
     101    internal IEnumerable<TOut> TypeSwitch<TOut>(Func<DoublePreprocessingDataColumn, IEnumerable<double>> doubleFunc, Func<StringPreprocessingDataColumn, IEnumerable<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, IEnumerable<DateTime>> dateTimeFunc = null) {
     102      var doubleColumn = this as DoublePreprocessingDataColumn;
     103      if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn));
     104      var stringColumn = this as StringPreprocessingDataColumn;
     105      if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn));
     106      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
     107      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn));
     108      throw new InvalidOperationException("Invalid data column type.");
     109    }
     110    internal IEnumerable<TOut> TypeSwitch<TOut, TIn>(TIn value, Func<DoublePreprocessingDataColumn, double, IEnumerable<double>> doubleFunc, Func<StringPreprocessingDataColumn, string, IEnumerable<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime, IEnumerable<DateTime>> dateTimeFunc = null) {
     111      var doubleColumn = this as DoublePreprocessingDataColumn;
     112      if (doubleColumn != null && doubleFunc != null) return Convert<IEnumerable<TOut>>(doubleFunc(doubleColumn, Convert<double>(value)));
     113      var stringColumn = this as StringPreprocessingDataColumn;
     114      if (stringColumn != null && stringFunc != null) return Convert<IEnumerable<TOut>>(stringFunc(stringColumn, Convert<string>(value)));
     115      var dateTimeColumn = this as DateTimePreprocessingDataColumn;
     116      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IEnumerable<TOut>>(dateTimeFunc(dateTimeColumn, Convert<DateTime>(value)));
     117      throw new InvalidOperationException("Invalid data column type.");
     118    }
     119
     120    private static T Convert<T>(object obj) { return (T)obj; }
     121    #endregion
     122
     123    #region Statistics
     124    public abstract int GetDistinctValues(IEnumerable<int> indices = null);
     125    public abstract int GetNumberOfMissingValues(IEnumerable<int> indices = null);
     126    #endregion
    56127
    57128    #region String Handling
     
    64135  [Item("PreprocessingDataColumn", "")]
    65136  [StorableClass]
    66   public abstract class PreprocessingDataColumn<TValue, TDistance> : PreprocessingDataColumn
    67     where TValue : class, IComparable<TValue> {
     137  public abstract class PreprocessingDataColumn<T> : PreprocessingDataColumn
     138    where T : IComparable<T> {
    68139
    69140    #region Constructor, Cloning & Persistence
    70141    protected PreprocessingDataColumn()
    71       : this(string.Empty, Enumerable.Empty<TValue>()) { }
    72     protected PreprocessingDataColumn(string name, IEnumerable<TValue> values)
     142      : this(string.Empty, Enumerable.Empty<T>()) { }
     143    protected PreprocessingDataColumn(string name, IEnumerable<T> values)
    73144      : base(name) {
    74       Values = new List<TValue>(values);
    75     }
    76 
    77     protected PreprocessingDataColumn(PreprocessingDataColumn<TValue, TDistance> original, Cloner cloner)
     145      Values = new List<T>(values);
     146    }
     147
     148    protected PreprocessingDataColumn(PreprocessingDataColumn<T> original, Cloner cloner)
    78149      : base(original, cloner) {
    79       Values = new List<TValue>(original.Values);
     150      Values = new List<T>(original.Values);
    80151    }
    81152
     
    86157
    87158    [Storable]
    88     public List<TValue> Values { get; private set; }
    89     public IEnumerable<TValue> ValidValues {
    90       get { return Values.Where(IsValidValue); }
    91     }
     159    internal List<T> Values { get; private set; }
     160    public IEnumerable<T> GetValues(IEnumerable<int> indices = null) {
     161      return indices == null
     162        ? Values
     163        : indices.Select(index => Values[index]);
     164    }
     165    public IEnumerable<T> GetValidValues(IEnumerable<int> indices = null) {
     166      return indices == null
     167        ? Values.Where(IsValidValue)
     168        : indices.Select(index => Values[index]).Where(IsValidValue);
     169    }
     170
     171    protected abstract T DefaultValue { get; }
    92172
    93173    public override Type GetValueType() {
    94       return typeof(TValue);
     174      return typeof(T);
    95175    }
    96176
     
    99179    }
    100180
    101     public TValue this[int index] {
     181    public T this[int index] {
    102182      get { return Values[index]; }
    103183      set { Values[index] = value; }
    104184    }
    105185
    106     public virtual bool IsValidValue(TValue value) { return true; }
     186    public virtual bool IsValidValue(T value) { return true; }
    107187    public override bool IsValidValue(int index) {
    108188      return IsValidValue(Values[index]);
     
    110190
    111191    #region Statistics
    112     public virtual TValue GetMin() { return Values.Min(); }
    113     public virtual TValue GetMax() { return Values.Max(); }
    114     public abstract TDistance GetRange();
    115     public abstract TValue GetMean();
    116     public virtual TValue GetMedian() { return Values.Quantile(0.5); }
    117     public virtual TValue GetMode() { return Values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); }
    118     public abstract TDistance GetStandardDeviation();
    119     public abstract TDistance GetVariance();
    120     public virtual TValue GetQuantile(double alpha) { return Values.Quantile(alpha); }
    121     public virtual int GetDistinctValues() { return Values.GroupBy(x => x).Count(); }
    122     public virtual int GetNumberOfMissingValues() { return Values.Count(IsValidValue); }
    123     #endregion
    124   }
    125 
    126   [Item("NullablePreprocessingDataColumn", "")]
    127   [StorableClass]
    128   public abstract class NullablePreprocessingDataColumn<TValue, TDistance> : PreprocessingDataColumn
    129     where TValue : struct, IComparable<TValue> {
    130 
    131     #region Constructor, Cloning & Persistence
    132     protected NullablePreprocessingDataColumn()
    133       : this(string.Empty, Enumerable.Empty<TValue?>()) { }
    134     protected NullablePreprocessingDataColumn(string name, IEnumerable<TValue> values)
    135       : this(name, values.Select(x => (TValue?)x)) { }
    136     protected NullablePreprocessingDataColumn(string name, IEnumerable<TValue?> values)
    137       : base(name) {
    138       Values = new List<TValue?>(values);
    139     }
    140 
    141     protected NullablePreprocessingDataColumn(NullablePreprocessingDataColumn<TValue, TDistance> original, Cloner cloner)
    142       : base(original, cloner) {
    143       Values = new List<TValue?>(original.Values);
    144     }
    145 
    146     [StorableConstructor]
    147     protected NullablePreprocessingDataColumn(bool deserializing)
    148       : base(deserializing) { }
    149     #endregion
    150 
    151     [Storable]
    152     internal List<TValue?> Values { get; private set; }
    153     protected IEnumerable<TValue> ValidValues {
    154       get { return Values.Where(x => x.HasValue && IsValidValue(x.Value)).Select(x => x.Value); }
    155     }
    156 
    157     public override Type GetValueType() {
    158       return typeof(TValue);
    159     }
    160 
    161     public override int Length {
    162       get { return Values.Count; }
    163     }
    164 
    165     public TValue? this[int index] {
    166       get { return Values[index]; }
    167       set { Values[index] = value; }
    168     }
    169 
    170     public virtual bool IsValidValue(TValue value) { return true; }
    171     public override bool IsValidValue(int index) {
    172       var value = Values[index];
    173       return value.HasValue && IsValidValue(value.Value);
    174     }
    175 
    176     #region Statistics
    177     public virtual TValue GetMin() { return ValidValues.Min(); }
    178     public virtual TValue GetMax() { return ValidValues.Max(); }
    179     public abstract TDistance GetRange();
    180     public abstract TValue GetMean();
    181     public virtual TValue GetMedian() { return ValidValues.Quantile(0.5); }
    182     public virtual TValue GetMode() { return ValidValues.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); }
    183     public abstract TDistance GetStandardDeviation();
    184     public abstract TDistance GetVariance();
    185     public virtual TValue GetQuantile(double alpha) { return ValidValues.Quantile(alpha); }
    186     public virtual int GetDistinctValues() { return ValidValues.GroupBy(x => x).Count(); }
    187     public virtual int GetNumberOfMissingValues() { return Values.Count - ValidValues.Count(); }
     192
     193    public virtual T GetMin(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Min(); }
     194    public virtual T GetMax(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Max(); }
     195    public virtual T GetMedian(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(0.5); }
     196    public virtual T GetMode(IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); }
     197    public virtual T GetQuantile(double alpha, IEnumerable<int> indices = null) { return GetValidValues(indices).DefaultIfEmpty(DefaultValue).Quantile(alpha); }
     198    public override int GetDistinctValues(IEnumerable<int> indices = null) { return GetValidValues(indices).GroupBy(x => x).Count(); }
     199    public override int GetNumberOfMissingValues(IEnumerable<int> indices = null) { return GetValues(indices).Count(x => !IsValidValue(x)); }
    188200    #endregion
    189201  }
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/Columns/StringPreprocessingDataColumn.cs

    r15291 r15309  
    2828  [Item("StringPreprocessingDataColumn", "")]
    2929  [StorableClass]
    30   public sealed class StringPreprocessingDataColumn : PreprocessingDataColumn<string, string> {
     30  public sealed class StringPreprocessingDataColumn : PreprocessingDataColumn<string> {
    3131
    3232    #region Constructor, Cloning & Persistence
     
    5353    }
    5454
    55     #region Statistics
    56     public override string GetRange() { return string.Empty; }
    57     public override string GetMean() { return string.Empty; }
    58     public override string GetStandardDeviation() { return string.Empty; }
    59     public override string GetVariance() { return string.Empty; }
    60     #endregion
     55    protected override string DefaultValue { get { return string.Empty; } }
    6156
    6257    #region IStringConvertibleColumn
    6358    public override bool Validate(string value, out string errorMessage) {
    64       if (value == null) {
    65         errorMessage = "Invalid Value (string must not be null)";
    66         return false;
    67       } else {
    68         errorMessage = string.Empty;
    69         return true;
    70       }
     59      errorMessage = string.Empty;
     60      return true;
    7161    }
    7262    public override string GetValue(int index) {
Note: See TracChangeset for help on using the changeset viewer.