Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/07/17 09:43:58 (7 years ago)
Author:
pfleck
Message:

#2809 Worked on type-safe PreprocessingDataColumns.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs

    r15291 r15309  
    3636  public class PreprocessingData : NamedItem, IPreprocessingData {
    3737
    38     [Storable]
    39     protected List<PreprocessingDataColumn> dataColumns;
     38    [Storable] private List<PreprocessingDataColumn> dataColumns;
     39
     40    public IList<PreprocessingDataColumn> DataColumns {
     41      get { return dataColumns; }
     42    }
     43
    4044
    4145    #region Constructor, Cloning & Persistence
     
    99103    }
    100104
    101     private void ColumnTypeSwitchAction<T>(int columnIndex, T value, Action<DoublePreprocessingDataColumn, double?> doubleAction,
    102       Action<StringPreprocessingDataColumn, string> stringAction = null, Action<DateTimePreprocessingDataColumn, DateTime?> dateTimeAction = null) {
    103       ColumnTypeSwitchAction(dataColumns[columnIndex], value, doubleAction, stringAction, dateTimeAction);
    104     }
    105     private void ColumnTypeSwitchAction<T>(PreprocessingDataColumn column, T value, Action<DoublePreprocessingDataColumn, double?> doubleAction,
    106       Action<StringPreprocessingDataColumn, string> stringAction = null, Action<DateTimePreprocessingDataColumn, DateTime?> dateTimeAction = null) {
    107       var doubleColumn = column as DoublePreprocessingDataColumn;
    108       if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn, Convert<double?>(value));
    109       var stringColumn = column as StringPreprocessingDataColumn;
    110       if (stringColumn != null && stringAction != null) stringAction(stringColumn, Convert<string>(value));
    111       var dateTimeColumn = column as DateTimePreprocessingDataColumn;
    112       if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn, Convert<DateTime?>(value));
    113     }
    114 
    115     private void ColumnTypeSwitchAction(int columnIndex, Action<DoublePreprocessingDataColumn> doubleAction,
    116       Action<StringPreprocessingDataColumn> stringAction = null, Action<DateTimePreprocessingDataColumn> dateTimeAction = null) {
    117       ColumnTypeSwitchAction(dataColumns[columnIndex], doubleAction, stringAction, dateTimeAction);
    118     }
    119     private void ColumnTypeSwitchAction(PreprocessingDataColumn column, Action<DoublePreprocessingDataColumn> doubleAction,
    120       Action<StringPreprocessingDataColumn> stringAction = null, Action<DateTimePreprocessingDataColumn> dateTimeAction = null) {
    121       var doubleColumn = column as DoublePreprocessingDataColumn;
    122       if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn);
    123       var stringColumn = column as StringPreprocessingDataColumn;
    124       if (stringColumn != null && stringAction != null) stringAction(stringColumn);
    125       var dateTimeColumn = column as DateTimePreprocessingDataColumn;
    126       if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn);
    127     }
    128 
    129 
    130     private T ColumnTypeSwitchFunc<T>(int columnIndex, Func<DoublePreprocessingDataColumn, double?> doubleFunc,
    131       Func<StringPreprocessingDataColumn, string> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime?> dateTimeFunc = null) {
    132       var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
    133       if (doubleColumn != null && doubleFunc != null) return Convert<T>(doubleFunc(doubleColumn));
    134       var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
    135       if (stringColumn != null && stringFunc != null) return Convert<T>(stringFunc(stringColumn));
    136       var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
    137       if (dateTimeColumn != null && dateTimeFunc != null) return Convert<T>(dateTimeFunc(dateTimeColumn));
    138       throw new InvalidOperationException("Invalid data column type.");
    139     }
    140 
    141     private T ColumnTypeSwitchFuncResult<T>(int columnIndex, Func<DoublePreprocessingDataColumn, T> doubleFunc,
    142       Func<StringPreprocessingDataColumn, T> stringFunc = null, Func<DateTimePreprocessingDataColumn, T> dateTimeFunc = null) {
    143       var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
    144       if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn);
    145       var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
    146       if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn);
    147       var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
    148       if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn);
    149       throw new InvalidOperationException("Invalid data column type.");
    150     }
    151     private TOut ColumnTypeSwitchFuncResult<TIn, TOut>(int columnIndex, TIn value, Func<DoublePreprocessingDataColumn, double?, TOut> doubleFunc,
    152      Func<StringPreprocessingDataColumn, string, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime?, TOut> dateTimeFunc = null) {
    153       var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
    154       if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn, Convert<double?>(value));
    155       var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
    156       if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn, Convert<string>(value));
    157       var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
    158       if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn, Convert<DateTime?>(value));
    159       throw new InvalidOperationException("Invalid data column type.");
    160     }
    161 
    162     private IList<T> ColumnTypeSwitchFuncList<T>(int columnIndex, Func<DoublePreprocessingDataColumn, IList<double>> doubleFunc,
    163       Func<StringPreprocessingDataColumn, IList<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, IList<DateTime>> dateTimeFunc = null) {
    164       var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
    165       if (doubleColumn != null && doubleFunc != null) return Convert<IList<T>>(doubleFunc(doubleColumn));
    166       var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
    167       if (stringColumn != null && stringFunc != null) return Convert<IList<T>>(stringFunc(stringColumn));
    168       var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
    169       if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IList<T>>(dateTimeFunc(dateTimeColumn));
    170       throw new InvalidOperationException("Invalid data column type.");
    171     }
    172     private static T Convert<T>(object obj) { return (T)obj; }
    173 
    174 
    175105    public T GetCell<T>(int columnIndex, int rowIndex) {
    176       return ColumnTypeSwitchFunc<T>(columnIndex,
     106      return dataColumns[columnIndex].TypeSwitch<T>(
    177107        c => c[rowIndex],
    178108        c => c[rowIndex],
     
    188118        InsertColumn<T>(i.ToString(), i);
    189119
    190       ColumnTypeSwitchAction<T>(columnIndex, value,
     120      dataColumns[columnIndex].TypeSwitch<T>(value,
    191121        (c, v) => c[rowIndex] = v,
    192122        (c, v) => c[rowIndex] = v,
     
    201131    }
    202132
    203     public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
    204       if (considerSelection) {
    205         var list = new List<T>();
    206         foreach (var rowIdx in selection[columnIndex]) {
    207           list.Add(GetCell<T>(columnIndex, rowIdx));
    208           //list.Add((T)dataColumns[columnIndex][rowIdx]);
    209         }
    210         return list;
    211       } else {
    212         return ColumnTypeSwitchFuncList<T>(columnIndex,
    213           c => c.Values.Select(x => x ?? double.NaN).ToList(),
    214           c => c.Values,
    215           c => c.Values.Select(x => x ?? DateTime.MinValue).ToList());
    216         //(IList<T>)dataColumns[columnIndex];
    217       }
    218     }
    219 
    220     public void SetValues<T>(int columnIndex, IList<T> values) {
     133    public IEnumerable<T> GetValues<T>(int columnIndex, bool considerSelection) {
     134      return dataColumns[columnIndex].TypeSwitch<T>(
     135        c => c.GetValues(considerSelection ? selection[columnIndex] : null),
     136        c => c.GetValues(considerSelection ? selection[columnIndex] : null),
     137        c => c.GetValues(considerSelection ? selection[columnIndex] : null));
     138    }
     139
     140    public void SetValues<T>(int columnIndex, IEnumerable<T> values) {
    221141      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    222142      if (VariableHasType<T>(columnIndex)) {
     
    239159
    240160    public bool SetValue(string value, int columnIndex, int rowIndex) {
    241       bool valid = false;
    242       if (VariableHasType<double>(columnIndex)) {
    243         double val;
    244         if (string.IsNullOrWhiteSpace(value)) {
    245           val = double.NaN;
    246           valid = true;
    247         } else {
    248           valid = double.TryParse(value, out val);
    249         }
    250         if (valid)
    251           SetCell(columnIndex, rowIndex, val);
    252       } else if (VariableHasType<string>(columnIndex)) {
    253         valid = value != null;
    254         if (valid)
    255           SetCell(columnIndex, rowIndex, value);
    256       } else if (VariableHasType<DateTime>(columnIndex)) {
    257         DateTime date;
    258         valid = DateTime.TryParse(value, out date);
    259         if (valid)
    260           SetCell(columnIndex, rowIndex, date);
    261       } else {
    262         throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
    263       }
     161      var column = dataColumns[columnIndex];
     162      bool successful = column.SetValue(value, rowIndex);
    264163
    265164      if (!IsInTransaction)
    266165        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    267166
    268       return valid;
     167      return successful;
    269168    }
    270169
     
    274173
    275174    public int Rows {
    276       get { return dataColumns.Count > 0 ? dataColumns[0].Length : 0; }
     175      get { return dataColumns.Any() ? dataColumns.Max(c => c.Length) : 0; }
    277176    }
    278177    #endregion
     
    281180    public void InsertRow(int rowIndex) {
    282181      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
     182
    283183      foreach (var column in dataColumns) {
    284         ColumnTypeSwitchAction(column,
     184        column.TypeSwitch(
     185          c => c.Values.Insert(rowIndex, double.NaN),
    285186          c => c.Values.Insert(rowIndex, null),
    286           c => c.Values.Insert(rowIndex, null),
    287           c => c.Values.Insert(rowIndex, null));
    288         //var valueType = column.GetValueType();
    289         //column.Insert(rowIndex, valueType.IsValueType ? Activator.CreateInstance(valueType) : null);
    290       }
     187          c => c.Values.Insert(rowIndex, DateTime.MinValue));
     188      }
     189
    291190      if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
    292191        TrainingPartition.End++;
     
    302201        }
    303202      }
     203
    304204      if (!IsInTransaction)
    305205        OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    306206    }
     207
    307208    public void DeleteRow(int rowIndex) {
    308       SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    309       foreach (var column in dataColumns) {
    310         ColumnTypeSwitchAction(column,
    311           c => c.Values.RemoveAt(rowIndex),
    312           c => c.Values.RemoveAt(rowIndex),
    313           c => c.Values.RemoveAt(rowIndex));
    314         //column.RemoveAt(rowIndex);
    315       }
    316       if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
    317         TrainingPartition.End--;
    318         if (TrainingPartition.End <= TestPartition.Start) {
    319           TestPartition.Start--;
    320           TestPartition.End--;
    321         }
    322       } else if (TestPartition.Start <= rowIndex && rowIndex <= TestPartition.End) {
    323         TestPartition.End--;
    324         if (TestPartition.End <= TrainingPartition.Start) {
    325           TestPartition.Start--;
    326           TestPartition.End--;
    327         }
    328       }
    329       if (!IsInTransaction)
    330         OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
    331     }
    332     public void DeleteRowsWithIndices(IEnumerable<int> rows) {
     209      DeleteRows(new[] { rowIndex });
     210    }
     211    public void DeleteRows(IEnumerable<int> rowIndices) {
    333212      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, -1);
    334       foreach (int rowIndex in rows.OrderByDescending(x => x)) {
     213
     214      foreach (int rowIndex in rowIndices.OrderByDescending(x => x)) {
    335215        foreach (var column in dataColumns) {
    336           ColumnTypeSwitchAction(column,
     216          column.TypeSwitch(
    337217            c => c.Values.RemoveAt(rowIndex),
    338218            c => c.Values.RemoveAt(rowIndex),
    339219            c => c.Values.RemoveAt(rowIndex));
    340           //column.RemoveAt(rowIndex);
    341         }
     220        }
     221
    342222        if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
    343223          TrainingPartition.End--;
     
    354234        }
    355235      }
     236
    356237      if (!IsInTransaction)
    357238        OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, -1);
     
    362243
    363244      if (typeof(T) == typeof(double)) {
    364         dataColumns.Insert(columnIndex, new DoublePreprocessingDataColumn(variableName, Enumerable.Repeat<double?>(null, Rows)));
     245        dataColumns.Insert(columnIndex, new DoublePreprocessingDataColumn(variableName, Enumerable.Repeat<double>(double.NaN, Rows)));
    365246      } else if (typeof(T) == typeof(string)) {
    366         dataColumns.Add(new StringPreprocessingDataColumn(variableName, Enumerable.Repeat<string>(null, Rows)));
     247        dataColumns.Insert(columnIndex, new StringPreprocessingDataColumn(variableName, Enumerable.Repeat<string>(string.Empty, Rows)));
    367248      } else if (typeof(T) == typeof(DateTime)) {
    368         dataColumns.Add(new DateTimePreprocessingDataColumn(variableName, Enumerable.Repeat<DateTime?>(null, Rows)));
     249        dataColumns.Insert(columnIndex, new DateTimePreprocessingDataColumn(variableName, Enumerable.Repeat<DateTime>(DateTime.MinValue, Rows)));
    369250      } else {
    370251        throw new ArgumentException("The datatype of column " + variableName + " must be of type double, string or DateTime");
    371252      }
    372253
    373       //dataColumns.Insert(columnIndex, new List<T>(Enumerable.Repeat(default(T), Rows)));
    374       //variableNames.Insert(columnIndex, variableName);
    375254      if (!IsInTransaction)
    376255        OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
     
    379258    public void DeleteColumn(int columnIndex) {
    380259      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
     260
    381261      dataColumns.RemoveAt(columnIndex);
    382       //variableNames.RemoveAt(columnIndex);
     262
    383263      if (!IsInTransaction)
    384264        OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
     
    386266
    387267    public void RenameColumn(int columnIndex, string name) {
    388       SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    389268      if (columnIndex < 0 || columnIndex > dataColumns.Count)
    390269        throw new ArgumentOutOfRangeException("columnIndex");
     270
     271      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
     272
    391273      dataColumns[columnIndex].Name = name;
    392274
     
    400282
    401283      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
     284
    402285      for (int i = 0; i < names.Count; i++)
    403286        dataColumns[i].Name = names[i];
     
    408291
    409292    public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
    410       return columnIndices.All(x => VariableHasType<string>(x));
     293      return columnIndices.All(VariableHasType<string>);
    411294    }
    412295    #endregion
     
    522405        var stringColumn = dataColumns[i] as StringPreprocessingDataColumn;
    523406        var dateTimeColumn = dataColumns[i] as DateTimePreprocessingDataColumn;
    524         if (doubleColumn != null) values.Add(new List<double>(doubleColumn.Values.Select(x => x ?? double.NaN)));
    525         else if (stringColumn != null) values.Add(new List<string>(stringColumn.Values));
    526         else if (dateTimeColumn != null) values.Add(new List<DateTime>(dateTimeColumn.Values.Select(x => x ?? DateTime.MinValue)));
     407        if (doubleColumn != null) values.Add(new List<double>(doubleColumn.GetValues()));
     408        else if (stringColumn != null) values.Add(new List<string>(stringColumn.GetValues()));
     409        else if (dateTimeColumn != null) values.Add(new List<DateTime>(dateTimeColumn.GetValues()));
    527410        else throw new InvalidOperationException("Column type not supported for export");
    528411      }
     
    638521    #endregion
    639522
    640     #region Statistics
    641     public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    642       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    643       return values.Any() ? values.Min() : emptyValue;
    644     }
    645 
    646     public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    647       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    648       return values.Any() ? values.Max() : emptyValue;
    649     }
    650 
    651     public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    652       if (typeof(T) == typeof(double)) {
    653         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    654         return values.Any() ? Convert<T>(values.Average()) : emptyValue;
    655       }
    656       if (typeof(T) == typeof(string)) {
    657         return Convert<T>(string.Empty);
    658       }
    659       if (typeof(T) == typeof(DateTime)) {
    660         var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
    661         return values.Any() ? Convert<T>(AggregateAsDouble(values, Enumerable.Average)) : emptyValue;
    662       }
    663 
    664       throw new InvalidOperationException(typeof(T) + " not supported");
    665     }
    666 
    667     public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    668       if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
    669         var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    670         return doubleValues.Any() ? Convert<T>(doubleValues.Median()) : emptyValue;
    671       }
    672       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    673       return values.Any() ? values.Quantile(0.5) : emptyValue;
    674     }
    675 
    676     public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
    677       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    678       return values.Any() ? values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First() : emptyValue;
    679     }
    680 
    681     public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    682       if (typeof(T) == typeof(double)) {
    683         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    684         return values.Any() ? Convert<T>(values.StandardDeviation()) : emptyValue;
    685       }
    686       // For DateTime, std.dev / variance would have to be TimeSpan
    687       //if (typeof(T) == typeof(DateTime)) {
    688       //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
    689       //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.StandardDeviation)) : emptyValue;
    690       //}
    691       return default(T);
    692     }
    693 
    694     public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    695       if (typeof(T) == typeof(double)) {
    696         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    697         return values.Any() ? Convert<T>(values.Variance()) : emptyValue;
    698       }
    699       // DateTime variance often overflows long, thus the corresponding DateTime is invalid
    700       //if (typeof(T) == typeof(DateTime)) {
    701       //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
    702       //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.Variance)) : emptyValue;
    703       //}
    704       return default(T);
    705     }
    706 
    707     public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    708       if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
    709         var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    710         return doubleValues.Any() ? Convert<T>(doubleValues.Quantile(alpha)) : emptyValue;
    711       }
    712       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    713       return values.Any() ? values.Quantile(alpha) : emptyValue;
    714     }
    715 
    716     public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
    717       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    718       return values.GroupBy(x => x).Count();
    719     }
    720 
    721     private IEnumerable<T> GetValuesWithoutMissingValues<T>(int columnIndex, bool considerSelection) {
    722       //var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
    723       //var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
    724       //var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
    725       //return GetValues<T>(columnIndex, considerSelection).Where(x =>
    726       //  doubleColumn != null ? doubleColumn.IsValidValue(Convert<double>(x))
    727       //  : stringColumn != null ? stringColumn.IsValidValue(Convert<string>(x))
    728       //  : dateTimeColumn != null ? dateTimeColumn.IsValidValue(Convert<DateTime>(x))
    729       //  : false);
    730       //!IsMissingValue(x));
    731 
    732       return GetValues<T>(columnIndex, considerSelection).Where(x =>
    733         ColumnTypeSwitchFuncResult<T, bool>(columnIndex, x,
    734           (c, v) => v.HasValue && c.IsValidValue(v.Value),
    735           (c, v) => c.IsValidValue(v),
    736           (c, v) => v.HasValue && c.IsValidValue(v.Value)
    737       ));
    738     }
    739 
    740     private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) {
    741       return new DateTime((long)(func(values.Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond)) * TimeSpan.TicksPerSecond));
    742     }
    743 
    744     public int GetMissingValueCount() {
    745       int count = 0;
    746       for (int i = 0; i < Columns; ++i) {
    747         count += GetMissingValueCount(i);
    748       }
    749       return count;
    750     }
    751     public int GetMissingValueCount(int columnIndex) {
    752       int sum = 0;
    753       for (int i = 0; i < Rows; i++) {
    754         if (IsCellEmpty(columnIndex, i))
    755           sum++;
    756       }
    757       return sum;
    758     }
    759     public int GetRowMissingValueCount(int rowIndex) {
    760       int sum = 0;
    761       for (int i = 0; i < Columns; i++) {
    762         if (IsCellEmpty(i, rowIndex))
    763           sum++;
    764       }
    765       return sum;
    766     }
    767     #endregion
    768 
    769     #region Helpers
    770     private static IList<IList> CopyVariableValues(IList<IList> original) {
    771       var copy = new List<IList>(original);
    772       for (int i = 0; i < original.Count; ++i) {
    773         copy[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
    774       }
    775       return copy;
    776     }
    777     #endregion
     523    /* #region Statistics
     524     public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     525       try {
     526         return dataColumns[columnIndex].TypeSwitch<T>(
     527           col => col.GetMin(considerSelection ? Selection[columnIndex] : null),
     528           col => col.GetMin(considerSelection ? Selection[columnIndex] : null),
     529           col => col.GetMin(considerSelection ? Selection[columnIndex] : null));
     530       } catch (InvalidOperationException) {
     531         return emptyValue;
     532       }
     533     }
     534
     535     public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     536       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     537       return values.Any() ? values.Max() : emptyValue;
     538     }
     539
     540     public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     541       return
     542
     543
     544       if (typeof(T) == typeof(double)) {
     545         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     546         return values.Any() ? Convert<T>(values.Average()) : emptyValue;
     547       }
     548       if (typeof(T) == typeof(string)) {
     549         return Convert<T>(string.Empty);
     550       }
     551       if (typeof(T) == typeof(DateTime)) {
     552         var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     553         return values.Any() ? Convert<T>(AggregateAsDouble(values, Enumerable.Average)) : emptyValue;
     554       }
     555
     556       throw new InvalidOperationException(typeof(T) + " not supported");
     557     }
     558
     559     public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     560       if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
     561         var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     562         return doubleValues.Any() ? Convert<T>(doubleValues.Median()) : emptyValue;
     563       }
     564       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     565       return values.Any() ? values.Quantile(0.5) : emptyValue;
     566     }
     567
     568     public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
     569       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     570       return values.Any() ? values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First() : emptyValue;
     571     }
     572
     573     public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     574       if (typeof(T) == typeof(double)) {
     575         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     576         return values.Any() ? Convert<T>(values.StandardDeviation()) : emptyValue;
     577       }
     578       // For DateTime, std.dev / variance would have to be TimeSpan
     579       //if (typeof(T) == typeof(DateTime)) {
     580       //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     581       //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.StandardDeviation)) : emptyValue;
     582       //}
     583       return default(T);
     584     }
     585
     586     public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     587       if (typeof(T) == typeof(double)) {
     588         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     589         return values.Any() ? Convert<T>(values.Variance()) : emptyValue;
     590       }
     591       // DateTime variance often overflows long, thus the corresponding DateTime is invalid
     592       //if (typeof(T) == typeof(DateTime)) {
     593       //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     594       //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.Variance)) : emptyValue;
     595       //}
     596       return default(T);
     597     }
     598
     599     public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     600       if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
     601         var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     602         return doubleValues.Any() ? Convert<T>(doubleValues.Quantile(alpha)) : emptyValue;
     603       }
     604       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     605       return values.Any() ? values.Quantile(alpha) : emptyValue;
     606     }
     607
     608     public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
     609       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     610       return values.GroupBy(x => x).Count();
     611     }
     612
     613     private IEnumerable<T> GetValuesWithoutMissingValues<T>(int columnIndex, bool considerSelection) {
     614       return GetValues<T>(columnIndex, considerSelection).Where(x =>
     615         ColumnTypeSwitch<T, bool>(dataColumns[columnIndex], x,
     616           (c, v) => c.IsValidValue(v),
     617           (c, v) => c.IsValidValue(v),
     618           (c, v) => c.IsValidValue(v)
     619       ));
     620     }
     621
     622     private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) {
     623       return new DateTime((long)(func(values.Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond)) * TimeSpan.TicksPerSecond));
     624     }
     625
     626     public int GetMissingValueCount() {
     627       int count = 0;
     628       for (int i = 0; i < Columns; ++i) {
     629         count += GetMissingValueCount(i);
     630       }
     631       return count;
     632     }
     633     public int GetMissingValueCount(int columnIndex) {
     634       int sum = 0;
     635       for (int i = 0; i < Rows; i++) {
     636         if (IsCellEmpty(columnIndex, i))
     637           sum++;
     638       }
     639       return sum;
     640     }
     641     public int GetRowMissingValueCount(int rowIndex) {
     642       int sum = 0;
     643       for (int i = 0; i < Columns; i++) {
     644         if (IsCellEmpty(i, rowIndex))
     645           sum++;
     646       }
     647       return sum;
     648     }
     649     #endregion  */
    778650  }
    779651
Note: See TracChangeset for help on using the changeset viewer.