Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/27/17 12:45:23 (7 years ago)
Author:
pfleck
Message:

#2809: Added (Double/String/DateTime)PreprocessingDataColumn. (experimental state)

Location:
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4
Files:
5 added
2 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs

    r15285 r15291  
    3232
    3333namespace HeuristicLab.DataPreprocessing {
    34 
    3534  [Item("PreprocessingData", "Represents data used for preprocessing.")]
    3635  [StorableClass]
     
    3837
    3938    [Storable]
    40     protected IList<IList> variableValues;
    41     [Storable]
    42     protected IList<string> variableNames;
     39    protected List<PreprocessingDataColumn> dataColumns;
    4340
    4441    #region Constructor, Cloning & Persistence
     
    4744      Name = "Preprocessing Data";
    4845
     46      dataColumns = new List<PreprocessingDataColumn>();
    4947      Transformations = new List<ITransformation>();
    5048      selection = new Dictionary<int, IList<int>>();
     
    5755    protected PreprocessingData(PreprocessingData original, Cloner cloner)
    5856      : base(original, cloner) {
    59       variableValues = CopyVariableValues(original.variableValues);
    60       variableNames = new List<string>(original.variableNames);
    61       TrainingPartition = (IntRange)original.TrainingPartition.Clone(cloner);
    62       TestPartition = (IntRange)original.TestPartition.Clone(cloner);
     57      dataColumns = new List<PreprocessingDataColumn>(original.dataColumns.Select(cloner.Clone));
     58      TrainingPartition = cloner.Clone(original.TrainingPartition);
     59      TestPartition = cloner.Clone(original.TestPartition);
    6360      Transformations = new List<ITransformation>(original.Transformations.Select(cloner.Clone));
    6461
     
    9996    #region Cells
    10097    public bool IsCellEmpty(int columnIndex, int rowIndex) {
    101       var value = variableValues[columnIndex][rowIndex];
    102       return IsMissingValue(value);
    103     }
     98      return !dataColumns[columnIndex].IsValidValue(rowIndex);
     99    }
     100
     101    private void ColumnTypeSwitchAction<T>(int columnIndex, T value, Action<DoublePreprocessingDataColumn, double?> doubleAction,
     102      Action<StringPreprocessingDataColumn, string> stringAction = null, Action<DateTimePreprocessingDataColumn, DateTime?> dateTimeAction = null) {
     103      ColumnTypeSwitchAction(dataColumns[columnIndex], value, doubleAction, stringAction, dateTimeAction);
     104    }
     105    private void ColumnTypeSwitchAction<T>(PreprocessingDataColumn column, T value, Action<DoublePreprocessingDataColumn, double?> doubleAction,
     106      Action<StringPreprocessingDataColumn, string> stringAction = null, Action<DateTimePreprocessingDataColumn, DateTime?> dateTimeAction = null) {
     107      var doubleColumn = column as DoublePreprocessingDataColumn;
     108      if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn, Convert<double?>(value));
     109      var stringColumn = column as StringPreprocessingDataColumn;
     110      if (stringColumn != null && stringAction != null) stringAction(stringColumn, Convert<string>(value));
     111      var dateTimeColumn = column as DateTimePreprocessingDataColumn;
     112      if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn, Convert<DateTime?>(value));
     113    }
     114
     115    private void ColumnTypeSwitchAction(int columnIndex, Action<DoublePreprocessingDataColumn> doubleAction,
     116      Action<StringPreprocessingDataColumn> stringAction = null, Action<DateTimePreprocessingDataColumn> dateTimeAction = null) {
     117      ColumnTypeSwitchAction(dataColumns[columnIndex], doubleAction, stringAction, dateTimeAction);
     118    }
     119    private void ColumnTypeSwitchAction(PreprocessingDataColumn column, Action<DoublePreprocessingDataColumn> doubleAction,
     120      Action<StringPreprocessingDataColumn> stringAction = null, Action<DateTimePreprocessingDataColumn> dateTimeAction = null) {
     121      var doubleColumn = column as DoublePreprocessingDataColumn;
     122      if (doubleColumn != null && doubleAction != null) doubleAction(doubleColumn);
     123      var stringColumn = column as StringPreprocessingDataColumn;
     124      if (stringColumn != null && stringAction != null) stringAction(stringColumn);
     125      var dateTimeColumn = column as DateTimePreprocessingDataColumn;
     126      if (dateTimeColumn != null && dateTimeAction != null) dateTimeAction(dateTimeColumn);
     127    }
     128
     129
     130    private T ColumnTypeSwitchFunc<T>(int columnIndex, Func<DoublePreprocessingDataColumn, double?> doubleFunc,
     131      Func<StringPreprocessingDataColumn, string> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime?> dateTimeFunc = null) {
     132      var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
     133      if (doubleColumn != null && doubleFunc != null) return Convert<T>(doubleFunc(doubleColumn));
     134      var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
     135      if (stringColumn != null && stringFunc != null) return Convert<T>(stringFunc(stringColumn));
     136      var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
     137      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<T>(dateTimeFunc(dateTimeColumn));
     138      throw new InvalidOperationException("Invalid data column type.");
     139    }
     140
     141    private T ColumnTypeSwitchFuncResult<T>(int columnIndex, Func<DoublePreprocessingDataColumn, T> doubleFunc,
     142      Func<StringPreprocessingDataColumn, T> stringFunc = null, Func<DateTimePreprocessingDataColumn, T> dateTimeFunc = null) {
     143      var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
     144      if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn);
     145      var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
     146      if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn);
     147      var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
     148      if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn);
     149      throw new InvalidOperationException("Invalid data column type.");
     150    }
     151    private TOut ColumnTypeSwitchFuncResult<TIn, TOut>(int columnIndex, TIn value, Func<DoublePreprocessingDataColumn, double?, TOut> doubleFunc,
     152     Func<StringPreprocessingDataColumn, string, TOut> stringFunc = null, Func<DateTimePreprocessingDataColumn, DateTime?, TOut> dateTimeFunc = null) {
     153      var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
     154      if (doubleColumn != null && doubleFunc != null) return doubleFunc(doubleColumn, Convert<double?>(value));
     155      var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
     156      if (stringColumn != null && stringFunc != null) return stringFunc(stringColumn, Convert<string>(value));
     157      var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
     158      if (dateTimeColumn != null && dateTimeFunc != null) return dateTimeFunc(dateTimeColumn, Convert<DateTime?>(value));
     159      throw new InvalidOperationException("Invalid data column type.");
     160    }
     161
     162    private IList<T> ColumnTypeSwitchFuncList<T>(int columnIndex, Func<DoublePreprocessingDataColumn, IList<double>> doubleFunc,
     163      Func<StringPreprocessingDataColumn, IList<string>> stringFunc = null, Func<DateTimePreprocessingDataColumn, IList<DateTime>> dateTimeFunc = null) {
     164      var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
     165      if (doubleColumn != null && doubleFunc != null) return Convert<IList<T>>(doubleFunc(doubleColumn));
     166      var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
     167      if (stringColumn != null && stringFunc != null) return Convert<IList<T>>(stringFunc(stringColumn));
     168      var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
     169      if (dateTimeColumn != null && dateTimeFunc != null) return Convert<IList<T>>(dateTimeFunc(dateTimeColumn));
     170      throw new InvalidOperationException("Invalid data column type.");
     171    }
     172    private static T Convert<T>(object obj) { return (T)obj; }
     173
    104174
    105175    public T GetCell<T>(int columnIndex, int rowIndex) {
    106       return (T)variableValues[columnIndex][rowIndex];
     176      return ColumnTypeSwitchFunc<T>(columnIndex,
     177        c => c[rowIndex],
     178        c => c[rowIndex],
     179        c => c[rowIndex]);
    107180    }
    108181
     
    115188        InsertColumn<T>(i.ToString(), i);
    116189
    117       variableValues[columnIndex][rowIndex] = value;
     190      ColumnTypeSwitchAction<T>(columnIndex, value,
     191        (c, v) => c[rowIndex] = v,
     192        (c, v) => c[rowIndex] = v,
     193        (c, v) => c[rowIndex] = v);
     194
    118195      if (!IsInTransaction)
    119196        OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
     
    121198
    122199    public string GetCellAsString(int columnIndex, int rowIndex) {
    123       return variableValues[columnIndex][rowIndex].ToString();
     200      return dataColumns[columnIndex].GetValue(rowIndex);
    124201    }
    125202
     
    128205        var list = new List<T>();
    129206        foreach (var rowIdx in selection[columnIndex]) {
    130           list.Add((T)variableValues[columnIndex][rowIdx]);
     207          list.Add(GetCell<T>(columnIndex, rowIdx));
     208          //list.Add((T)dataColumns[columnIndex][rowIdx]);
    131209        }
    132210        return list;
    133211      } else {
    134         return (IList<T>)variableValues[columnIndex];
     212        return ColumnTypeSwitchFuncList<T>(columnIndex,
     213          c => c.Values.Select(x => x ?? double.NaN).ToList(),
     214          c => c.Values,
     215          c => c.Values.Select(x => x ?? DateTime.MinValue).ToList());
     216        //(IList<T>)dataColumns[columnIndex];
    135217      }
    136218    }
     
    139221      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    140222      if (VariableHasType<T>(columnIndex)) {
    141         variableValues[columnIndex] = (IList)values;
     223        var name = dataColumns[columnIndex].Name;
     224        if (dataColumns[columnIndex].IsType<double>()) {
     225          dataColumns[columnIndex] = new DoublePreprocessingDataColumn(name, (IEnumerable<double>)values);
     226        } else if (dataColumns[columnIndex].IsType<string>()) {
     227          dataColumns[columnIndex] = new StringPreprocessingDataColumn(name, (IEnumerable<string>)values);
     228        } else if (dataColumns[columnIndex].IsType<DateTime>()) {
     229          dataColumns[columnIndex] = new DateTimePreprocessingDataColumn(name, (IEnumerable<DateTime>)values);
     230        } else {
     231          throw new ArgumentException("Unknown column type");
     232        }
    142233      } else {
    143         throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
     234        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + dataColumns[columnIndex].GetType().Name + " but was " + typeof(T).Name);
    144235      }
    145236      if (!IsInTransaction)
     
    179270
    180271    public int Columns {
    181       get { return variableNames.Count; }
     272      get { return dataColumns.Count; }
    182273    }
    183274
    184275    public int Rows {
    185       get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
    186     }
    187 
    188     public static bool IsMissingValue(object value) {
    189       if (value is double) return double.IsNaN((double)value);
    190       if (value is string) return string.IsNullOrEmpty((string)value);
    191       if (value is DateTime) return ((DateTime)value).Equals(DateTime.MinValue);
    192       throw new ArgumentException();
     276      get { return dataColumns.Count > 0 ? dataColumns[0].Length : 0; }
    193277    }
    194278    #endregion
     
    197281    public void InsertRow(int rowIndex) {
    198282      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
    199       foreach (IList column in variableValues) {
    200         Type type = column.GetType().GetGenericArguments()[0];
    201         column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
     283      foreach (var column in dataColumns) {
     284        ColumnTypeSwitchAction(column,
     285          c => c.Values.Insert(rowIndex, null),
     286          c => c.Values.Insert(rowIndex, null),
     287          c => c.Values.Insert(rowIndex, null));
     288        //var valueType = column.GetValueType();
     289        //column.Insert(rowIndex, valueType.IsValueType ? Activator.CreateInstance(valueType) : null);
    202290      }
    203291      if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
     
    219307    public void DeleteRow(int rowIndex) {
    220308      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    221       foreach (IList column in variableValues) {
    222         column.RemoveAt(rowIndex);
     309      foreach (var column in dataColumns) {
     310        ColumnTypeSwitchAction(column,
     311          c => c.Values.RemoveAt(rowIndex),
     312          c => c.Values.RemoveAt(rowIndex),
     313          c => c.Values.RemoveAt(rowIndex));
     314        //column.RemoveAt(rowIndex);
    223315      }
    224316      if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
     
    241333      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, -1);
    242334      foreach (int rowIndex in rows.OrderByDescending(x => x)) {
    243         foreach (IList column in variableValues) {
    244           column.RemoveAt(rowIndex);
     335        foreach (var column in dataColumns) {
     336          ColumnTypeSwitchAction(column,
     337            c => c.Values.RemoveAt(rowIndex),
     338            c => c.Values.RemoveAt(rowIndex),
     339            c => c.Values.RemoveAt(rowIndex));
     340          //column.RemoveAt(rowIndex);
    245341        }
    246342        if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
     
    264360    public void InsertColumn<T>(string variableName, int columnIndex) {
    265361      SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
    266       variableValues.Insert(columnIndex, new List<T>(Enumerable.Repeat(default(T), Rows)));
    267       variableNames.Insert(columnIndex, variableName);
     362
     363      if (typeof(T) == typeof(double)) {
     364        dataColumns.Insert(columnIndex, new DoublePreprocessingDataColumn(variableName, Enumerable.Repeat<double?>(null, Rows)));
     365      } else if (typeof(T) == typeof(string)) {
     366        dataColumns.Add(new StringPreprocessingDataColumn(variableName, Enumerable.Repeat<string>(null, Rows)));
     367      } else if (typeof(T) == typeof(DateTime)) {
     368        dataColumns.Add(new DateTimePreprocessingDataColumn(variableName, Enumerable.Repeat<DateTime?>(null, Rows)));
     369      } else {
     370        throw new ArgumentException("The datatype of column " + variableName + " must be of type double, string or DateTime");
     371      }
     372
     373      //dataColumns.Insert(columnIndex, new List<T>(Enumerable.Repeat(default(T), Rows)));
     374      //variableNames.Insert(columnIndex, variableName);
    268375      if (!IsInTransaction)
    269376        OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
     
    272379    public void DeleteColumn(int columnIndex) {
    273380      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
    274       variableValues.RemoveAt(columnIndex);
    275       variableNames.RemoveAt(columnIndex);
     381      dataColumns.RemoveAt(columnIndex);
     382      //variableNames.RemoveAt(columnIndex);
    276383      if (!IsInTransaction)
    277384        OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
     
    280387    public void RenameColumn(int columnIndex, string name) {
    281388      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    282       if (columnIndex < 0 || columnIndex > variableNames.Count)
     389      if (columnIndex < 0 || columnIndex > dataColumns.Count)
    283390        throw new ArgumentOutOfRangeException("columnIndex");
    284       variableNames[columnIndex] = name;
     391      dataColumns[columnIndex].Name = name;
    285392
    286393      if (!IsInTransaction)
     
    290397    public void RenameColumns(IList<string> names) {
    291398      if (names == null) throw new ArgumentNullException("names");
    292       if (names.Count != variableNames.Count) throw new ArgumentException("number of names must match the number of columns.", "names");
     399      if (names.Count != dataColumns.Count) throw new ArgumentException("number of names must match the number of columns.", "names");
    293400
    294401      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
    295402      for (int i = 0; i < names.Count; i++)
    296         variableNames[i] = names[i];
     403        dataColumns[i].Name = names[i];
    297404
    298405      if (!IsInTransaction)
     
    307414    #region Variables
    308415    public IEnumerable<string> VariableNames {
    309       get { return variableNames; }
     416      get { return dataColumns.Select(c => c.Name); }
    310417    }
    311418
    312419    public IEnumerable<string> GetDoubleVariableNames() {
    313       var doubleVariableNames = new List<string>();
    314       for (int i = 0; i < Columns; ++i) {
    315         if (VariableHasType<double>(i)) {
    316           doubleVariableNames.Add(variableNames[i]);
    317         }
    318       }
    319       return doubleVariableNames;
     420      return dataColumns.OfType<DoublePreprocessingDataColumn>().Select(c => c.Name);
    320421    }
    321422
    322423    public string GetVariableName(int columnIndex) {
    323       return variableNames[columnIndex];
     424      return dataColumns[columnIndex].Name;
    324425    }
    325426
    326427    public int GetColumnIndex(string variableName) {
    327       return variableNames.IndexOf(variableName);
     428      return dataColumns.FindIndex(c => c.Name == variableName);
    328429    }
    329430
    330431    public bool VariableHasType<T>(int columnIndex) {
    331       return columnIndex >= variableValues.Count || variableValues[columnIndex] is List<T>;
     432      return dataColumns[columnIndex].IsType<T>();
    332433    }
    333434
    334435    public Type GetVariableType(int columnIndex) {
    335       var listType = variableValues[columnIndex].GetType();
    336       return listType.GenericTypeArguments.Single();
     436      return dataColumns[columnIndex].GetValueType();
    337437    }
    338438
     
    392492    #region Import & Export
    393493    public void Import(IDataAnalysisProblemData problemData) {
    394       Dataset dataset = (Dataset)problemData.Dataset;
    395       variableNames = new List<string>(problemData.Dataset.VariableNames);
     494      var dataset = problemData.Dataset;
    396495      InputVariables = new List<string>(problemData.AllowedInputVariables);
    397       TargetVariable = (problemData is IRegressionProblemData) ? ((IRegressionProblemData)problemData).TargetVariable
    398         : (problemData is IClassificationProblemData) ? ((IClassificationProblemData)problemData).TargetVariable
    399           : null;
    400 
    401       int columnIndex = 0;
    402       variableValues = new List<IList>();
     496      TargetVariable = problemData is IRegressionProblemData ? ((IRegressionProblemData)problemData).TargetVariable
     497        : problemData is IClassificationProblemData ? ((IClassificationProblemData)problemData).TargetVariable
     498        : null;
     499
     500      dataColumns.Clear();
    403501      foreach (var variableName in problemData.Dataset.VariableNames) {
    404502        if (dataset.VariableHasType<double>(variableName)) {
    405           variableValues.Insert(columnIndex, dataset.GetDoubleValues(variableName).ToList());
     503          dataColumns.Add(new DoublePreprocessingDataColumn(variableName, dataset.GetDoubleValues(variableName)));
    406504        } else if (dataset.VariableHasType<string>(variableName)) {
    407           variableValues.Insert(columnIndex, dataset.GetStringValues(variableName).ToList());
     505          dataColumns.Add(new StringPreprocessingDataColumn(variableName, dataset.GetStringValues(variableName)));
    408506        } else if (dataset.VariableHasType<DateTime>(variableName)) {
    409           variableValues.Insert(columnIndex, dataset.GetDateTimeValues(variableName).ToList());
     507          dataColumns.Add(new DateTimePreprocessingDataColumn(variableName, dataset.GetDateTimeValues(variableName)));
    410508        } else {
    411509          throw new ArgumentException("The datatype of column " + variableName + " must be of type double, string or DateTime");
    412510        }
    413         ++columnIndex;
    414511      }
    415512
     
    421518      IList<IList> values = new List<IList>();
    422519
    423       for (int i = 0; i < Columns; ++i) {
    424         values.Add(variableValues[i]);
    425       }
    426 
    427       var dataset = new Dataset(variableNames, values);
    428       return dataset;
     520      for (int i = 0; i < Columns; i++) {
     521        var doubleColumn = dataColumns[i] as DoublePreprocessingDataColumn;
     522        var stringColumn = dataColumns[i] as StringPreprocessingDataColumn;
     523        var dateTimeColumn = dataColumns[i] as DateTimePreprocessingDataColumn;
     524        if (doubleColumn != null) values.Add(new List<double>(doubleColumn.Values.Select(x => x ?? double.NaN)));
     525        else if (stringColumn != null) values.Add(new List<string>(stringColumn.Values));
     526        else if (dateTimeColumn != null) values.Add(new List<DateTime>(dateTimeColumn.Values.Select(x => x ?? DateTime.MinValue)));
     527        else throw new InvalidOperationException("Column type not supported for export");
     528      }
     529
     530      return new Dataset(VariableNames, values);
    429531    }
    430532    #endregion
     
    452554
    453555    #region Transactions
    454     // Stapshot/History are nost storable/cloneable on purpose
     556    // Snapshot/History are not storable/cloneable on purpose
    455557    private class Snapshot {
    456       public IList<IList> VariableValues { get; set; }
    457       public IList<string> VariableNames { get; set; }
     558      public List<PreprocessingDataColumn> DataColumns { get; set; }
    458559
    459560      public IntRange TrainingPartition { get; set; }
     
    472573    }
    473574
    474     private const int MAX_UNDO_DEPTH = 5;
     575    private const int MaxUndoDepth = 5;
    475576
    476577    private readonly IList<Snapshot> undoHistory = new List<Snapshot>();
     
    482583      if (IsInTransaction) return;
    483584
     585      var cloner = new Cloner();
    484586      var currentSnapshot = new Snapshot {
    485         VariableValues = CopyVariableValues(variableValues),
    486         VariableNames = new List<string>(variableNames),
     587        DataColumns = new List<PreprocessingDataColumn>(dataColumns.Select(cloner.Clone)),
    487588        TrainingPartition = new IntRange(TrainingPartition.Start, TrainingPartition.End),
    488589        TestPartition = new IntRange(TestPartition.Start, TestPartition.End),
     
    493594      };
    494595
    495       if (undoHistory.Count >= MAX_UNDO_DEPTH)
     596      if (undoHistory.Count >= MaxUndoDepth)
    496597        undoHistory.RemoveAt(0);
    497598
     
    506607      if (IsUndoAvailable) {
    507608        Snapshot previousSnapshot = undoHistory[undoHistory.Count - 1];
    508         variableValues = previousSnapshot.VariableValues;
    509         variableNames = previousSnapshot.VariableNames;
     609        dataColumns = previousSnapshot.DataColumns;
    510610        TrainingPartition = previousSnapshot.TrainingPartition;
    511611        TestPartition = previousSnapshot.TestPartition;
     
    620720
    621721    private IEnumerable<T> GetValuesWithoutMissingValues<T>(int columnIndex, bool considerSelection) {
    622       return GetValues<T>(columnIndex, considerSelection).Where(x => !IsMissingValue(x));
     722      //var doubleColumn = dataColumns[columnIndex] as DoublePreprocessingDataColumn;
     723      //var stringColumn = dataColumns[columnIndex] as StringPreprocessingDataColumn;
     724      //var dateTimeColumn = dataColumns[columnIndex] as DateTimePreprocessingDataColumn;
     725      //return GetValues<T>(columnIndex, considerSelection).Where(x =>
     726      //  doubleColumn != null ? doubleColumn.IsValidValue(Convert<double>(x))
     727      //  : stringColumn != null ? stringColumn.IsValidValue(Convert<string>(x))
     728      //  : dateTimeColumn != null ? dateTimeColumn.IsValidValue(Convert<DateTime>(x))
     729      //  : false);
     730      //!IsMissingValue(x));
     731
     732      return GetValues<T>(columnIndex, considerSelection).Where(x =>
     733        ColumnTypeSwitchFuncResult<T, bool>(columnIndex, x,
     734          (c, v) => v.HasValue && c.IsValidValue(v.Value),
     735          (c, v) => c.IsValidValue(v),
     736          (c, v) => v.HasValue && c.IsValidValue(v.Value)
     737      ));
    623738    }
    624739
     
    626741      return new DateTime((long)(func(values.Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond)) * TimeSpan.TicksPerSecond));
    627742    }
    628     private static T Convert<T>(object obj) { return (T)obj; }
    629743
    630744    public int GetMissingValueCount() {
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/HeuristicLab.DataPreprocessing-3.4.csproj

    r15285 r15291  
    123123    <Compile Include="Content\ScatterPlotContent.cs" />
    124124    <Compile Include="Content\DataCompletenessChartContent.cs" />
     125    <Compile Include="Data\Columns\DateTimePreprocessingDataColumn.cs" />
     126    <Compile Include="Data\Columns\DoublePreprocessingDataColumn.cs" />
     127    <Compile Include="Data\Columns\PreprocessingDataColumn.cs" />
     128    <Compile Include="Data\Columns\StringPreprocessingDataColumn.cs" />
    125129    <Compile Include="Data\FilteredPreprocessingData.cs" />
    126130    <Compile Include="Content\ManipulationContent.cs" />
Note: See TracChangeset for help on using the changeset viewer.