Ignore:
Timestamp:
08/06/18 18:15:29 (13 months ago)
Author:
jkarder
Message:

#2839:

Location:
branches/2839_HiveProjectManagement
Files:
3 deleted
28 edited
2 copied

Legend:

Unmodified
Added
Removed
  • branches/2839_HiveProjectManagement

  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing

  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4

  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/CorrelationMatrixContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526using HeuristicLab.Problems.DataAnalysis;
    2627
    2728namespace HeuristicLab.DataPreprocessing {
    2829  [Item("Feature Correlation Matrix", "Represents the feature correlation matrix.")]
    29   public class CorrelationMatrixContent : Item, IViewShortcut {
     30  [StorableClass]
     31  public class CorrelationMatrixContent : PreprocessingContent, IViewShortcut {
    3032    public static new Image StaticItemImage {
    3133      get { return HeuristicLab.Common.Resources.VSImageLibrary.Gradient; }
    3234    }
    3335
     36    [Storable]
    3437    public PreprocessingContext Context { get; private set; }
    35     public ITransactionalPreprocessingData PreprocessingData {
    36       get { return Context.Data; }
    37     }
     38
    3839
    3940    public DataAnalysisProblemData ProblemData {
     
    4748    }
    4849
    49     public CorrelationMatrixContent(PreprocessingContext context) {
     50    #region Constructor, Cloning & Persistence
     51    public CorrelationMatrixContent(PreprocessingContext context)
     52      : base(context.Data) {
    5053      Context = context;
    5154    }
     
    5558      Context = original.Context;
    5659    }
    57 
    5860    public override IDeepCloneable Clone(Cloner cloner) {
    5961      return new CorrelationMatrixContent(this, cloner);
    6062    }
     63
     64    [StorableConstructor]
     65    protected CorrelationMatrixContent(bool deserializing)
     66      : base(deserializing) { }
     67    #endregion
    6168
    6269    public event DataPreprocessingChangedEventHandler Changed {
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/DataCompletenessChartContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526
    2627namespace HeuristicLab.DataPreprocessing {
    2728  [Item("Data Completeness Chart", "Represents a datacompleteness chart.")]
    28 
    29   public class DataCompletenessChartContent : Item, IViewShortcut {
     29  [StorableClass]
     30  public class DataCompletenessChartContent : PreprocessingContent, IViewShortcut {
    3031    public static new Image StaticItemImage {
    3132      get { return HeuristicLab.Common.Resources.VSImageLibrary.EditBrightnessContrast; }
    3233    }
    3334
    34     public SearchLogic SearchLogic { get; private set; }
    35 
    36     public DataCompletenessChartContent(SearchLogic searchLogic) {
    37       SearchLogic = searchLogic;
     35    #region Constructor, Cloning & Persistence
     36    public DataCompletenessChartContent(IFilteredPreprocessingData preprocessingData)
     37      : base(preprocessingData) {
    3838    }
    3939
    4040    public DataCompletenessChartContent(DataCompletenessChartContent content, Cloner cloner)
    4141      : base(content, cloner) {
    42       SearchLogic = content.SearchLogic;
    4342    }
    44 
    4543    public override IDeepCloneable Clone(Cloner cloner) {
    4644      return new DataCompletenessChartContent(this, cloner);
    4745    }
     46
     47    [StorableConstructor]
     48    protected DataCompletenessChartContent(bool deserializing)
     49      : base(deserializing) { }
     50    #endregion
    4851  }
    4952}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/DataGridContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2727using HeuristicLab.Core;
    2828using HeuristicLab.Data;
     29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     30using HeuristicLab.Random;
    2931
    3032namespace HeuristicLab.DataPreprocessing {
    31 
    3233  [Item("Data Grid", "Represents a data grid.")]
    33   public class DataGridContent : Item, IStringConvertibleMatrix, IViewShortcut {
     34  [StorableClass]
     35  public class DataGridContent : PreprocessingContent, IStringConvertibleMatrix, IViewShortcut {
    3436    public static new Image StaticItemImage {
    3537      get { return HeuristicLab.Common.Resources.VSImageLibrary.Table; }
    3638    }
    3739
    38     public ITransactionalPreprocessingData PreProcessingData { get; private set; }
    39 
    40     public ManipulationLogic ManipulationLogic { get; private set; }
    41     public FilterLogic FilterLogic { get; private set; }
    42 
    4340    public int Rows {
    44       get { return PreProcessingData.Rows; }
     41      get { return PreprocessingData.Rows; }
    4542      set { }
    4643    }
    4744
    4845    public int Columns {
    49       get { return PreProcessingData.Columns; }
     46      get { return PreprocessingData.Columns; }
    5047      set { }
    5148    }
    5249
    5350    public IEnumerable<string> ColumnNames {
    54       get { return PreProcessingData.VariableNames; }
     51      get { return PreprocessingData.VariableNames; }
    5552      set { }
    5653    }
     
    7168
    7269    public IDictionary<int, IList<int>> Selection {
    73       get { return PreProcessingData.Selection; }
    74       set { PreProcessingData.Selection = value; }
    75     }
    76 
    77     public DataGridContent(ITransactionalPreprocessingData preProcessingData, ManipulationLogic theManipulationLogic, FilterLogic theFilterLogic) {
    78       ManipulationLogic = theManipulationLogic;
    79       FilterLogic = theFilterLogic;
    80       PreProcessingData = preProcessingData;
    81     }
    82 
    83     public DataGridContent(DataGridContent dataGridContent, Cloner cloner)
    84       : base(dataGridContent, cloner) {
    85 
     70      get { return PreprocessingData.Selection; }
     71      set { PreprocessingData.Selection = value; }
     72    }
     73
     74    #region Constructor, Cloning & Persistence
     75    public DataGridContent(IFilteredPreprocessingData preprocessingData)
     76      : base(preprocessingData) {
     77    }
     78
     79    public DataGridContent(DataGridContent original, Cloner cloner)
     80      : base(original, cloner) {
    8681    }
    8782    public override IDeepCloneable Clone(Cloner cloner) {
     
    8984    }
    9085
     86    [StorableConstructor]
     87    protected DataGridContent(bool deserializing)
     88      : base(deserializing) { }
     89    #endregion
     90
    9191    public void DeleteRows(IEnumerable<int> rows) {
    92       PreProcessingData.DeleteRowsWithIndices(rows);
     92      PreprocessingData.DeleteRowsWithIndices(rows);
    9393    }
    9494
    9595    public void DeleteColumn(int column) {
    96       PreProcessingData.DeleteColumn(column);
     96      PreprocessingData.DeleteColumn(column);
    9797    }
    9898
    9999    public bool Validate(string value, out string errorMessage, int columnIndex) {
    100       return PreProcessingData.Validate(value, out errorMessage, columnIndex);
     100      return PreprocessingData.Validate(value, out errorMessage, columnIndex);
    101101    }
    102102
    103103    public string GetValue(int rowIndex, int columnIndex) {
    104       return PreProcessingData.GetCellAsString(columnIndex, rowIndex);
     104      return PreprocessingData.GetCellAsString(columnIndex, rowIndex);
    105105    }
    106106
    107107    public bool SetValue(string value, int rowIndex, int columnIndex) {
    108       return PreProcessingData.SetValue(value, columnIndex, rowIndex);
     108      return PreprocessingData.SetValue(value, columnIndex, rowIndex);
    109109    }
    110110
    111111    public event DataPreprocessingChangedEventHandler Changed {
    112       add { PreProcessingData.Changed += value; }
    113       remove { PreProcessingData.Changed -= value; }
     112      add { PreprocessingData.Changed += value; }
     113      remove { PreprocessingData.Changed -= value; }
    114114    }
    115115
     
    132132#pragma warning restore 0067
    133133    #endregion
     134
     135    #region Manipulations
     136    private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null,
     137      Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null) {
     138      PreprocessingData.InTransaction(() => {
     139        foreach (var column in cells) {
     140          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     141            var value = doubleAggregator(column.Key);
     142            foreach (int index in column.Value)
     143              PreprocessingData.SetCell<double>(column.Key, index, value);
     144          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     145            var value = dateTimeAggregator(column.Key);
     146            foreach (int index in column.Value)
     147              PreprocessingData.SetCell<DateTime>(column.Key, index, value);
     148          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     149            var value = stringAggregator(column.Key);
     150            foreach (int index in column.Value)
     151              PreprocessingData.SetCell<string>(column.Key, index, value);
     152          }
     153        }
     154      });
     155    }
     156
     157    private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null,
     158      Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null) {
     159      PreprocessingData.InTransaction(() => {
     160        foreach (var column in cells) {
     161          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     162            var values = doubleAggregator(column.Key);
     163            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     164              PreprocessingData.SetCell<double>(column.Key, pair.row, pair.value);
     165          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     166            var values = dateTimeAggregator(column.Key);
     167            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     168              PreprocessingData.SetCell<DateTime>(column.Key, pair.row, pair.value);
     169          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     170            var values = stringAggregator(column.Key);
     171            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     172              PreprocessingData.SetCell<string>(column.Key, pair.row, pair.value);
     173          }
     174        }
     175      });
     176    }
     177
     178    public void ReplaceIndicesByMean(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     179      ReplaceIndicesByValue(cells,
     180        col => PreprocessingData.GetMean<double>(col, considerSelection),
     181        col => PreprocessingData.GetMean<DateTime>(col, considerSelection));
     182    }
     183
     184    public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     185      ReplaceIndicesByValue(cells,
     186        col => PreprocessingData.GetMedian<double>(col, considerSelection),
     187        col => PreprocessingData.GetMedian<DateTime>(col, considerSelection));
     188    }
     189
     190    public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     191      ReplaceIndicesByValue(cells,
     192        col => PreprocessingData.GetMode<double>(col, considerSelection),
     193        col => PreprocessingData.GetMode<DateTime>(col, considerSelection),
     194        col => PreprocessingData.GetMode<string>(col, considerSelection));
     195    }
     196
     197    public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     198      var rand = new FastRandom();
     199      ReplaceIndicesByValues(cells,
     200        col => {
     201          double min = PreprocessingData.GetMin<double>(col, considerSelection);
     202          double max = PreprocessingData.GetMax<double>(col, considerSelection);
     203          double range = max - min;
     204          return cells[col].Select(_ => rand.NextDouble() * range + min);
     205        },
     206        col => {
     207          var min = PreprocessingData.GetMin<DateTime>(col, considerSelection);
     208          var max = PreprocessingData.GetMax<DateTime>(col, considerSelection);
     209          double range = (max - min).TotalSeconds;
     210          return cells[col].Select(_ => min + TimeSpan.FromSeconds(rand.NextDouble() * range));
     211        });
     212    }
     213
     214    public void ReplaceIndicesByString(IDictionary<int, IList<int>> cells, string value) {
     215      PreprocessingData.InTransaction(() => {
     216        foreach (var column in cells) {
     217          foreach (var rowIdx in column.Value) {
     218            PreprocessingData.SetValue(value, column.Key, rowIdx);
     219          }
     220        }
     221      });
     222    }
     223
     224
     225    public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
     226      PreprocessingData.InTransaction(() => {
     227        foreach (var column in cells) {
     228          IList<Tuple<int, int>> startEndings = GetStartAndEndingsForInterpolation(column);
     229          foreach (var tuple in startEndings) {
     230            Interpolate(column, tuple.Item1, tuple.Item2);
     231          }
     232        }
     233      });
     234    }
     235
     236    private List<Tuple<int, int>> GetStartAndEndingsForInterpolation(KeyValuePair<int, IList<int>> column) {
     237      var startEndings = new List<Tuple<int, int>>();
     238      var rowIndices = column.Value.OrderBy(x => x).ToList();
     239      var count = rowIndices.Count;
     240      int start = int.MinValue;
     241      for (int i = 0; i < count; ++i) {
     242        if (start == int.MinValue) {
     243          start = IndexOfPrevPresentValue(column.Key, rowIndices[i]);
     244        }
     245        if (i + 1 == count || (i + 1 < count && rowIndices[i + 1] - rowIndices[i] > 1)) {
     246          int next = IndexOfNextPresentValue(column.Key, rowIndices[i]);
     247          if (start > 0 && next < PreprocessingData.Rows) {
     248            startEndings.Add(new Tuple<int, int>(start, next));
     249          }
     250          start = int.MinValue;
     251        }
     252      }
     253      return startEndings;
     254    }
     255
     256    private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex) {
     257      int valuesToInterpolate = nextIndex - prevIndex;
     258
     259      if (PreprocessingData.VariableHasType<double>(column.Key)) {
     260        double prev = PreprocessingData.GetCell<double>(column.Key, prevIndex);
     261        double next = PreprocessingData.GetCell<double>(column.Key, nextIndex);
     262        double interpolationStep = (next - prev) / valuesToInterpolate;
     263
     264        for (int i = prevIndex; i < nextIndex; ++i) {
     265          double interpolated = prev + (interpolationStep * (i - prevIndex));
     266          PreprocessingData.SetCell<double>(column.Key, i, interpolated);
     267        }
     268      } else if (PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     269        DateTime prev = PreprocessingData.GetCell<DateTime>(column.Key, prevIndex);
     270        DateTime next = PreprocessingData.GetCell<DateTime>(column.Key, nextIndex);
     271        double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
     272
     273        for (int i = prevIndex; i < nextIndex; ++i) {
     274          DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
     275          PreprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
     276        }
     277      }
     278    }
     279
     280    private int IndexOfPrevPresentValue(int columnIndex, int start) {
     281      int offset = start - 1;
     282      while (offset >= 0 && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     283        offset--;
     284      }
     285
     286      return offset;
     287    }
     288
     289    private int IndexOfNextPresentValue(int columnIndex, int start) {
     290      int offset = start + 1;
     291      while (offset < PreprocessingData.Rows && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     292        offset++;
     293      }
     294
     295      return offset;
     296    }
     297
     298    public void Shuffle(bool shuffleRangesSeparately) {
     299      var random = new FastRandom();
     300
     301      if (shuffleRangesSeparately) {
     302        var ranges = new[] { PreprocessingData.TestPartition, PreprocessingData.TrainingPartition };
     303        PreprocessingData.InTransaction(() => {
     304          // process all given ranges - e.g. TrainingPartition, TestPartition
     305          foreach (IntRange range in ranges) {
     306            var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
     307            var shuffledIndices = Enumerable.Range(range.Start, range.Size).Shuffle(random).ToArray();
     308            for (int i = range.Start, j = 0; i < range.End; i++, j++)
     309              indices[i] = shuffledIndices[j];
     310
     311            ReOrderToIndices(indices);
     312          }
     313        });
     314
     315      } else {
     316        PreprocessingData.InTransaction(() => {
     317          var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
     318          indices.ShuffleInPlace(random);
     319          ReOrderToIndices(indices);
     320        });
     321      }
     322    }
     323
     324    public void ReOrderToIndices(int[] indices) {
     325      PreprocessingData.InTransaction(() => {
     326        for (int i = 0; i < PreprocessingData.Columns; ++i) {
     327          if (PreprocessingData.VariableHasType<double>(i))
     328            ReOrderToIndices<double>(i, indices);
     329          else if (PreprocessingData.VariableHasType<string>(i))
     330            ReOrderToIndices<string>(i, indices);
     331          else if (PreprocessingData.VariableHasType<DateTime>(i))
     332            ReOrderToIndices<DateTime>(i, indices);
     333        }
     334      });
     335    }
     336
     337    private void ReOrderToIndices<T>(int columnIndex, int[] indices) {
     338      var originalData = new List<T>(PreprocessingData.GetValues<T>(columnIndex));
     339      if (indices.Length != originalData.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
     340
     341      for (int i = 0; i < indices.Length; i++) {
     342        T newValue = originalData[indices[i]];
     343        PreprocessingData.SetCell<T>(columnIndex, i, newValue);
     344      }
     345    }
     346    #endregion
    134347  }
    135348}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/FilterContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2020#endregion
    2121
     22using System.Collections.Generic;
    2223using System.Drawing;
     24using System.Linq;
    2325using HeuristicLab.Common;
    2426using HeuristicLab.Core;
    2527using HeuristicLab.DataPreprocessing.Filter;
     28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2629
    2730namespace HeuristicLab.DataPreprocessing {
    2831  [Item("Filter", "Represents the filter grid.")]
    29   public class FilterContent : Item, IViewShortcut {
     32  [StorableClass]
     33  public class FilterContent : PreprocessingContent, IViewShortcut {
    3034    public static new Image StaticItemImage {
    3135      get { return HeuristicLab.Common.Resources.VSImageLibrary.Filter; }
    3236    }
    33 
    34     public FilterLogic FilterLogic { get; private set; }
    35 
     37    [Storable]
    3638    public ICheckedItemCollection<IFilter> Filters { get; private set; }
    3739
     40    [Storable]
    3841    public bool IsAndCombination { get; set; }
    3942
    40     public FilterContent(FilterLogic filterLogic) {
     43    public IEnumerable<IFilter> ActiveFilters {
     44      get { return Filters.Where(f => f.Active && f.ConstraintData != null); }
     45    }
     46
     47    public bool[] GetRemainingRows() {
     48      var remainingRows = new bool[PreprocessingData.Rows];
     49      if (ActiveFilters.Any()) {
     50        var filterResults = ActiveFilters.Select(f => f.Check()).ToList();
     51        var rowFilterResults = new bool[filterResults.Count];
     52        for (int row = 0; row < remainingRows.Length; row++) {
     53          for (int i = 0; i < filterResults.Count; i++)
     54            rowFilterResults[i] = filterResults[i][row];
     55
     56          remainingRows[row] = IsAndCombination
     57            ? rowFilterResults.All(x => x)
     58            : rowFilterResults.Any(x => x);
     59        }
     60      } else {
     61        // if no filters are active => all rows are remaining
     62        for (int i = 0; i < remainingRows.Length; i++)
     63          remainingRows[i] = true;
     64      }
     65      return remainingRows;
     66    }
     67
     68    #region Constructor, Cloning & Persistence
     69    public FilterContent(IFilteredPreprocessingData preprocessingData)
     70      : base(preprocessingData) {
    4171      Filters = new CheckedItemCollection<IFilter>();
    4272      IsAndCombination = true;
    43       FilterLogic = filterLogic;
    4473    }
    4574
    46     protected FilterContent(FilterContent content, Cloner cloner)
    47       : base(content, cloner) {
     75    protected FilterContent(FilterContent original, Cloner cloner)
     76      : base(original, cloner) {
     77      Filters = cloner.Clone(original.Filters);
     78      IsAndCombination = original.IsAndCombination;
    4879    }
    49 
    5080    public override IDeepCloneable Clone(Cloner cloner) {
    5181      return new FilterContent(this, cloner);
    5282    }
     83
     84    [StorableConstructor]
     85    protected FilterContent(bool deserializing)
     86      : base(deserializing) { }
     87    #endregion
    5388  }
    5489}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/HistogramContent.cs

    r15210 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2626using HeuristicLab.Common;
    2727using HeuristicLab.Core;
     28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2829
    2930namespace HeuristicLab.DataPreprocessing {
    3031  [Item("Histogram", "Represents the histogram grid.")]
     32  [StorableClass]
    3133  public class HistogramContent : PreprocessingChartContent {
    3234    public static new Image StaticItemImage {
     
    3436    }
    3537
     38    [Storable]
    3639    public string GroupingVariableName { get; set; }
    3740
     41    [Storable]
    3842    public int Bins { get; set; }
     43    [Storable]
    3944    public bool ExactBins { get; set; }
    4045
     46    [Storable]
    4147    public LegendOrder Order { get; set; }
    4248
     49    #region Constructor, Cloning & Persistence
    4350    public HistogramContent(IFilteredPreprocessingData preprocessingData)
    4451      : base(preprocessingData) {
     
    4754    }
    4855
    49     public HistogramContent(HistogramContent content, Cloner cloner)
    50       : base(content, cloner) {
     56    public HistogramContent(HistogramContent original, Cloner cloner)
     57      : base(original, cloner) {
     58      GroupingVariableName = original.GroupingVariableName;
     59      Bins = original.Bins;
     60      ExactBins = original.ExactBins;
     61      Order = original.Order;
    5162    }
    5263    public override IDeepCloneable Clone(Cloner cloner) {
    5364      return new HistogramContent(this, cloner);
    5465    }
     66
     67    [StorableConstructor]
     68    protected HistogramContent(bool deserializing)
     69      : base(deserializing) { }
     70    #endregion
    5571
    5672    public static DataTable CreateHistogram(IFilteredPreprocessingData preprocessingData, string variableName, string groupingVariableName, DataTableVisualProperties.DataTableHistogramAggregation aggregation, LegendOrder legendOrder = LegendOrder.Alphabetically) {
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/IViewShortcut.cs

    r14185 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/LineChartContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526
    2627namespace HeuristicLab.DataPreprocessing {
    27 
    2828  [Item("Line Chart", "Represents the line chart grid.")]
     29  [StorableClass]
    2930  public class LineChartContent : PreprocessingChartContent {
    30     public bool AllInOneMode { get; set; }
    31 
    3231    public static new Image StaticItemImage {
    3332      get { return HeuristicLab.Common.Resources.VSImageLibrary.Performance; }
    3433    }
    3534
     35    [Storable]
     36    public bool AllInOneMode { get; set; }
     37
     38
     39    #region Constructor, Cloning & Persistence
    3640    public LineChartContent(IFilteredPreprocessingData preprocessingData)
    3741      : base(preprocessingData) {
     
    3943    }
    4044
    41     public LineChartContent(LineChartContent content, Cloner cloner)
    42       : base(content, cloner) {
    43       this.AllInOneMode = content.AllInOneMode;
     45    public LineChartContent(LineChartContent original, Cloner cloner)
     46      : base(original, cloner) {
     47      AllInOneMode = original.AllInOneMode;
    4448    }
    4549    public override IDeepCloneable Clone(Cloner cloner) {
    4650      return new LineChartContent(this, cloner);
    4751    }
     52
     53    [StorableConstructor]
     54    protected LineChartContent(bool deserializing)
     55      : base(deserializing) { }
     56    #endregion
    4857  }
    4958}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/ManipulationContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2020#endregion
    2121
     22using System;
     23using System.Collections.Generic;
    2224using System.Drawing;
     25using System.Linq;
    2326using HeuristicLab.Common;
    2427using HeuristicLab.Core;
     28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2529
    2630namespace HeuristicLab.DataPreprocessing {
    27 
    2831  [Item("Manipulation", "Represents the available manipulations on a data set.")]
    29   public class ManipulationContent : Item, IViewShortcut {
     32  [StorableClass]
     33  public class ManipulationContent : PreprocessingContent, IViewShortcut {
    3034    public static new Image StaticItemImage {
    3135      get { return HeuristicLab.Common.Resources.VSImageLibrary.Method; }
    3236    }
    3337
    34     public ManipulationLogic ManipulationLogic { get; private set; }
    35     public SearchLogic SearchLogic { get; private set; }
    36     public FilterLogic FilterLogic { get; private set; }
    37 
    38     public ManipulationContent(ManipulationLogic manipulationLogic, SearchLogic searchLogic, FilterLogic filterLogic) {
    39       ManipulationLogic = manipulationLogic;
    40       SearchLogic = searchLogic;
    41       FilterLogic = filterLogic;
     38    #region Constructor, Cloning & Persistence
     39    public ManipulationContent(IFilteredPreprocessingData preprocessingData)
     40      : base(preprocessingData) {
    4241    }
    4342
    44     public ManipulationContent(ManipulationContent content, Cloner cloner) : base(content, cloner) { }
    45 
     43    public ManipulationContent(ManipulationContent original, Cloner cloner) :
     44      base(original, cloner) {
     45    }
    4646    public override IDeepCloneable Clone(Cloner cloner) {
    4747      return new ManipulationContent(this, cloner);
    4848    }
     49
     50    [StorableConstructor]
     51    protected ManipulationContent(bool deserializing)
     52      : base(deserializing) { }
     53    #endregion
     54
     55    public List<int> RowsWithMissingValuesGreater(double percent) {
     56      List<int> rows = new List<int>();
     57
     58      for (int i = 0; i < PreprocessingData.Rows; ++i) {
     59        int missingCount = PreprocessingData.GetRowMissingValueCount(i);
     60        if (100f / PreprocessingData.Columns * missingCount > percent) {
     61          rows.Add(i);
     62        }
     63      }
     64
     65      return rows;
     66    }
     67
     68    public List<int> ColumnsWithMissingValuesGreater(double percent) {
     69      List<int> columns = new List<int>();
     70      for (int i = 0; i < PreprocessingData.Columns; ++i) {
     71        int missingCount = PreprocessingData.GetMissingValueCount(i);
     72        if (100f / PreprocessingData.Rows * missingCount > percent) {
     73          columns.Add(i);
     74        }
     75      }
     76
     77      return columns;
     78    }
     79
     80    public List<int> ColumnsWithVarianceSmaller(double variance) {
     81      List<int> columns = new List<int>();
     82      for (int i = 0; i < PreprocessingData.Columns; ++i) {
     83        if (PreprocessingData.VariableHasType<double>(i)) {
     84          double columnVariance = PreprocessingData.GetVariance<double>(i);
     85          if (columnVariance < variance) {
     86            columns.Add(i);
     87          }
     88        } else if (PreprocessingData.VariableHasType<DateTime>(i)) {
     89          double columnVariance = (double)PreprocessingData.GetVariance<DateTime>(i).Ticks / TimeSpan.TicksPerSecond;
     90          if (columnVariance < variance) {
     91            columns.Add(i);
     92          }
     93        }
     94      }
     95      return columns;
     96    }
     97
     98    public void DeleteRowsWithMissingValuesGreater(double percent) {
     99      DeleteRows(RowsWithMissingValuesGreater(percent));
     100    }
     101
     102    public void DeleteColumnsWithMissingValuesGreater(double percent) {
     103      DeleteColumns(ColumnsWithMissingValuesGreater(percent));
     104    }
     105
     106    public void DeleteColumnsWithVarianceSmaller(double variance) {
     107      DeleteColumns(ColumnsWithVarianceSmaller(variance));
     108    }
     109
     110    private void DeleteRows(List<int> rows) {
     111      PreprocessingData.InTransaction(() => {
     112        foreach (int row in rows.OrderByDescending(x => x)) {
     113          PreprocessingData.DeleteRow(row);
     114        }
     115      });
     116    }
     117
     118    private void DeleteColumns(List<int> columns) {
     119      PreprocessingData.InTransaction(() => {
     120        foreach (int column in columns.OrderByDescending(x => x)) {
     121          PreprocessingData.DeleteColumn(column);
     122        }
     123      });
     124    }
    49125  }
    50126}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/MultiScatterPlotContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526
    2627namespace HeuristicLab.DataPreprocessing {
    27 
    2828  [Item("Multi Scatter Plot", "Represents a multi scatter plot.")]
     29  [StorableClass]
    2930  public class MultiScatterPlotContent : ScatterPlotContent {
    3031    public static new Image StaticItemImage {
     
    3233    }
    3334
     35    #region Constructor, Cloning & Persistence
    3436    public MultiScatterPlotContent(IFilteredPreprocessingData preprocessingData)
    3537      : base(preprocessingData) {
    3638    }
    3739
    38     public MultiScatterPlotContent(MultiScatterPlotContent content, Cloner cloner)
    39       : base(content, cloner) {
     40    public MultiScatterPlotContent(MultiScatterPlotContent original, Cloner cloner)
     41      : base(original, cloner) {
    4042    }
    41 
    4243    public override IDeepCloneable Clone(Cloner cloner) {
    4344      return new MultiScatterPlotContent(this, cloner);
    4445    }
     46
     47    [StorableConstructor]
     48    protected MultiScatterPlotContent(bool deserializing)
     49      : base(deserializing) { }
     50    #endregion
    4551  }
    4652}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/PreprocessingChartContent.cs

    r15210 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2929using HeuristicLab.Core;
    3030using HeuristicLab.Data;
     31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3132
    3233namespace HeuristicLab.DataPreprocessing {
    3334  [Item("PreprocessingChart", "Represents a preprocessing chart.")]
    34   public class PreprocessingChartContent : Item, IViewShortcut {
     35  [StorableClass]
     36  public class PreprocessingChartContent : PreprocessingContent, IViewShortcut {
    3537    public enum LegendOrder {
    3638      Alphabetically,
     
    4244    }
    4345
    44     private ICheckedItemList<StringValue> variableItemList = null;
     46    [Storable]
     47    private ICheckedItemList<StringValue> variableItemList;
    4548    public ICheckedItemList<StringValue> VariableItemList {
    4649      get {
    4750        if (variableItemList == null)
    4851          variableItemList = CreateVariableItemList(PreprocessingData);
    49         return this.variableItemList;
     52        return variableItemList;
    5053      }
    5154    }
    5255
    53     public IFilteredPreprocessingData PreprocessingData { get; private set; }
    5456    public event DataPreprocessingChangedEventHandler Changed {
    5557      add { PreprocessingData.Changed += value; }
     
    5759    }
    5860
    59     public PreprocessingChartContent(IFilteredPreprocessingData preprocessingData) {
    60       PreprocessingData = preprocessingData;
     61    #region Constructor, Cloning & Persistence
     62    public PreprocessingChartContent(IFilteredPreprocessingData preprocessingData)
     63       : base(preprocessingData) {
    6164    }
    6265
    63     public PreprocessingChartContent(PreprocessingChartContent content, Cloner cloner)
    64       : base(content, cloner) {
    65       this.PreprocessingData = content.PreprocessingData;
    66       this.variableItemList = cloner.Clone<ICheckedItemList<StringValue>>(variableItemList);
     66    public PreprocessingChartContent(PreprocessingChartContent original, Cloner cloner)
     67      : base(original, cloner) {
     68      variableItemList = cloner.Clone(original.variableItemList);
    6769    }
    6870    public override IDeepCloneable Clone(Cloner cloner) {
    6971      return new PreprocessingChartContent(this, cloner);
    7072    }
     73
     74    [StorableConstructor]
     75    protected PreprocessingChartContent(bool deserializing)
     76      : base(deserializing) { }
     77    #endregion
    7178
    7279    public DataRow CreateDataRow(string variableName, DataRowVisualProperties.DataRowChartType chartType) {
     
    7582
    7683    public static DataRow CreateDataRow(IFilteredPreprocessingData preprocessingData, string variableName, DataRowVisualProperties.DataRowChartType chartType) {
    77       IList<double> values = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName));
    78       DataRow row = new DataRow(variableName, "", values);
    79       row.VisualProperties.ChartType = chartType;
     84      var values = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName));
     85      var row = new DataRow(variableName, "", values) {
     86        VisualProperties = {
     87          ChartType = chartType,
     88          StartIndexZero = true
     89        }
     90      };
    8091      return row;
    8192    }
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/ScatterPlotContent.cs

    r15210 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2525using HeuristicLab.Analysis;
    2626using HeuristicLab.Common;
     27using HeuristicLab.Core;
     28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2729using HeuristicLab.Visualization.ChartControlsExtensions;
    2830
    2931namespace HeuristicLab.DataPreprocessing {
    30 
     32  [Item("ScatterPlotContent", "")]
     33  [StorableClass]
    3134  public abstract class ScatterPlotContent : PreprocessingChartContent {
     35    [Storable]
    3236    public string GroupingVariable { get; set; }
    3337
     38    #region Constructor, Cloning & Persistence
    3439    protected ScatterPlotContent(IFilteredPreprocessingData preprocessingData)
    3540      : base(preprocessingData) {
    3641    }
    3742
    38     protected ScatterPlotContent(ScatterPlotContent content, Cloner cloner)
    39       : base(content, cloner) {
     43    protected ScatterPlotContent(ScatterPlotContent original, Cloner cloner)
     44      : base(original, cloner) {
     45      GroupingVariable = original.GroupingVariable;
    4046    }
     47
     48    [StorableConstructor]
     49    protected ScatterPlotContent(bool deserializing)
     50      : base(deserializing) { }
     51    #endregion
    4152
    4253    public static ScatterPlot CreateScatterPlot(IFilteredPreprocessingData preprocessingData, string variableNameX, string variableNameY, string variableNameGroup = "-", LegendOrder legendOrder = LegendOrder.Alphabetically) {
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/SingleScatterPlotContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526
    2627namespace HeuristicLab.DataPreprocessing {
    27 
    2828  [Item("Scatter Plot", "Represents a scatter plot.")]
     29  [StorableClass]
    2930  public class SingleScatterPlotContent : ScatterPlotContent {
    3031    public static new Image StaticItemImage {
     
    3233    }
    3334
     35    [Storable]
    3436    public string SelectedXVariable { get; set; }
     37    [Storable]
    3538    public string SelectedYVariable { get; set; }
    3639
     40    #region Constructor, Cloning & Persistence
    3741    public SingleScatterPlotContent(IFilteredPreprocessingData preprocessingData)
    3842      : base(preprocessingData) {
    3943    }
    4044
    41     public SingleScatterPlotContent(SingleScatterPlotContent content, Cloner cloner)
    42       : base(content, cloner) {
    43       this.SelectedXVariable = content.SelectedXVariable;
    44       this.SelectedYVariable = content.SelectedYVariable;
    45       this.GroupingVariable = content.GroupingVariable;
     45    public SingleScatterPlotContent(SingleScatterPlotContent original, Cloner cloner)
     46      : base(original, cloner) {
     47      SelectedXVariable = original.SelectedXVariable;
     48      SelectedYVariable = original.SelectedYVariable;
    4649    }
    47 
    4850    public override IDeepCloneable Clone(Cloner cloner) {
    4951      return new SingleScatterPlotContent(this, cloner);
    5052    }
     53
     54    [StorableConstructor]
     55    protected SingleScatterPlotContent(bool deserializing)
     56      : base(deserializing) { }
     57    #endregion
    5158  }
    5259}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/StatisticsContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526
    2627namespace HeuristicLab.DataPreprocessing {
    2728  [Item("Statistics", "Represents the statistics grid.")]
    28   public class StatisticsContent : Item, IViewShortcut {
     29  [StorableClass]
     30  public class StatisticsContent : PreprocessingContent, IViewShortcut {
    2931    public static new Image StaticItemImage {
    3032      get { return HeuristicLab.Common.Resources.VSImageLibrary.Object; }
    3133    }
    3234
    33     public ITransactionalPreprocessingData PreprocessingData { get; private set; }
    34     public StatisticsLogic StatisticsLogic { get; private set; }
    35 
    36     public StatisticsContent(ITransactionalPreprocessingData preProcessingData, StatisticsLogic statisticsLogic) {
    37       PreprocessingData = preProcessingData;
    38       StatisticsLogic = statisticsLogic;
     35    #region Constructor, Cloning & Persistence
     36    public StatisticsContent(IFilteredPreprocessingData preprocessingData)
     37      : base(preprocessingData) {
    3938    }
    4039
    41     public StatisticsContent(StatisticsContent content, Cloner cloner)
    42       : base(content, cloner) {
     40    public StatisticsContent(StatisticsContent original, Cloner cloner)
     41      : base(original, cloner) {
    4342    }
    44 
    4543    public override IDeepCloneable Clone(Cloner cloner) {
    4644      return new StatisticsContent(this, cloner);
    4745    }
    4846
     47    [StorableConstructor]
     48    protected StatisticsContent(bool deserializing)
     49      : base(deserializing) { }
     50    #endregion
     51
    4952    public event DataPreprocessingChangedEventHandler Changed {
    50       add { StatisticsLogic.Changed += value; }
    51       remove { StatisticsLogic.Changed -= value; }
     53      add { PreprocessingData.Changed += value; }
     54      remove { PreprocessingData.Changed -= value; }
    5255    }
    5356  }
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Content/TransformationContent.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526using HeuristicLab.Problems.DataAnalysis;
    2627
    2728namespace HeuristicLab.DataPreprocessing {
    2829  [Item("Transformation", "Represents the transformation grid.")]
    29   public class TransformationContent : Item, IViewShortcut {
     30  [StorableClass]
     31  public class TransformationContent : PreprocessingContent, IViewShortcut {
    3032    public static new Image StaticItemImage {
    3133      get { return HeuristicLab.Common.Resources.VSImageLibrary.Method; }
    3234    }
    3335
    34     public IPreprocessingData Data { get; private set; }
    35     public FilterLogic FilterLogic { get; private set; }
    36 
     36    [Storable]
    3737    public ICheckedItemList<ITransformation> CheckedTransformationList { get; private set; }
    3838
    39     public TransformationContent(IPreprocessingData data, FilterLogic filterLogic) {
    40       Data = data;
     39    #region Constructor, Cloning & Persistence
     40    public TransformationContent(IFilteredPreprocessingData preprocessingData)
     41      : base(preprocessingData) {
    4142      CheckedTransformationList = new CheckedItemList<ITransformation>();
    42       FilterLogic = filterLogic;
    4343    }
    4444
    4545    public TransformationContent(TransformationContent original, Cloner cloner)
    4646      : base(original, cloner) {
    47       Data = original.Data;
    48       CheckedTransformationList = new CheckedItemList<ITransformation>(original.CheckedTransformationList);
     47      CheckedTransformationList = cloner.Clone(original.CheckedTransformationList);
    4948    }
    50 
    5149    public override IDeepCloneable Clone(Cloner cloner) {
    5250      return new TransformationContent(this, cloner);
    5351    }
     52
     53    [StorableConstructor]
     54    protected TransformationContent(bool deserializing)
     55      : base(deserializing) { }
     56    #endregion
    5457  }
    5558}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Data/DataPreprocessingChangedEvent.cs

    r14185 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Data/FilteredPreprocessingData.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2222using System;
    2323using System.Collections.Generic;
     24using System.Linq;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
    2627using HeuristicLab.Data;
     28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2729using HeuristicLab.Problems.DataAnalysis;
    2830
    2931namespace HeuristicLab.DataPreprocessing {
    30   public class FilteredPreprocessingData : NamedItem, IFilteredPreprocessingData {
    31     private readonly ITransactionalPreprocessingData originalData;
    32     private ITransactionalPreprocessingData filteredData;
    33 
     32  [Item("FilteredPreprocessingData", "Represents filtered data used for preprocessing.")]
     33  [StorableClass]
     34  public sealed class FilteredPreprocessingData : NamedItem, IFilteredPreprocessingData {
     35
     36    [Storable]
     37    private readonly IPreprocessingData originalData;
     38    [Storable]
     39    private IPreprocessingData filteredData;
     40
     41    public IPreprocessingData ActiveData {
     42      get { return IsFiltered ? filteredData : originalData; }
     43    }
     44
     45    #region Constructor, Cloning & Persistence
     46    public FilteredPreprocessingData(IPreprocessingData preprocessingData)
     47      : base() {
     48      originalData = preprocessingData;
     49      filteredData = null;
     50    }
     51
     52    private FilteredPreprocessingData(FilteredPreprocessingData original, Cloner cloner)
     53      : base(original, cloner) {
     54      originalData = original.originalData;
     55      filteredData = original.filteredData;
     56    }
     57    public override IDeepCloneable Clone(Cloner cloner) {
     58      return new FilteredPreprocessingData(this, cloner);
     59    }
     60
     61    [StorableConstructor]
     62    private FilteredPreprocessingData(bool deserializing)
     63      : base(deserializing) { }
     64    #endregion
     65
     66    #region Cells
     67    public bool IsCellEmpty(int columnIndex, int rowIndex) {
     68      return ActiveData.IsCellEmpty(columnIndex, rowIndex);
     69    }
     70
     71    public T GetCell<T>(int columnIndex, int rowIndex) {
     72      return ActiveData.GetCell<T>(columnIndex, rowIndex);
     73    }
     74
     75    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
     76      if (IsFiltered)
     77        throw new InvalidOperationException("SetValues not possible while data is filtered");
     78      originalData.SetCell<T>(columnIndex, rowIndex, value);
     79    }
     80
     81    public string GetCellAsString(int columnIndex, int rowIndex) {
     82      return ActiveData.GetCellAsString(columnIndex, rowIndex);
     83    }
     84
     85    public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
     86      return ActiveData.GetValues<T>(columnIndex, considerSelection);
     87    }
     88
     89    public void SetValues<T>(int columnIndex, IList<T> values) {
     90      if (IsFiltered)
     91        throw new InvalidOperationException("SetValues not possible while data is filtered");
     92
     93      originalData.SetValues<T>(columnIndex, values);
     94    }
     95
     96    public bool SetValue(string value, int columnIndex, int rowIndex) {
     97      if (IsFiltered)
     98        throw new InvalidOperationException("SetValue not possible while data is filtered");
     99      return originalData.SetValue(value, columnIndex, rowIndex);
     100    }
     101
     102    public int Columns {
     103      get { return ActiveData.Columns; }
     104    }
     105
     106    public int Rows {
     107      get { return ActiveData.Rows; }
     108    }
     109    #endregion
     110
     111    #region Rows
     112    public void InsertRow(int rowIndex) {
     113      if (IsFiltered)
     114        throw new InvalidOperationException("InsertRow not possible while data is filtered");
     115
     116      originalData.InsertRow(rowIndex);
     117    }
     118
     119    public void DeleteRow(int rowIndex) {
     120      if (IsFiltered)
     121        throw new InvalidOperationException("DeleteRow not possible while data is filtered");
     122
     123      originalData.DeleteRow(rowIndex);
     124    }
     125
     126    public void DeleteRowsWithIndices(IEnumerable<int> rows) {
     127      if (IsFiltered)
     128        throw new InvalidOperationException("DeleteRowsWithIndices not possible while data is filtered");
     129
     130      originalData.DeleteRowsWithIndices(rows);
     131    }
     132
     133    public void InsertColumn<T>(string variableName, int columnIndex) {
     134      if (IsFiltered)
     135        throw new InvalidOperationException("InsertColumn not possible while data is filtered");
     136
     137      originalData.InsertColumn<T>(variableName, columnIndex);
     138    }
     139
     140    public void DeleteColumn(int columnIndex) {
     141      if (IsFiltered)
     142        throw new InvalidOperationException("DeleteColumn not possible while data is filtered");
     143      originalData.DeleteColumn(columnIndex);
     144    }
     145
     146    public void RenameColumn(int columnIndex, string name) {
     147      if (IsFiltered)
     148        throw new InvalidOperationException("RenameColumn not possible while data is filtered");
     149      originalData.RenameColumn(columnIndex, name);
     150    }
     151
     152    public void RenameColumns(IList<string> names) {
     153      if (IsFiltered)
     154        throw new InvalidOperationException("RenameColumns not possible while data is filtered");
     155      originalData.RenameColumns(names);
     156    }
     157
     158    public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
     159      return originalData.AreAllStringColumns(columnIndices);
     160    }
     161    #endregion
     162
     163    #region Variables
     164    public IEnumerable<string> VariableNames {
     165      get { return ActiveData.VariableNames; }
     166    }
     167    public IEnumerable<string> GetDoubleVariableNames() {
     168      return originalData.GetDoubleVariableNames();
     169    }
     170    public string GetVariableName(int columnIndex) {
     171      return ActiveData.GetVariableName(columnIndex);
     172    }
     173
     174    public int GetColumnIndex(string variableName) {
     175      return ActiveData.GetColumnIndex(variableName);
     176    }
     177
     178    public bool VariableHasType<T>(int columnIndex) {
     179      return originalData.VariableHasType<T>(columnIndex);
     180    }
     181
     182    public Type GetVariableType(int columnIndex) {
     183      return ActiveData.GetVariableType(columnIndex);
     184    }
     185
     186    public IList<string> InputVariables {
     187      get { return ActiveData.InputVariables; }
     188    }
     189
     190    public string TargetVariable {
     191      get { return ActiveData.TargetVariable; }
     192    } // optional
     193    #endregion
     194
     195    #region Partitions
    34196    public IntRange TrainingPartition {
    35197      get { return originalData.TrainingPartition; }
     
    39201      get { return originalData.TestPartition; }
    40202    }
    41 
     203    #endregion
     204
     205    #region Transformations
    42206    public IList<ITransformation> Transformations {
    43207      get { return originalData.Transformations; }
    44208    }
    45 
    46     public IEnumerable<string> VariableNames {
    47       get { return ActiveData.VariableNames; }
    48     }
    49 
    50     public IList<string> InputVariables { get { return ActiveData.InputVariables; } }
    51     public string TargetVariable { get { return ActiveData.TargetVariable; } } // optional
    52 
     209    #endregion
     210
     211    #region Validation
     212    public bool Validate(string value, out string errorMessage, int columnIndex) {
     213      return originalData.Validate(value, out errorMessage, columnIndex);
     214    }
     215    #endregion
     216
     217    #region Import & Export
     218    public void Import(IDataAnalysisProblemData problemData) {
     219      if (IsFiltered)
     220        throw new InvalidOperationException("Import not possible while data is filtered");
     221      originalData.Import(problemData);
     222    }
     223
     224    public Dataset ExportToDataset() {
     225      return originalData.ExportToDataset();
     226    }
     227    #endregion
     228
     229    #region Selection
    53230    public IDictionary<int, IList<int>> Selection {
    54231      get { return originalData.Selection; }
     
    56233    }
    57234
    58     public int Columns {
    59       get { return ActiveData.Columns; }
    60     }
    61 
    62     public int Rows {
    63       get { return ActiveData.Rows; }
    64     }
    65 
    66     public ITransactionalPreprocessingData ActiveData {
    67       get { return IsFiltered ? filteredData : originalData; }
     235    public void ClearSelection() {
     236      originalData.ClearSelection();
     237    }
     238
     239    public event EventHandler SelectionChanged {
     240      add { originalData.SelectionChanged += value; }
     241      remove { originalData.SelectionChanged -= value; }
     242    }
     243    #endregion
     244
     245    #region Transactions
     246    public event DataPreprocessingChangedEventHandler Changed {
     247      add { originalData.Changed += value; }
     248      remove { originalData.Changed -= value; }
    68249    }
    69250
     
    72253    }
    73254
    74     public bool IsFiltered {
    75       get { return filteredData != null; }
    76     }
    77 
    78 
    79     public FilteredPreprocessingData(ITransactionalPreprocessingData preporcessingData)
    80       : base() {
    81       originalData = preporcessingData;
    82       filteredData = null;
    83     }
    84 
    85     protected FilteredPreprocessingData(FilteredPreprocessingData original, Cloner cloner)
    86       : base(original, cloner) {
    87       originalData = original.originalData;
    88       filteredData = original.filteredData;
    89     }
    90     public override IDeepCloneable Clone(Cloner cloner) {
    91       return new FilteredPreprocessingData(this, cloner);
    92     }
    93 
    94     public T GetCell<T>(int columnIndex, int rowIndex) {
    95       return ActiveData.GetCell<T>(columnIndex, rowIndex);
    96     }
    97 
    98     public void SetCell<T>(int columnIndex, int rowIndex, T value) {
    99       if (IsFiltered)
    100         throw new InvalidOperationException("SetValues not possible while data is filtered");
    101       originalData.SetCell<T>(columnIndex, rowIndex, value);
    102     }
    103 
    104     public string GetCellAsString(int columnIndex, int rowIndex) {
    105       return ActiveData.GetCellAsString(columnIndex, rowIndex);
    106     }
    107 
    108     public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
    109       return ActiveData.GetValues<T>(columnIndex, considerSelection);
    110     }
    111 
    112     public void SetValues<T>(int columnIndex, IList<T> values) {
    113       if (IsFiltered)
    114         throw new InvalidOperationException("SetValues not possible while data is filtered");
    115 
    116       originalData.SetValues<T>(columnIndex, values);
    117     }
    118 
    119     public void InsertRow(int rowIndex) {
    120       if (IsFiltered)
    121         throw new InvalidOperationException("InsertRow not possible while data is filtered");
    122 
    123       originalData.InsertRow(rowIndex);
    124     }
    125 
    126     public void DeleteRow(int rowIndex) {
    127       if (IsFiltered)
    128         throw new InvalidOperationException("DeleteRow not possible while data is filtered");
    129 
    130       originalData.DeleteRow(rowIndex);
    131     }
    132 
    133     public void InsertColumn<T>(string variableName, int columnIndex) {
    134       if (IsFiltered)
    135         throw new InvalidOperationException("InsertColumn not possible while data is filtered");
    136 
    137       originalData.InsertColumn<T>(variableName, columnIndex);
    138     }
    139 
    140     public void DeleteColumn(int columnIndex) {
    141       if (IsFiltered)
    142         throw new InvalidOperationException("DeleteColumn not possible while data is filtered");
    143       originalData.DeleteColumn(columnIndex);
    144     }
    145 
    146     public void RenameColumn(int columnIndex, string name) {
    147       if (IsFiltered)
    148         throw new InvalidOperationException("RenameColumn not possible while data is filtered");
    149       originalData.RenameColumn(columnIndex, name);
    150     }
    151 
    152     public void RenameColumns(IList<string> names) {
    153       if (IsFiltered)
    154         throw new InvalidOperationException("RenameColumns not possible while data is filtered");
    155       originalData.RenameColumns(names);
    156     }
    157 
    158     public string GetVariableName(int columnIndex) {
    159       return ActiveData.GetVariableName(columnIndex);
    160     }
    161 
    162     public int GetColumnIndex(string variableName) {
    163       return ActiveData.GetColumnIndex(variableName);
    164     }
    165 
    166     public bool VariableHasType<T>(int columnIndex) {
    167       return originalData.VariableHasType<T>(columnIndex);
    168     }
    169 
    170     public Dataset ExportToDataset() {
    171       return originalData.ExportToDataset();
    172     }
    173 
    174     public void SetFilter(bool[] rowFilters) {
    175       filteredData = (ITransactionalPreprocessingData)originalData.Clone();
     255    public void Undo() {
              // Forwards the undo request to originalData.
              // Throws InvalidOperationException while a filter is active (IsFiltered).
     256      if (IsFiltered)
     257        throw new InvalidOperationException("Undo not possible while data is filtered");
     258
     259      originalData.Undo();
     260    }
     261
     262    public void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any) {
              // Runs the given action as a transaction on originalData, passing the event type through.
              // Throws InvalidOperationException while a filter is active (IsFiltered).
     263      if (IsFiltered)
     264        throw new InvalidOperationException("Transaction not possible while data is filtered");
     265      originalData.InTransaction(action, type);
     266    }
     267
     268    public void BeginTransaction(DataPreprocessingChangedEventType type) {
              // Opens a transaction of the given type on originalData.
              // Throws InvalidOperationException while a filter is active (IsFiltered).
     269      if (IsFiltered)
     270        throw new InvalidOperationException("Transaction not possible while data is filtered");
     271      originalData.BeginTransaction(type);
     272    }
     273
     274    public void EndTransaction() {
              // Closes the current transaction on originalData.
              // NOTE(review): unlike BeginTransaction there is no IsFiltered guard here - confirm this is intended.
     275      originalData.EndTransaction();
     276    }
     277    #endregion
     278
     279    #region Statistics
            // Every member in this region forwards to the member of the same name on ActiveData
            // and passes its arguments through unchanged.
            // NOTE(review): ActiveData is declared outside this view; presumably it is the filtered
            // data while a filter is set and the original data otherwise - confirm.
     280    public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     281      return ActiveData.GetMin<T>(columnIndex, considerSelection, emptyValue);
     282    }
     283    public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     284      return ActiveData.GetMax<T>(columnIndex, considerSelection, emptyValue);
     285    }
     286    public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     287      return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue);
     288    }
     289    public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     290      return ActiveData.GetMedian<T>(columnIndex, considerSelection, emptyValue);
     291    }
     292    public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
     293      return ActiveData.GetMode<T>(columnIndex, considerSelection, emptyValue);
     294    }
     295    public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     296      return ActiveData.GetStandardDeviation<T>(columnIndex, considerSelection, emptyValue);
     297    }
     298    public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     299      return ActiveData.GetVariance<T>(columnIndex, considerSelection, emptyValue);
     300    }
     301    public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     302      return ActiveData.GetQuantile<T>(alpha, columnIndex, considerSelection, emptyValue);
     303    }
     304    public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
     305      return ActiveData.GetDistinctValues<T>(columnIndex, considerSelection);
     306    }
     307
            // Missing-value counts: whole data set, a single column, or a single row.
     308    public int GetMissingValueCount() {
     309      return ActiveData.GetMissingValueCount();
     310    }
     311    public int GetMissingValueCount(int columnIndex) {
     312      return ActiveData.GetMissingValueCount(columnIndex);
     313    }
     314    public int GetRowMissingValueCount(int rowIndex) {
     315      return ActiveData.GetRowMissingValueCount(rowIndex);
     316    }
     317    #endregion
     318
     319    #region Filters
     320    public void SetFilter(bool[] remainingRows) {
     321      filteredData = (IPreprocessingData)originalData.Clone();
    176322      filteredData.InTransaction(() => {
    177         for (int row = (rowFilters.Length - 1); row >= 0; --row) {
    178           if (rowFilters[row]) {
    179             filteredData.DeleteRow(row);
     323        var remainingIndices = Enumerable.Range(0, remainingRows.Length).Where(x => remainingRows[x]);
     324
     325        foreach (var v in filteredData.VariableNames) {
     326          var ci = filteredData.GetColumnIndex(v);
     327          if (filteredData.VariableHasType<double>(ci)) {
     328            var values = filteredData.GetValues<double>(ci);
     329            var filteredValues = remainingIndices.Select(x => values[x]).ToList();
     330            filteredData.SetValues(ci, filteredValues);
     331          } else if (filteredData.VariableHasType<DateTime>(ci)) {
     332            var values = filteredData.GetValues<DateTime>(ci);
     333            var filteredValues = remainingIndices.Select(x => values[x]).ToList();
     334            filteredData.SetValues(ci, filteredValues);
     335          } else if (filteredData.VariableHasType<string>(ci)) {
     336            var values = filteredData.GetValues<string>(ci);
     337            var filteredValues = remainingIndices.Select(x => values[x]).ToList();
     338            filteredData.SetValues(ci, filteredValues);
    180339          }
    181340        }
     
    206365    }
    207366
     367    public bool IsFiltered {
     368      get { return filteredData != null; }
     369    }
     370
     371    public event EventHandler FilterChanged;
     372
    208373    private void OnFilterChanged() {
    209374      if (FilterChanged != null) {
     
    211376      }
    212377    }
    213 
    214     public event DataPreprocessingChangedEventHandler Changed {
    215       add { originalData.Changed += value; }
    216       remove { originalData.Changed -= value; }
    217     }
    218 
    219     public bool SetValue(string value, int columnIndex, int rowIndex) {
    220       if (IsFiltered)
    221         throw new InvalidOperationException("SetValue not possible while data is filtered");
    222       return originalData.SetValue(value, columnIndex, rowIndex);
    223     }
    224 
    225     public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
    226       return originalData.AreAllStringColumns(columnIndices);
    227     }
    228 
    229     public void DeleteRowsWithIndices(IEnumerable<int> rows) {
    230       if (IsFiltered)
    231         throw new InvalidOperationException("DeleteRowsWithIndices not possible while data is filtered");
    232 
    233       originalData.DeleteRowsWithIndices(rows);
    234     }
    235 
    236     public void Undo() {
    237       if (IsFiltered)
    238         throw new InvalidOperationException("Undo not possible while data is filtered");
    239 
    240       originalData.Undo();
    241     }
    242 
    243     public void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any) {
    244       if (IsFiltered)
    245         throw new InvalidOperationException("Transaction not possible while data is filtered");
    246       originalData.InTransaction(action, type);
    247     }
    248 
    249     public void BeginTransaction(DataPreprocessingChangedEventType type) {
    250       if (IsFiltered)
    251         throw new InvalidOperationException("Transaction not possible while data is filtered");
    252       originalData.BeginTransaction(type);
    253     }
    254 
    255     public void EndTransaction() {
    256       originalData.EndTransaction();
    257     }
    258 
    259     public IEnumerable<string> GetDoubleVariableNames() {
    260       return originalData.GetDoubleVariableNames();
    261     }
    262 
    263     public void ClearSelection() {
    264       originalData.ClearSelection();
    265     }
    266 
    267     public event EventHandler SelectionChanged {
    268       add { originalData.SelectionChanged += value; }
    269       remove { originalData.SelectionChanged -= value; }
    270     }
    271 
    272     #region IPreprocessingData Members
    273     public bool Validate(string value, out string errorMessage, int columnIndex) {
    274       return originalData.Validate(value, out errorMessage, columnIndex);
    275     }
    276 
    277     public event EventHandler FilterChanged;
    278378    #endregion
    279379  }
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Data/IFilteredPreprocessingData.cs

    r14185 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323
    2424namespace HeuristicLab.DataPreprocessing {
    25   public interface IFilteredPreprocessingData : ITransactionalPreprocessingData {
    26     void SetFilter(bool[] rowFilters);
     25  public interface IFilteredPreprocessingData : IPreprocessingData {
     26    #region Filters
     27    void SetFilter(bool[] remainingRows);
    2728    void PersistFilter();
    2829    void ResetFilter();
     
    3031
    3132    event EventHandler FilterChanged;
     33    #endregion
    3234  }
    3335}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Data/IPreprocessingData.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2828namespace HeuristicLab.DataPreprocessing {
    2929  public interface IPreprocessingData : INamedItem {
     30    #region Cells
     31    bool IsCellEmpty(int columnIndex, int rowIndex);
    3032    T GetCell<T>(int columnIndex, int rowIndex);
    3133
     
    3941    bool SetValue(string value, int columnIndex, int rowIndex);
    4042
     43    int Columns { get; }
     44    int Rows { get; }
     45    #endregion
     46
     47    #region Rows
    4148    void InsertRow(int rowIndex);
    4249    void DeleteRow(int rowIndex);
     
    5057
    5158    bool AreAllStringColumns(IEnumerable<int> columnIndices);
    52     bool Validate(string value, out string errorMessage, int columnIndex);
     59    #endregion
    5360
    54     IntRange TrainingPartition { get; }
    55     IntRange TestPartition { get; }
    56 
    57     IList<ITransformation> Transformations { get; }
    58 
     61    #region Variables
    5962    IEnumerable<string> VariableNames { get; }
    6063    IEnumerable<string> GetDoubleVariableNames();
     
    6366
    6467    bool VariableHasType<T>(int columnIndex);
     68    Type GetVariableType(int columnIndex);
    6569
    6670    IList<string> InputVariables { get; }
    6771    string TargetVariable { get; } // optional
     72    #endregion
    6873
    69     int Columns { get; }
    70     int Rows { get; }
     74    #region Partitions
     75    IntRange TrainingPartition { get; }
     76    IntRange TestPartition { get; }
     77    #endregion
    7178
     79    #region Transformations
     80    IList<ITransformation> Transformations { get; }
     81    #endregion
     82
     83    #region Validation
     84    bool Validate(string value, out string errorMessage, int columnIndex);
     85    #endregion
     86
     87    #region Import & Export
     88    void Import(IDataAnalysisProblemData problemData);
    7289    Dataset ExportToDataset();
     90    #endregion
    7391
     92    #region Selection
    7493    IDictionary<int, IList<int>> Selection { get; set; }
    7594    void ClearSelection();
    7695
    7796    event EventHandler SelectionChanged;
     97    #endregion
     98
     99    #region Transactions
     100    event DataPreprocessingChangedEventHandler Changed;
     101
     102    bool IsUndoAvailable { get; }
     103    void Undo();
     104    void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any);
     105    void BeginTransaction(DataPreprocessingChangedEventType type);
     106    void EndTransaction();
     107    #endregion
     108
     109    #region Statistics
     110    T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     111    T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     112    T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     113    T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T>;
     114    T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T>;
     115    T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     116    T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     117    T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T>;
     118    int GetDistinctValues<T>(int columnIndex, bool considerSelection = false);
     119
     120    int GetMissingValueCount();
     121    int GetMissingValueCount(int columnIndex);
     122    int GetRowMissingValueCount(int rowIndex);
     123    #endregion
    78124  }
    79125}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using System.Collections;
    2424using System.Collections.Generic;
     25using System.Globalization;
    2526using System.Linq;
    2627using HeuristicLab.Common;
    2728using HeuristicLab.Core;
    2829using HeuristicLab.Data;
     30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2931using HeuristicLab.Problems.DataAnalysis;
    3032
     
    3234
    3335  [Item("PreprocessingData", "Represents data used for preprocessing.")]
    34   public abstract class PreprocessingData : NamedItem, IPreprocessingData {
    35     public IntRange TrainingPartition { get; set; }
    36     public IntRange TestPartition { get; set; }
    37 
    38     public IList<ITransformation> Transformations { get; protected set; }
    39 
     36  [StorableClass]
     37  public class PreprocessingData : NamedItem, IPreprocessingData {
     38
     39    [Storable]
    4040    protected IList<IList> variableValues;
     41    [Storable]
    4142    protected IList<string> variableNames;
    4243
    43     public IEnumerable<string> VariableNames {
    44       get { return variableNames; }
    45     }
    46 
    47     public IEnumerable<string> GetDoubleVariableNames() {
    48       var doubleVariableNames = new List<string>();
    49       for (int i = 0; i < Columns; ++i) {
    50         if (VariableHasType<double>(i)) {
    51           doubleVariableNames.Add(variableNames[i]);
    52         }
    53       }
    54       return doubleVariableNames;
    55     }
    56 
    57     public IList<string> InputVariables { get; private set; }
    58     public string TargetVariable { get; private set; } // optional
    59 
    60     public int Columns {
    61       get { return variableNames.Count; }
    62     }
    63 
    64     public int Rows {
    65       get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
    66     }
    67 
    68     protected IDictionary<int, IList<int>> selection;
    69     public IDictionary<int, IList<int>> Selection {
    70       get { return selection; }
    71       set {
    72         selection = value;
    73         OnSelectionChanged();
    74       }
     44    #region Constructor, Cloning & Persistence
     45    public PreprocessingData(IDataAnalysisProblemData problemData)
     46      : base() {
             // Creates preprocessing data from a problem data instance: sets the item name,
             // starts with an empty transformation list and an empty selection, imports the
             // problem data and finally hooks up the Changed handler (RegisterEventHandler).
     47      Name = "Preprocessing Data";
     48
     49      Transformations = new List<ITransformation>();
     50      selection = new Dictionary<int, IList<int>>();
     51
     52      Import(problemData);
     53
     54      RegisterEventHandler();
    7555    }
    7656
     
    8868      RegisterEventHandler();
    8969    }
    90 
    91     protected PreprocessingData(IDataAnalysisProblemData problemData)
    92       : base() {
    93       Name = "Preprocessing Data";
    94 
    95       Transformations = new List<ITransformation>();
    96       selection = new Dictionary<int, IList<int>>();
    97 
    98       Import(problemData);
    99 
     70    public override IDeepCloneable Clone(Cloner cloner) {
             // Clones this instance through the (this, cloner) copy constructor.
     71      return new PreprocessingData(this, cloner);
     72    }
     73
     74    [StorableConstructor]
           // Constructor marked for the persistence framework; only forwards the flag to the base class.
     75    protected PreprocessingData(bool deserializing)
     76      : base(deserializing) { }
     77    [StorableHook(HookType.AfterDeserialization)]
     78    private void AfterDeserialization() {
             // Re-attaches the Changed handler; the constructors that call RegisterEventHandler
             // are not the ones used for deserialization.
    10079      RegisterEventHandler();
    10180    }
    10281
     82    private void RegisterEventHandler() {
             // Subscribes a handler to Changed that keeps both partitions inside the data:
             // for DeleteRow, Any and Transformation events the Start and End of the training
             // and test partition are clamped to at most Rows. Other event types are ignored.
     83      Changed += (s, e) => {
     84        switch (e.Type) {
     85          case DataPreprocessingChangedEventType.DeleteRow:
     86          case DataPreprocessingChangedEventType.Any:
     87          case DataPreprocessingChangedEventType.Transformation:
     88            int maxRowIndex = Math.Max(0, Rows);
     89            TrainingPartition.Start = Math.Min(TrainingPartition.Start, maxRowIndex);
     90            TrainingPartition.End = Math.Min(TrainingPartition.End, maxRowIndex);
     91            TestPartition.Start = Math.Min(TestPartition.Start, maxRowIndex);
     92            TestPartition.End = Math.Min(TestPartition.End, maxRowIndex);
     93            break;
     94        }
     95      };
     96    }
     97    #endregion
     98
     99    #region Cells
     100    public bool IsCellEmpty(int columnIndex, int rowIndex) {
              // A cell counts as empty when IsMissingValue accepts its stored value.
     101      var value = variableValues[columnIndex][rowIndex];
     102      return IsMissingValue(value);
     103    }
     104
     105    public T GetCell<T>(int columnIndex, int rowIndex) {
              // Returns the stored cell value cast to T; the cast fails at runtime if T does not
              // match the column's element type.
     106      return (T)variableValues[columnIndex][rowIndex];
     107    }
     108
     109    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
              // Writes a single cell. A snapshot is saved first; the data is then grown as needed
              // so that the addressed cell exists, and Changed (ChangeItem) is raised unless a
              // transaction is running.
     110      SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
     111
              // Append rows until rowIndex is a valid row.
     112      for (int i = Rows; i <= rowIndex; i++)
     113        InsertRow(i);
              // Append columns of type T until columnIndex is valid; each new column is named after its index.
     114      for (int i = Columns; i <= columnIndex; i++)
     115        InsertColumn<T>(i.ToString(), i);
     116
     117      variableValues[columnIndex][rowIndex] = value;
     118      if (!IsInTransaction)
     119        OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
     120    }
     121
     122    public string GetCellAsString(int columnIndex, int rowIndex) {
     123      return variableValues[columnIndex][rowIndex].ToString();
     124    }
     125
     126    public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
              // Returns the values of one column.
              // With considerSelection a new list holding only the rows listed in
              // selection[columnIndex] is built; otherwise the column's own list object is
              // returned (no copy), so changes made through it affect the stored data.
              // NOTE(review): selection[columnIndex] is indexed directly - confirm callers only
              // pass considerSelection = true for columns that have a selection entry.
     127      if (considerSelection) {
     128        var list = new List<T>();
     129        foreach (var rowIdx in selection[columnIndex]) {
     130          list.Add((T)variableValues[columnIndex][rowIdx]);
     131        }
     132        return list;
     133      } else {
     134        return (IList<T>)variableValues[columnIndex];
     135      }
     136    }
     137
     138    public void SetValues<T>(int columnIndex, IList<T> values) {
              // Replaces a whole column with the given list; the list is stored as passed in, not copied.
              // Throws ArgumentException when T does not match the column type (VariableHasType<T>).
              // The snapshot is saved before the type check runs.
              // Raises Changed (ChangeColumn) unless a transaction is running.
     139      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
     140      if (VariableHasType<T>(columnIndex)) {
     141        variableValues[columnIndex] = (IList)values;
     142      } else {
     143        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
     144      }
     145      if (!IsInTransaction)
     146        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
     147    }
     148
     149    public bool SetValue(string value, int columnIndex, int rowIndex) {
              // Parses a text value according to the column type (double, string or DateTime) and
              // stores it through SetCell when it is valid.
              // Returns true if the value was accepted, false if it could not be parsed.
              // Throws ArgumentException when the column has none of the three supported types.
     150      bool valid = false;
     151      if (VariableHasType<double>(columnIndex)) {
     152        double val;
                // Null, empty or whitespace-only text is stored as NaN.
     153        if (string.IsNullOrWhiteSpace(value)) {
     154          val = double.NaN;
     155          valid = true;
     156        } else {
     157          valid = double.TryParse(value, out val);
     158        }
     159        if (valid)
     160          SetCell(columnIndex, rowIndex, val);
     161      } else if (VariableHasType<string>(columnIndex)) {
                // Any non-null string is accepted, including the empty string.
     162        valid = value != null;
     163        if (valid)
     164          SetCell(columnIndex, rowIndex, value);
     165      } else if (VariableHasType<DateTime>(columnIndex)) {
     166        DateTime date;
     167        valid = DateTime.TryParse(value, out date);
     168        if (valid)
     169          SetCell(columnIndex, rowIndex, date);
     170      } else {
     171        throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
     172      }
     173
              // Raised for valid and invalid input alike, in addition to the ChangeItem event SetCell raises.
     174      if (!IsInTransaction)
     175        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
     176
     177      return valid;
     178    }
     179
     180    public int Columns {
              // Number of columns, taken from the variable name list.
     181      get { return variableNames.Count; }
     182    }
     183
     184    public int Rows {
              // Number of rows, read from the first column; 0 when there are no columns.
     185      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
     186    }
     187
     188    public static bool IsMissingValue(object value) {
     189      if (value is double) return double.IsNaN((double)value);
     190      if (value is string) return string.IsNullOrEmpty((string)value);
     191      if (value is DateTime) return ((DateTime)value).Equals(DateTime.MinValue);
     192      throw new ArgumentException();
     193    }
     194    #endregion
     195
     196    #region Rows
     197    public void InsertRow(int rowIndex) {
     198      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
     199      foreach (IList column in variableValues) {
     200        Type type = column.GetType().GetGenericArguments()[0];
     201        column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
     202      }
     203      if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
     204        TrainingPartition.End++;
     205        if (TrainingPartition.End <= TestPartition.Start) {
     206          TestPartition.Start++;
     207          TestPartition.End++;
     208        }
     209      } else if (TestPartition.Start <= rowIndex && rowIndex <= TestPartition.End) {
     210        TestPartition.End++;
     211        if (TestPartition.End <= TrainingPartition.Start) {
     212          TestPartition.Start++;
     213          TestPartition.End++;
     214        }
     215      }
     216      if (!IsInTransaction)
     217        OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
     218    }
     219    public void DeleteRow(int rowIndex) {
     220      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
     221      foreach (IList column in variableValues) {
     222        column.RemoveAt(rowIndex);
     223      }
     224      if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
     225        TrainingPartition.End--;
     226        if (TrainingPartition.End <= TestPartition.Start) {
     227          TestPartition.Start--;
     228          TestPartition.End--;
     229        }
     230      } else if (TestPartition.Start <= rowIndex && rowIndex <= TestPartition.End) {
     231        TestPartition.End--;
     232        if (TestPartition.End <= TrainingPartition.Start) {
     233          TestPartition.Start--;
     234          TestPartition.End--;
     235        }
     236      }
     237      if (!IsInTransaction)
     238        OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
     239    }
     240    public void DeleteRowsWithIndices(IEnumerable<int> rows) {
     241      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, -1);
     242      foreach (int rowIndex in rows.OrderByDescending(x => x)) {
     243        foreach (IList column in variableValues) {
     244          column.RemoveAt(rowIndex);
     245        }
     246        if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
     247          TrainingPartition.End--;
     248          if (TrainingPartition.End <= TestPartition.Start) {
     249            TestPartition.Start--;
     250            TestPartition.End--;
     251          }
     252        } else if (TestPartition.Start <= rowIndex && rowIndex <= TestPartition.End) {
     253          TestPartition.End--;
     254          if (TestPartition.End <= TrainingPartition.Start) {
     255            TestPartition.Start--;
     256            TestPartition.End--;
     257          }
     258        }
     259      }
     260      if (!IsInTransaction)
     261        OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, -1);
     262    }
     263
     264    public void InsertColumn<T>(string variableName, int columnIndex) {
              // Inserts a new column of element type T at columnIndex with one default(T) cell per
              // existing row (null for reference types such as string) and registers its name.
              // Raises Changed (AddColumn) unless a transaction is running.
     265      SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
     266      variableValues.Insert(columnIndex, new List<T>(Enumerable.Repeat(default(T), Rows)));
     267      variableNames.Insert(columnIndex, variableName);
     268      if (!IsInTransaction)
     269        OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
     270    }
     271
     272    public void DeleteColumn(int columnIndex) {
              // Removes the column's values and its name.
              // Raises Changed (DeleteColumn) unless a transaction is running.
     273      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
     274      variableValues.RemoveAt(columnIndex);
     275      variableNames.RemoveAt(columnIndex);
     276      if (!IsInTransaction)
     277        OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
     278    }
     279
     280    public void RenameColumn(int columnIndex, string name) {
     281      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
     282      if (columnIndex < 0 || columnIndex > variableNames.Count)
     283        throw new ArgumentOutOfRangeException("columnIndex");
     284      variableNames[columnIndex] = name;
     285
     286      if (!IsInTransaction)
     287        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
     288    }
     289
     290    public void RenameColumns(IList<string> names) {
              // Renames all columns at once; names[i] becomes the name of column i.
              // Throws ArgumentNullException for a null list and ArgumentException when the number
              // of names differs from the number of columns. Both checks run before the snapshot.
              // Raises Changed (ChangeColumn) unless a transaction is running.
     291      if (names == null) throw new ArgumentNullException("names");
     292      if (names.Count != variableNames.Count) throw new ArgumentException("number of names must match the number of columns.", "names");
     293
     294      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
     295      for (int i = 0; i < names.Count; i++)
     296        variableNames[i] = names[i];
     297
     298      if (!IsInTransaction)
     299        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
     300    }
     301
     302    public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
              // True when every listed column is a string column; also true for an empty sequence.
     303      return columnIndices.All(x => VariableHasType<string>(x));
     304    }
     305    #endregion
     306
     307    #region Variables
     308    public IEnumerable<string> VariableNames {
              // Exposes the internal name list itself (no copy), in column order.
     309      get { return variableNames; }
     310    }
     311
     312    public IEnumerable<string> GetDoubleVariableNames() {
              // Collects, in column order, the names of all columns whose type is double.
     313      var doubleVariableNames = new List<string>();
     314      for (int i = 0; i < Columns; ++i) {
     315        if (VariableHasType<double>(i)) {
     316          doubleVariableNames.Add(variableNames[i]);
     317        }
     318      }
     319      return doubleVariableNames;
     320    }
     321
     322    public string GetVariableName(int columnIndex) {
              // Name of the column at columnIndex.
     323      return variableNames[columnIndex];
     324    }
     325
     326    public int GetColumnIndex(string variableName) {
              // Index of the first column with this name, as reported by IndexOf on the name list.
     327      return variableNames.IndexOf(variableName);
     328    }
     329
     330    public bool VariableHasType<T>(int columnIndex) {
              // True when the column is stored as a List<T>.
              // Also true for any columnIndex at or beyond the number of existing columns.
              // NOTE(review): presumably so that SetCell/SetValue can address columns that do not
              // exist yet - confirm this is intended.
     331      return columnIndex >= variableValues.Count || variableValues[columnIndex] is List<T>;
     332    }
     333
     334    public Type GetVariableType(int columnIndex) {
              // Element type of the column: the single generic type argument of the column's list type.
     335      var listType = variableValues[columnIndex].GetType();
     336      return listType.GenericTypeArguments.Single();
     337    }
     338
     339    public IList<string> InputVariables { get; private set; }
     340    public string TargetVariable { get; private set; } // optional
     341    #endregion
     342
     343    #region Partitions
     344    [Storable]
     345    public IntRange TrainingPartition { get; set; }
     346    [Storable]
     347    public IntRange TestPartition { get; set; }
     348    #endregion
     349
     350    #region Transformations
     351    [Storable]
     352    public IList<ITransformation> Transformations { get; protected set; }
     353    #endregion
     354
     355    #region Validation
     356    public bool Validate(string value, out string errorMessage, int columnIndex) {
     357      if (columnIndex < 0 || columnIndex > VariableNames.Count()) {
     358        throw new ArgumentOutOfRangeException("column index is out of range");
     359      }
     360
     361      bool valid = false;
     362      errorMessage = string.Empty;
     363      if (VariableHasType<double>(columnIndex)) {
     364        if (string.IsNullOrWhiteSpace(value)) {
     365          valid = true;
     366        } else {
     367          double val;
     368          valid = double.TryParse(value, out val);
     369          if (!valid) {
     370            errorMessage = "Invalid Value (Valid Value Format: \"" + FormatPatterns.GetDoubleFormatPattern() + "\")";
     371          }
     372        }
     373      } else if (VariableHasType<string>(columnIndex)) {
     374        valid = value != null;
     375        if (!valid) {
     376          errorMessage = "Invalid Value (string must not be null)";
     377        }
     378      } else if (VariableHasType<DateTime>(columnIndex)) {
     379        DateTime date;
     380        valid = DateTime.TryParse(value, out date);
     381        if (!valid) {
     382          errorMessage = "Invalid Value (Valid Value Format: \"" + CultureInfo.CurrentCulture.DateTimeFormat + "\"";
     383        }
     384      } else {
     385        throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
     386      }
     387
     388      return valid;
     389    }
     390    #endregion
     391
     392    #region Import & Export
    103393    public void Import(IDataAnalysisProblemData problemData) {
    104394      Dataset dataset = (Dataset)problemData.Dataset;
     
    107397      TargetVariable = (problemData is IRegressionProblemData) ? ((IRegressionProblemData)problemData).TargetVariable
    108398        : (problemData is IClassificationProblemData) ? ((IClassificationProblemData)problemData).TargetVariable
    109         : null;
     399          : null;
    110400
    111401      int columnIndex = 0;
     
    128418    }
    129419
    /// <summary>
    /// Subscribes a handler to <c>Changed</c> that calls <c>CheckPartitionRanges()</c>
    /// for the event types DeleteRow, Any and Transformation. Other event types are ignored.
    /// </summary>
    private void RegisterEventHandler() {
      Changed += (s, e) => {
        var type = e.Type;
        // The three handled event types all trigger the same range check.
        bool needsRangeCheck = type == DataPreprocessingChangedEventType.DeleteRow
          || type == DataPreprocessingChangedEventType.Any
          || type == DataPreprocessingChangedEventType.Transformation;
        if (needsRangeCheck) {
          CheckPartitionRanges();
        }
      };
    }
    145 
    /// <summary>
    /// Limits start and end of the training and the test partition to at most
    /// <c>Math.Max(0, Rows)</c>.
    /// </summary>
    private void CheckPartitionRanges() {
      int upperBound = Math.Max(0, Rows);
      // Training partition first, then test partition; start before end for each.
      foreach (var partition in new[] { TrainingPartition, TestPartition }) {
        partition.Start = Math.Min(partition.Start, upperBound);
        partition.End = Math.Min(partition.End, upperBound);
      }
    }
    153 
    /// <summary>
    /// Returns a new list that holds a copy of every column list in <paramref name="original"/>.
    /// Each copy is created via <c>Activator.CreateInstance</c> with the runtime type of the
    /// column and the column itself as the single constructor argument.
    /// </summary>
    /// <param name="original">The column lists to copy.</param>
    /// <returns>A new list containing the newly created column lists.</returns>
    protected IList<IList> CopyVariableValues(IList<IList> original) {
      var copy = new List<IList>(original);
      int position = 0;
      foreach (IList column in original) {
        copy[position] = (IList)Activator.CreateInstance(column.GetType(), column);
        position++;
      }
      return copy;
    }
    161 
    162 
    163     #region IPreprocessingData Members
    164     public abstract T GetCell<T>(int columnIndex, int rowIndex);
    165 
    166     public abstract void SetCell<T>(int columnIndex, int rowIndex, T value);
    167 
    168     public abstract string GetCellAsString(int columnIndex, int rowIndex);
    169 
    170     public abstract string GetVariableName(int columnIndex);
    171 
    172     public abstract int GetColumnIndex(string variableName);
    173 
    174     public abstract bool VariableHasType<T>(int columnIndex);
    175 
    176     [Obsolete("use the index based variant, is faster")]
    177     public abstract IList<T> GetValues<T>(string variableName, bool considerSelection);
    178 
    179     public abstract IList<T> GetValues<T>(int columnIndex, bool considerSelection);
    180 
    181     public abstract void SetValues<T>(int columnIndex, IList<T> values);
    182 
    183     public abstract bool SetValue(string value, int columnIndex, int rowIndex);
    184 
    185     public abstract bool Validate(string value, out string errorMessage, int columnIndex);
    186 
    187     public abstract bool AreAllStringColumns(IEnumerable<int> columnIndices);
    188 
    189     public abstract void DeleteRowsWithIndices(IEnumerable<int> rows);
    190 
    191     public abstract void InsertRow(int rowIndex);
    192 
    193     public abstract void DeleteRow(int rowIndex);
    194 
    195     public abstract void InsertColumn<T>(string variableName, int columnIndex);
    196 
    197     public abstract void DeleteColumn(int columnIndex);
    198 
    199     public abstract void RenameColumn(int columnIndex, string name);
    200     public abstract void RenameColumns(IList<string> list);
    201 
    202     public abstract Dataset ExportToDataset();
    203 
    204     public abstract void ClearSelection();
    205 
    206     public abstract event EventHandler SelectionChanged;
    207     protected abstract void OnSelectionChanged();
     420    public Dataset ExportToDataset() {
     421      IList<IList> values = new List<IList>();
     422
     423      for (int i = 0; i < Columns; ++i) {
     424        values.Add(variableValues[i]);
     425      }
     426
     427      var dataset = new Dataset(variableNames, values);
     428      return dataset;
     429    }
     430    #endregion
     431
     432    #region Selection
     433    [Storable]
     434    protected IDictionary<int, IList<int>> selection;
     435    public IDictionary<int, IList<int>> Selection {
     436      get { return selection; }
     437      set {
     438        selection = value;
     439        OnSelectionChanged();
     440      }
     441    }
     442    public void ClearSelection() {
     443      Selection = new Dictionary<int, IList<int>>();
     444    }
     445
     446    public event EventHandler SelectionChanged;
     447    protected void OnSelectionChanged() {
     448      var listeners = SelectionChanged;
     449      if (listeners != null) listeners(this, EventArgs.Empty);
     450    }
     451    #endregion
     452
     453    #region Transactions
     454    // Snapshot/History are not storable/cloneable on purpose
     455    private class Snapshot {
     456      public IList<IList> VariableValues { get; set; }
     457      public IList<string> VariableNames { get; set; }
     458
     459      public IntRange TrainingPartition { get; set; }
     460      public IntRange TestPartition { get; set; }
     461      public IList<ITransformation> Transformations { get; set; }
     462      public DataPreprocessingChangedEventType ChangedType { get; set; }
     463
     464      public int ChangedColumn { get; set; }
     465      public int ChangedRow { get; set; }
     466    }
    208467
    209468    public event DataPreprocessingChangedEventHandler Changed;
     
    212471      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
    213472    }
    214     #endregion
     473
     474    private const int MAX_UNDO_DEPTH = 5;
     475
     476    private readonly IList<Snapshot> undoHistory = new List<Snapshot>();
     477    private readonly Stack<DataPreprocessingChangedEventType> eventStack = new Stack<DataPreprocessingChangedEventType>();
     478
     479    public bool IsInTransaction { get { return eventStack.Count > 0; } }
     480
     481    private void SaveSnapshot(DataPreprocessingChangedEventType changedType, int column, int row) {
     482      if (IsInTransaction) return;
     483
     484      var currentSnapshot = new Snapshot {
     485        VariableValues = CopyVariableValues(variableValues),
     486        VariableNames = new List<string>(variableNames),
     487        TrainingPartition = new IntRange(TrainingPartition.Start, TrainingPartition.End),
     488        TestPartition = new IntRange(TestPartition.Start, TestPartition.End),
     489        Transformations = new List<ITransformation>(Transformations),
     490        ChangedType = changedType,
     491        ChangedColumn = column,
     492        ChangedRow = row
     493      };
     494
     495      if (undoHistory.Count >= MAX_UNDO_DEPTH)
     496        undoHistory.RemoveAt(0);
     497
     498      undoHistory.Add(currentSnapshot);
     499    }
     500
     501    public bool IsUndoAvailable {
     502      get { return undoHistory.Count > 0; }
     503    }
     504
     505    public void Undo() {
     506      if (IsUndoAvailable) {
     507        Snapshot previousSnapshot = undoHistory[undoHistory.Count - 1];
     508        variableValues = previousSnapshot.VariableValues;
     509        variableNames = previousSnapshot.VariableNames;
     510        TrainingPartition = previousSnapshot.TrainingPartition;
     511        TestPartition = previousSnapshot.TestPartition;
     512        Transformations = previousSnapshot.Transformations;
     513        undoHistory.Remove(previousSnapshot);
     514        OnChanged(previousSnapshot.ChangedType,
     515          previousSnapshot.ChangedColumn,
     516          previousSnapshot.ChangedRow);
     517      }
     518    }
     519
     520    public void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any) {
     521      BeginTransaction(type);
     522      action();
     523      EndTransaction();
     524    }
     525
     526    public void BeginTransaction(DataPreprocessingChangedEventType type) {
     527      SaveSnapshot(type, -1, -1);
     528      eventStack.Push(type);
     529    }
     530
     531    public void EndTransaction() {
     532      if (eventStack.Count == 0)
     533        throw new InvalidOperationException("There is no open transaction that can be ended.");
     534
     535      var @event = eventStack.Pop();
     536      OnChanged(@event, -1, -1);
     537    }
     538    #endregion
     539
     540    #region Statistics
     541    public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     542      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     543      return values.Any() ? values.Min() : emptyValue;
     544    }
     545
     546    public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     547      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     548      return values.Any() ? values.Max() : emptyValue;
     549    }
     550
     551    public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     552      if (typeof(T) == typeof(double)) {
     553        var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     554        return values.Any() ? Convert<T>(values.Average()) : emptyValue;
     555      }
     556      if (typeof(T) == typeof(string)) {
     557        return Convert<T>(string.Empty);
     558      }
     559      if (typeof(T) == typeof(DateTime)) {
     560        var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     561        return values.Any() ? Convert<T>(AggregateAsDouble(values, Enumerable.Average)) : emptyValue;
     562      }
     563
     564      throw new InvalidOperationException(typeof(T) + " not supported");
     565    }
     566
     567    public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     568      if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
     569        var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     570        return doubleValues.Any() ? Convert<T>(doubleValues.Median()) : emptyValue;
     571      }
     572      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     573      return values.Any() ? values.Quantile(0.5) : emptyValue;
     574    }
     575
     576    public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
     577      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     578      return values.Any() ? values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First() : emptyValue;
     579    }
     580
     581    public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     582      if (typeof(T) == typeof(double)) {
     583        var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     584        return values.Any() ? Convert<T>(values.StandardDeviation()) : emptyValue;
     585      }
     586      // For DateTime, std.dev / variance would have to be TimeSpan
     587      //if (typeof(T) == typeof(DateTime)) {
     588      //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     589      //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.StandardDeviation)) : emptyValue;
     590      //}
     591      return default(T);
     592    }
     593
     594    public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     595      if (typeof(T) == typeof(double)) {
     596        var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     597        return values.Any() ? Convert<T>(values.Variance()) : emptyValue;
     598      }
     599      // DateTime variance often overflows long, thus the corresponding DateTime is invalid
     600      //if (typeof(T) == typeof(DateTime)) {
     601      //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     602      //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.Variance)) : emptyValue;
     603      //}
     604      return default(T);
     605    }
     606
     607    public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     608      if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
     609        var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     610        return doubleValues.Any() ? Convert<T>(doubleValues.Quantile(alpha)) : emptyValue;
     611      }
     612      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     613      return values.Any() ? values.Quantile(alpha) : emptyValue;
     614    }
     615
     616    public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
     617      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     618      return values.GroupBy(x => x).Count();
     619    }
     620
     621    private IEnumerable<T> GetValuesWithoutMissingValues<T>(int columnIndex, bool considerSelection) {
     622      return GetValues<T>(columnIndex, considerSelection).Where(x => !IsMissingValue(x));
     623    }
     624
     625    private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) {
     626      return new DateTime((long)(func(values.Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond)) * TimeSpan.TicksPerSecond));
     627    }
     628    private static T Convert<T>(object obj) { return (T)obj; }
     629
     630    public int GetMissingValueCount() {
     631      int count = 0;
     632      for (int i = 0; i < Columns; ++i) {
     633        count += GetMissingValueCount(i);
     634      }
     635      return count;
     636    }
     637    public int GetMissingValueCount(int columnIndex) {
     638      int sum = 0;
     639      for (int i = 0; i < Rows; i++) {
     640        if (IsCellEmpty(columnIndex, i))
     641          sum++;
     642      }
     643      return sum;
     644    }
     645    public int GetRowMissingValueCount(int rowIndex) {
     646      int sum = 0;
     647      for (int i = 0; i < Columns; i++) {
     648        if (IsCellEmpty(i, rowIndex))
     649          sum++;
     650      }
     651      return sum;
     652    }
     653    #endregion
     654
     655    #region Helpers
     656    private static IList<IList> CopyVariableValues(IList<IList> original) {
     657      var copy = new List<IList>(original);
     658      for (int i = 0; i < original.Count; ++i) {
     659        copy[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
     660      }
     661      return copy;
     662    }
     663    #endregion
     664  }
     665
     666  // Adapted from HeuristicLab.Common.EnumerableStatisticExtensions
     667  internal static class EnumerableExtensions {
     668    public static T Quantile<T>(this IEnumerable<T> values, double alpha) where T : IComparable<T> {
     669      T[] valuesArr = values.ToArray();
     670      int n = valuesArr.Length;
     671      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
     672
     673      var pos = n * alpha;
     674
     675      return Select((int)Math.Ceiling(pos) - 1, valuesArr);
     676
     677    }
     678
     679    private static T Select<T>(int k, T[] arr) where T : IComparable<T> {
     680      int i, ir, j, l, mid, n = arr.Length;
     681      T a;
     682      l = 0;
     683      ir = n - 1;
     684      for (;;) {
     685        if (ir <= l + 1) {
     686          // Active partition contains 1 or 2 elements.
     687          if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) {
     688            // Case of 2 elements.
     689            Swap(arr, l, ir);
     690          }
     691          return arr[k];
     692        } else {
     693          mid = (l + ir) >> 1; // Choose median of left, center, and right elements
     694          Swap(arr, mid, l + 1); // as partitioning element a. Also
     695
     696          if (arr[l].CompareTo(arr[ir]) > 0) {  // rearrange so that arr[l] arr[ir] <= arr[l+1],
     697            Swap(arr, l, ir); // . arr[ir] >= arr[l+1]
     698          }
     699
     700          if (arr[l + 1].CompareTo(arr[ir]) > 0) {
     701            Swap(arr, l + 1, ir);
     702          }
     703          if (arr[l].CompareTo(arr[l + 1]) > 0) {
     704            Swap(arr, l, l + 1);
     705          }
     706          i = l + 1; // Initialize pointers for partitioning.
     707          j = ir;
     708          a = arr[l + 1]; // Partitioning element.
     709          for (;;) { // Beginning of innermost loop.
     710            do i++; while (arr[i].CompareTo(a) < 0); // Scan up to find element > a.
     711            do j--; while (arr[j].CompareTo(a) > 0); // Scan down to find element < a.
     712            if (j < i) break; // Pointers crossed. Partitioning complete.
     713            Swap(arr, i, j);
     714          } // End of innermost loop.
     715          arr[l + 1] = arr[j]; // Insert partitioning element.
     716          arr[j] = a;
     717          if (j >= k) ir = j - 1; // Keep active the partition that contains the
     718          if (j <= k) l = i; // kth element.
     719        }
     720      }
     721    }
     722
     723    private static void Swap<T>(T[] arr, int i, int j) {
     724      T temp = arr[i];
     725      arr[i] = arr[j];
     726      arr[j] = temp;
     727    }
    215728  }
    216729}
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/HeuristicLab.DataPreprocessing-3.4.csproj

    r15110 r16057  
    7979  <ItemGroup>
    8080    <Compile Include="Content\MultiScatterPlotContent.cs" />
     81    <Compile Include="Content\PreprocessingContent.cs" />
    8182    <Compile Include="Content\SingleScatterPlotContent.cs" />
    8283    <Compile Include="Content\ScatterPlotContent.cs" />
     
    9091    <Compile Include="PreprocessingTransformator.cs" />
    9192    <Compile Include="Data\DataPreprocessingChangedEvent.cs" />
    92     <Compile Include="Logic\Filter\ComparisonFilter.cs" />
    93     <Compile Include="Logic\Filter\IFilter.cs" />
    94     <Compile Include="Logic\ManipulationLogic.cs" />
     93    <Compile Include="Filter\ComparisonFilter.cs" />
     94    <Compile Include="Filter\IFilter.cs" />
    9595    <Compile Include="Data\IPreprocessingData.cs" />
    9696    <Compile Include="Content\FilterContent.cs" />
    97     <Compile Include="Logic\FilterLogic.cs" />
    9897    <Compile Include="Content\HistogramContent.cs" />
    9998    <Compile Include="Content\LineChartContent.cs" />
     
    105104    <Compile Include="Content\DataGridContent.cs" />
    106105    <Compile Include="PreprocessingContext.cs" />
    107     <Compile Include="Data\TransactionalPreprocessingData.cs" />
    108     <Compile Include="Logic\SearchLogic.cs" />
    109     <Compile Include="Logic\StatisticsLogic.cs" />
    110     <Compile Include="Data\ITransactionalPreprocessingData.cs" />
    111106    <Compile Include="Plugin.cs" />
    112107    <Compile Include="Properties\AssemblyInfo.cs" />
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Plugin.cs.frame

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2626  /// Plugin class for HeuristicLab.Data plugin.
    2727  /// </summary>
    28   [Plugin("HeuristicLab.DataPreprocessing", "3.4.4.$WCREV$")]
     28  [Plugin("HeuristicLab.DataPreprocessing", "3.4.5.$WCREV$")]
    2929  [PluginFile("HeuristicLab.DataPreprocessing-3.4.dll", PluginFileType.Assembly)]
    3030  [PluginDependency("HeuristicLab.Analysis", "3.3")]
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/PreprocessingContext.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    7171      if (namedSource != null)
    7272        Name = "Preprocessing " + namedSource.Name;
    73       Data = new FilteredPreprocessingData(new TransactionalPreprocessingData(problemData));
     73      Data = new FilteredPreprocessingData(new PreprocessingData(problemData));
    7474      OnReset();
    7575      // Reset GUI:
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/PreprocessingTransformator.cs

    r14843 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2929namespace HeuristicLab.DataPreprocessing {
    3030  public class PreprocessingTransformator {
    31     private readonly ITransactionalPreprocessingData preprocessingData;
     31    private readonly IPreprocessingData preprocessingData;
    3232
    3333    private readonly IDictionary<string, IList<double>> originalColumns;
     
    3636
    3737    public PreprocessingTransformator(IPreprocessingData preprocessingData) {
    38       this.preprocessingData = (ITransactionalPreprocessingData)preprocessingData;
     38      this.preprocessingData = preprocessingData;
    3939      originalColumns = new Dictionary<string, IList<double>>();
    4040      renamedColumns = new Dictionary<string, string>();
     
    6868          preprocessingData.Undo();
    6969        }
    70       }
    71       catch (Exception e) {
     70      } catch (Exception e) {
    7271        preprocessingData.Undo();
    7372        if (string.IsNullOrEmpty(errorMsg)) errorMsg = e.Message;
    74       }
    75       finally {
     73      } finally {
    7674        preprocessingData.EndTransaction();
    7775      }
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs

    r15110 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2839_HiveProjectManagement/HeuristicLab.DataPreprocessing/3.4/Properties/AssemblyInfo.cs.frame

    r14195 r16057  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    3232[assembly: AssemblyCompany("HEAL")]
    3333[assembly: AssemblyProduct("HeuristicLab")]
    34 [assembly: AssemblyCopyright("(c) 2002-2016 HEAL")]
     34[assembly: AssemblyCopyright("(c) 2002-2018 HEAL")]
    3535[assembly: AssemblyTrademark("")]
    3636[assembly: AssemblyCulture("")]
     
    5454// by using the '*' as shown below:
    5555[assembly: AssemblyVersion("3.4.0.0")]
    56 [assembly: AssemblyFileVersion("3.4.4.$WCREV$")]
     56[assembly: AssemblyFileVersion("3.4.5.$WCREV$")]
Note: See TracChangeset for help on using the changeset viewer.