Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/14/18 11:36:59 (6 years ago)
Author:
abeham
Message:

#2817: updated to trunk r15680

Location:
branches/2817-BinPackingSpeedup
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2817-BinPackingSpeedup

  • branches/2817-BinPackingSpeedup/HeuristicLab.DataPreprocessing

  • branches/2817-BinPackingSpeedup/HeuristicLab.DataPreprocessing/3.4

  • branches/2817-BinPackingSpeedup/HeuristicLab.DataPreprocessing/3.4/Content/DataGridContent.cs

    r15110 r16140  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2727using HeuristicLab.Core;
    2828using HeuristicLab.Data;
     29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     30using HeuristicLab.Random;
    2931
    3032namespace HeuristicLab.DataPreprocessing {
    31 
    3233  [Item("Data Grid", "Represents a data grid.")]
    33   public class DataGridContent : Item, IStringConvertibleMatrix, IViewShortcut {
     34  [StorableClass]
     35  public class DataGridContent : PreprocessingContent, IStringConvertibleMatrix, IViewShortcut {
    3436    public static new Image StaticItemImage {
    3537      get { return HeuristicLab.Common.Resources.VSImageLibrary.Table; }
    3638    }
    3739
    38     public ITransactionalPreprocessingData PreProcessingData { get; private set; }
    39 
    40     public ManipulationLogic ManipulationLogic { get; private set; }
    41     public FilterLogic FilterLogic { get; private set; }
    42 
    4340    public int Rows {
    44       get { return PreProcessingData.Rows; }
     41      get { return PreprocessingData.Rows; }
    4542      set { }
    4643    }
    4744
    4845    public int Columns {
    49       get { return PreProcessingData.Columns; }
     46      get { return PreprocessingData.Columns; }
    5047      set { }
    5148    }
    5249
    5350    public IEnumerable<string> ColumnNames {
    54       get { return PreProcessingData.VariableNames; }
     51      get { return PreprocessingData.VariableNames; }
    5552      set { }
    5653    }
     
    7168
    7269    public IDictionary<int, IList<int>> Selection {
    73       get { return PreProcessingData.Selection; }
    74       set { PreProcessingData.Selection = value; }
    75     }
    76 
    77     public DataGridContent(ITransactionalPreprocessingData preProcessingData, ManipulationLogic theManipulationLogic, FilterLogic theFilterLogic) {
    78       ManipulationLogic = theManipulationLogic;
    79       FilterLogic = theFilterLogic;
    80       PreProcessingData = preProcessingData;
    81     }
    82 
    83     public DataGridContent(DataGridContent dataGridContent, Cloner cloner)
    84       : base(dataGridContent, cloner) {
    85 
     70      get { return PreprocessingData.Selection; }
     71      set { PreprocessingData.Selection = value; }
     72    }
     73
     74    #region Constructor, Cloning & Persistence
     75    public DataGridContent(IFilteredPreprocessingData preprocessingData)
     76      : base(preprocessingData) {
     77    }
     78
     79    public DataGridContent(DataGridContent original, Cloner cloner)
     80      : base(original, cloner) {
    8681    }
    8782    public override IDeepCloneable Clone(Cloner cloner) {
     
    8984    }
    9085
     86    [StorableConstructor]
     87    protected DataGridContent(bool deserializing)
     88      : base(deserializing) { }
     89    #endregion
     90
    9191    public void DeleteRows(IEnumerable<int> rows) {
    92       PreProcessingData.DeleteRowsWithIndices(rows);
     92      PreprocessingData.DeleteRowsWithIndices(rows);
    9393    }
    9494
    9595    public void DeleteColumn(int column) {
    96       PreProcessingData.DeleteColumn(column);
     96      PreprocessingData.DeleteColumn(column);
    9797    }
    9898
    9999    public bool Validate(string value, out string errorMessage, int columnIndex) {
    100       return PreProcessingData.Validate(value, out errorMessage, columnIndex);
     100      return PreprocessingData.Validate(value, out errorMessage, columnIndex);
    101101    }
    102102
    103103    public string GetValue(int rowIndex, int columnIndex) {
    104       return PreProcessingData.GetCellAsString(columnIndex, rowIndex);
     104      return PreprocessingData.GetCellAsString(columnIndex, rowIndex);
    105105    }
    106106
    107107    public bool SetValue(string value, int rowIndex, int columnIndex) {
    108       return PreProcessingData.SetValue(value, columnIndex, rowIndex);
     108      return PreprocessingData.SetValue(value, columnIndex, rowIndex);
    109109    }
    110110
    111111    public event DataPreprocessingChangedEventHandler Changed {
    112       add { PreProcessingData.Changed += value; }
    113       remove { PreProcessingData.Changed -= value; }
     112      add { PreprocessingData.Changed += value; }
     113      remove { PreprocessingData.Changed -= value; }
    114114    }
    115115
     
    132132#pragma warning restore 0067
    133133    #endregion
     134
     135    #region Manipulations
     136    private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null,
     137      Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null) {
     138      PreprocessingData.InTransaction(() => {
     139        foreach (var column in cells) {
     140          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     141            var value = doubleAggregator(column.Key);
     142            foreach (int index in column.Value)
     143              PreprocessingData.SetCell<double>(column.Key, index, value);
     144          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     145            var value = dateTimeAggregator(column.Key);
     146            foreach (int index in column.Value)
     147              PreprocessingData.SetCell<DateTime>(column.Key, index, value);
     148          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     149            var value = stringAggregator(column.Key);
     150            foreach (int index in column.Value)
     151              PreprocessingData.SetCell<string>(column.Key, index, value);
     152          }
     153        }
     154      });
     155    }
     156
     157    private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null,
     158      Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null) {
     159      PreprocessingData.InTransaction(() => {
     160        foreach (var column in cells) {
     161          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     162            var values = doubleAggregator(column.Key);
     163            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     164              PreprocessingData.SetCell<double>(column.Key, pair.row, pair.value);
     165          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     166            var values = dateTimeAggregator(column.Key);
     167            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     168              PreprocessingData.SetCell<DateTime>(column.Key, pair.row, pair.value);
     169          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     170            var values = stringAggregator(column.Key);
     171            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     172              PreprocessingData.SetCell<string>(column.Key, pair.row, pair.value);
     173          }
     174        }
     175      });
     176    }
     177
     178    public void ReplaceIndicesByMean(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     179      ReplaceIndicesByValue(cells,
     180        col => PreprocessingData.GetMean<double>(col, considerSelection),
     181        col => PreprocessingData.GetMean<DateTime>(col, considerSelection));
     182    }
     183
     184    public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     185      ReplaceIndicesByValue(cells,
     186        col => PreprocessingData.GetMedian<double>(col, considerSelection),
     187        col => PreprocessingData.GetMedian<DateTime>(col, considerSelection));
     188    }
     189
     190    public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     191      ReplaceIndicesByValue(cells,
     192        col => PreprocessingData.GetMode<double>(col, considerSelection),
     193        col => PreprocessingData.GetMode<DateTime>(col, considerSelection),
     194        col => PreprocessingData.GetMode<string>(col, considerSelection));
     195    }
     196
     197    public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     198      var rand = new FastRandom();
     199      ReplaceIndicesByValues(cells,
     200        col => {
     201          double min = PreprocessingData.GetMin<double>(col, considerSelection);
     202          double max = PreprocessingData.GetMax<double>(col, considerSelection);
     203          double range = max - min;
     204          return cells[col].Select(_ => rand.NextDouble() * range + min);
     205        },
     206        col => {
     207          var min = PreprocessingData.GetMin<DateTime>(col, considerSelection);
     208          var max = PreprocessingData.GetMax<DateTime>(col, considerSelection);
     209          double range = (max - min).TotalSeconds;
     210          return cells[col].Select(_ => min + TimeSpan.FromSeconds(rand.NextDouble() * range));
     211        });
     212    }
     213
     214    public void ReplaceIndicesByString(IDictionary<int, IList<int>> cells, string value) {
     215      PreprocessingData.InTransaction(() => {
     216        foreach (var column in cells) {
     217          foreach (var rowIdx in column.Value) {
     218            PreprocessingData.SetValue(value, column.Key, rowIdx);
     219          }
     220        }
     221      });
     222    }
     223
     224
     225    public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
     226      PreprocessingData.InTransaction(() => {
     227        foreach (var column in cells) {
     228          IList<Tuple<int, int>> startEndings = GetStartAndEndingsForInterpolation(column);
     229          foreach (var tuple in startEndings) {
     230            Interpolate(column, tuple.Item1, tuple.Item2);
     231          }
     232        }
     233      });
     234    }
     235
     236    private List<Tuple<int, int>> GetStartAndEndingsForInterpolation(KeyValuePair<int, IList<int>> column) {
     237      var startEndings = new List<Tuple<int, int>>();
     238      var rowIndices = column.Value.OrderBy(x => x).ToList();
     239      var count = rowIndices.Count;
     240      int start = int.MinValue;
     241      for (int i = 0; i < count; ++i) {
     242        if (start == int.MinValue) {
     243          start = IndexOfPrevPresentValue(column.Key, rowIndices[i]);
     244        }
     245        if (i + 1 == count || (i + 1 < count && rowIndices[i + 1] - rowIndices[i] > 1)) {
     246          int next = IndexOfNextPresentValue(column.Key, rowIndices[i]);
     247          if (start > 0 && next < PreprocessingData.Rows) {
     248            startEndings.Add(new Tuple<int, int>(start, next));
     249          }
     250          start = int.MinValue;
     251        }
     252      }
     253      return startEndings;
     254    }
     255
     256    private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex) {
     257      int valuesToInterpolate = nextIndex - prevIndex;
     258
     259      if (PreprocessingData.VariableHasType<double>(column.Key)) {
     260        double prev = PreprocessingData.GetCell<double>(column.Key, prevIndex);
     261        double next = PreprocessingData.GetCell<double>(column.Key, nextIndex);
     262        double interpolationStep = (next - prev) / valuesToInterpolate;
     263
     264        for (int i = prevIndex; i < nextIndex; ++i) {
     265          double interpolated = prev + (interpolationStep * (i - prevIndex));
     266          PreprocessingData.SetCell<double>(column.Key, i, interpolated);
     267        }
     268      } else if (PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     269        DateTime prev = PreprocessingData.GetCell<DateTime>(column.Key, prevIndex);
     270        DateTime next = PreprocessingData.GetCell<DateTime>(column.Key, nextIndex);
     271        double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
     272
     273        for (int i = prevIndex; i < nextIndex; ++i) {
     274          DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
     275          PreprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
     276        }
     277      }
     278    }
     279
     280    private int IndexOfPrevPresentValue(int columnIndex, int start) {
     281      int offset = start - 1;
     282      while (offset >= 0 && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     283        offset--;
     284      }
     285
     286      return offset;
     287    }
     288
     289    private int IndexOfNextPresentValue(int columnIndex, int start) {
     290      int offset = start + 1;
     291      while (offset < PreprocessingData.Rows && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     292        offset++;
     293      }
     294
     295      return offset;
     296    }
     297
     298    public void Shuffle(bool shuffleRangesSeparately) {
     299      var random = new FastRandom();
     300
     301      if (shuffleRangesSeparately) {
     302        var ranges = new[] { PreprocessingData.TestPartition, PreprocessingData.TrainingPartition };
     303        PreprocessingData.InTransaction(() => {
     304          // process all given ranges - e.g. TrainingPartition, TestPartition
     305          foreach (IntRange range in ranges) {
     306            var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
     307            var shuffledIndices = Enumerable.Range(range.Start, range.Size).Shuffle(random).ToArray();
     308            for (int i = range.Start, j = 0; i < range.End; i++, j++)
     309              indices[i] = shuffledIndices[j];
     310
     311            ReOrderToIndices(indices);
     312          }
     313        });
     314
     315      } else {
     316        PreprocessingData.InTransaction(() => {
     317          var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
     318          indices.ShuffleInPlace(random);
     319          ReOrderToIndices(indices);
     320        });
     321      }
     322    }
     323
     324    public void ReOrderToIndices(int[] indices) {
     325      PreprocessingData.InTransaction(() => {
     326        for (int i = 0; i < PreprocessingData.Columns; ++i) {
     327          if (PreprocessingData.VariableHasType<double>(i))
     328            ReOrderToIndices<double>(i, indices);
     329          else if (PreprocessingData.VariableHasType<string>(i))
     330            ReOrderToIndices<string>(i, indices);
     331          else if (PreprocessingData.VariableHasType<DateTime>(i))
     332            ReOrderToIndices<DateTime>(i, indices);
     333        }
     334      });
     335    }
     336
     337    private void ReOrderToIndices<T>(int columnIndex, int[] indices) {
     338      var originalData = new List<T>(PreprocessingData.GetValues<T>(columnIndex));
     339      if (indices.Length != originalData.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
     340
     341      for (int i = 0; i < indices.Length; i++) {
     342        T newValue = originalData[indices[i]];
     343        PreprocessingData.SetCell<T>(columnIndex, i, newValue);
     344      }
     345    }
     346    #endregion
    134347  }
    135348}
Note: See TracChangeset for help on using the changeset viewer.