Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/26/17 14:12:27 (8 years ago)
Author:
pfleck
Message:

#2809 removed ManipulationLogic

Location:
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/DataGridContent.cs

    r15274 r15285  
    2828using HeuristicLab.Data;
    2929using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     30using HeuristicLab.Random;
    3031
    3132namespace HeuristicLab.DataPreprocessing {
     
    3738    }
    3839
    39     [Storable]
    40     public ManipulationLogic ManipulationLogic { get; private set; }
    41 
    4240    public int Rows {
    4341      get { return PreprocessingData.Rows; }
     
    7573
    7674    #region Constructor, Cloning & Persistence
    77     public DataGridContent(IFilteredPreprocessingData preprocessingData, ManipulationLogic manipulationLogic)
     75    public DataGridContent(IFilteredPreprocessingData preprocessingData)
    7876      : base(preprocessingData) {
    79       ManipulationLogic = manipulationLogic;
    8077    }
    8178
    8279    public DataGridContent(DataGridContent original, Cloner cloner)
    8380      : base(original, cloner) {
    84       ManipulationLogic = cloner.Clone(original.ManipulationLogic);
    8581    }
    8682    public override IDeepCloneable Clone(Cloner cloner) {
     
    136132#pragma warning restore 0067
    137133    #endregion
     134
     135    #region Manipulations
     136    private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null,
     137      Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null) {
     138      PreprocessingData.InTransaction(() => {
     139        foreach (var column in cells) {
     140          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     141            var value = doubleAggregator(column.Key);
     142            foreach (int index in column.Value)
     143              PreprocessingData.SetCell<double>(column.Key, index, value);
     144          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     145            var value = dateTimeAggregator(column.Key);
     146            foreach (int index in column.Value)
     147              PreprocessingData.SetCell<DateTime>(column.Key, index, value);
     148          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     149            var value = stringAggregator(column.Key);
     150            foreach (int index in column.Value)
     151              PreprocessingData.SetCell<string>(column.Key, index, value);
     152          }
     153        }
     154      });
     155    }
     156
     157    private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null,
     158      Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null) {
     159      PreprocessingData.InTransaction(() => {
     160        foreach (var column in cells) {
     161          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     162            var values = doubleAggregator(column.Key);
     163            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     164              PreprocessingData.SetCell<double>(column.Key, pair.row, pair.value);
     165          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     166            var values = dateTimeAggregator(column.Key);
     167            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     168              PreprocessingData.SetCell<DateTime>(column.Key, pair.row, pair.value);
     169          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     170            var values = stringAggregator(column.Key);
     171            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     172              PreprocessingData.SetCell<string>(column.Key, pair.row, pair.value);
     173          }
     174        }
     175      });
     176    }
     177
     178    public void ReplaceIndicesByMean(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     179      ReplaceIndicesByValue(cells,
     180        col => PreprocessingData.GetMean<double>(col, considerSelection),
     181        col => PreprocessingData.GetMean<DateTime>(col, considerSelection));
     182    }
     183
     184    public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     185      ReplaceIndicesByValue(cells,
     186        col => PreprocessingData.GetMedian<double>(col, considerSelection),
     187        col => PreprocessingData.GetMedian<DateTime>(col, considerSelection));
     188    }
     189
     190    public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     191      ReplaceIndicesByValue(cells,
     192        col => PreprocessingData.GetMode<double>(col, considerSelection),
     193        col => PreprocessingData.GetMode<DateTime>(col, considerSelection),
     194        col => PreprocessingData.GetMode<string>(col, considerSelection));
     195    }
     196
     197    public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
     198      var rand = new FastRandom();
     199      ReplaceIndicesByValues(cells,
     200        col => {
     201          double min = PreprocessingData.GetMin<double>(col, considerSelection);
     202          double max = PreprocessingData.GetMax<double>(col, considerSelection);
     203          double range = max - min;
     204          return cells[col].Select(_ => rand.NextDouble() * range + min);
     205        },
     206        col => {
     207          var min = PreprocessingData.GetMin<DateTime>(col, considerSelection);
     208          var max = PreprocessingData.GetMax<DateTime>(col, considerSelection);
     209          double range = (max - min).TotalSeconds;
     210          return cells[col].Select(_ => min + TimeSpan.FromSeconds(rand.NextDouble() * range));
     211        });
     212    }
     213
     214    public void ReplaceIndicesByString(IDictionary<int, IList<int>> cells, string value) {
     215      PreprocessingData.InTransaction(() => {
     216        foreach (var column in cells) {
     217          foreach (var rowIdx in column.Value) {
     218            PreprocessingData.SetValue(value, column.Key, rowIdx);
     219          }
     220        }
     221      });
     222    }
     223
     224
     225    public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
     226      PreprocessingData.InTransaction(() => {
     227        foreach (var column in cells) {
     228          IList<Tuple<int, int>> startEndings = GetStartAndEndingsForInterpolation(column);
     229          foreach (var tuple in startEndings) {
     230            Interpolate(column, tuple.Item1, tuple.Item2);
     231          }
     232        }
     233      });
     234    }
     235
     236    private List<Tuple<int, int>> GetStartAndEndingsForInterpolation(KeyValuePair<int, IList<int>> column) {
     237      var startEndings = new List<Tuple<int, int>>();
     238      var rowIndices = column.Value.OrderBy(x => x).ToList();
     239      var count = rowIndices.Count;
     240      int start = int.MinValue;
     241      for (int i = 0; i < count; ++i) {
     242        if (start == int.MinValue) {
     243          start = IndexOfPrevPresentValue(column.Key, rowIndices[i]);
     244        }
     245        if (i + 1 == count || (i + 1 < count && rowIndices[i + 1] - rowIndices[i] > 1)) {
     246          int next = IndexOfNextPresentValue(column.Key, rowIndices[i]);
     247          if (start > 0 && next < PreprocessingData.Rows) {
     248            startEndings.Add(new Tuple<int, int>(start, next));
     249          }
     250          start = int.MinValue;
     251        }
     252      }
     253      return startEndings;
     254    }
     255
     256    private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex) {
     257      int valuesToInterpolate = nextIndex - prevIndex;
     258
     259      if (PreprocessingData.VariableHasType<double>(column.Key)) {
     260        double prev = PreprocessingData.GetCell<double>(column.Key, prevIndex);
     261        double next = PreprocessingData.GetCell<double>(column.Key, nextIndex);
     262        double interpolationStep = (next - prev) / valuesToInterpolate;
     263
     264        for (int i = prevIndex; i < nextIndex; ++i) {
     265          double interpolated = prev + (interpolationStep * (i - prevIndex));
     266          PreprocessingData.SetCell<double>(column.Key, i, interpolated);
     267        }
     268      } else if (PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     269        DateTime prev = PreprocessingData.GetCell<DateTime>(column.Key, prevIndex);
     270        DateTime next = PreprocessingData.GetCell<DateTime>(column.Key, nextIndex);
     271        double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
     272
     273        for (int i = prevIndex; i < nextIndex; ++i) {
     274          DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
     275          PreprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
     276        }
     277      }
     278    }
     279
     280    private int IndexOfPrevPresentValue(int columnIndex, int start) {
     281      int offset = start - 1;
     282      while (offset >= 0 && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     283        offset--;
     284      }
     285
     286      return offset;
     287    }
     288
     289    private int IndexOfNextPresentValue(int columnIndex, int start) {
     290      int offset = start + 1;
     291      while (offset < PreprocessingData.Rows && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     292        offset++;
     293      }
     294
     295      return offset;
     296    }
     297
     298    public void Shuffle(bool shuffleRangesSeparately) {
     299      var random = new FastRandom();
     300
     301      if (shuffleRangesSeparately) {
     302        var ranges = new[] { PreprocessingData.TestPartition, PreprocessingData.TrainingPartition };
     303        PreprocessingData.InTransaction(() => {
     304          // process all given ranges - e.g. TrainingPartition, TestPartition
     305          foreach (IntRange range in ranges) {
     306            var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
     307            var shuffledIndices = Enumerable.Range(range.Start, range.Size).Shuffle(random).ToArray();
     308            for (int i = range.Start, j = 0; i < range.End; i++, j++)
     309              indices[i] = shuffledIndices[j];
     310
     311            ReOrderToIndices(indices);
     312          }
     313        });
     314
     315      } else {
     316        PreprocessingData.InTransaction(() => {
     317          var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
     318          indices.ShuffleInPlace(random);
     319          ReOrderToIndices(indices);
     320        });
     321      }
     322    }
     323
     324    public void ReOrderToIndices(int[] indices) {
     325      PreprocessingData.InTransaction(() => {
     326        for (int i = 0; i < PreprocessingData.Columns; ++i) {
     327          if (PreprocessingData.VariableHasType<double>(i))
     328            ReOrderToIndices<double>(i, indices);
     329          else if (PreprocessingData.VariableHasType<string>(i))
     330            ReOrderToIndices<string>(i, indices);
     331          else if (PreprocessingData.VariableHasType<DateTime>(i))
     332            ReOrderToIndices<DateTime>(i, indices);
     333        }
     334      });
     335    }
     336
     337    private void ReOrderToIndices<T>(int columnIndex, int[] indices) {
     338      var originalData = new List<T>(PreprocessingData.GetValues<T>(columnIndex));
     339      if (indices.Length != originalData.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
     340
     341      for (int i = 0; i < indices.Length; i++) {
     342        T newValue = originalData[indices[i]];
     343        PreprocessingData.SetCell<T>(columnIndex, i, newValue);
     344      }
     345    }
     346    #endregion
    138347  }
    139348}
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/ManipulationContent.cs

    r15274 r15285  
    2020#endregion
    2121
     22using System;
     23using System.Collections.Generic;
    2224using System.Drawing;
     25using System.Linq;
    2326using HeuristicLab.Common;
    2427using HeuristicLab.Core;
     
    3336    }
    3437
    35     [Storable]
    36     public ManipulationLogic ManipulationLogic { get; private set; }
    37 
    3838    #region Constructor, Cloning & Persistence
    39     public ManipulationContent(IFilteredPreprocessingData preprocessingData, ManipulationLogic manipulationLogic)
     39    public ManipulationContent(IFilteredPreprocessingData preprocessingData)
    4040      : base(preprocessingData) {
    41       ManipulationLogic = manipulationLogic;
    4241    }
    4342
    4443    public ManipulationContent(ManipulationContent original, Cloner cloner) :
    4544      base(original, cloner) {
    46       ManipulationLogic = cloner.Clone(original.ManipulationLogic);
    4745    }
    4846    public override IDeepCloneable Clone(Cloner cloner) {
     
    5452      : base(deserializing) { }
    5553    #endregion
     54
     55    public List<int> RowsWithMissingValuesGreater(double percent) {
     56      List<int> rows = new List<int>();
     57
     58      for (int i = 0; i < PreprocessingData.Rows; ++i) {
     59        int missingCount = PreprocessingData.GetRowMissingValueCount(i);
     60        if (100f / PreprocessingData.Columns * missingCount > percent) {
     61          rows.Add(i);
     62        }
     63      }
     64
     65      return rows;
     66    }
     67
     68    public List<int> ColumnsWithMissingValuesGreater(double percent) {
     69      List<int> columns = new List<int>();
     70      for (int i = 0; i < PreprocessingData.Columns; ++i) {
     71        int missingCount = PreprocessingData.GetMissingValueCount(i);
     72        if (100f / PreprocessingData.Rows * missingCount > percent) {
     73          columns.Add(i);
     74        }
     75      }
     76
     77      return columns;
     78    }
     79
     80    public List<int> ColumnsWithVarianceSmaller(double variance) {
     81      List<int> columns = new List<int>();
     82      for (int i = 0; i < PreprocessingData.Columns; ++i) {
     83        if (PreprocessingData.VariableHasType<double>(i)) {
     84          double columnVariance = PreprocessingData.GetVariance<double>(i);
     85          if (columnVariance < variance) {
     86            columns.Add(i);
     87          }
     88        } else if (PreprocessingData.VariableHasType<DateTime>(i)) {
     89          double columnVariance = (double)PreprocessingData.GetVariance<DateTime>(i).Ticks / TimeSpan.TicksPerSecond;
     90          if (columnVariance < variance) {
     91            columns.Add(i);
     92          }
     93        }
     94      }
     95      return columns;
     96    }
     97
     98    public void DeleteRowsWithMissingValuesGreater(double percent) {
     99      DeleteRows(RowsWithMissingValuesGreater(percent));
     100    }
     101
     102    public void DeleteColumnsWithMissingValuesGreater(double percent) {
     103      DeleteColumns(ColumnsWithMissingValuesGreater(percent));
     104    }
     105
     106    public void DeleteColumnsWithVarianceSmaller(double variance) {
     107      DeleteColumns(ColumnsWithVarianceSmaller(variance));
     108    }
     109
     110    private void DeleteRows(List<int> rows) {
     111      PreprocessingData.InTransaction(() => {
     112        foreach (int row in rows.OrderByDescending(x => x)) {
     113          PreprocessingData.DeleteRow(row);
     114        }
     115      });
     116    }
     117
     118    private void DeleteColumns(List<int> columns) {
     119      PreprocessingData.InTransaction(() => {
     120        foreach (int column in columns.OrderByDescending(x => x)) {
     121          PreprocessingData.DeleteColumn(column);
     122        }
     123      });
     124    }
    56125  }
    57126}
Note: See TracChangeset for help on using the changeset viewer.