Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/07/14 15:22:41 (11 years ago)
Author:
rstoll
Message:
  • Interpolation implemented
  • Smoothing fixed
  • Interpolation/Smoothing menu disabled if first column is selected as well
Location:
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs

    r10811 r10820  
    109109          }
    110110
     111          IList<Tuple<int, int>> startEndings = GetStartAndEndingsForInterpolation(column);
     112          foreach (var tuple in startEndings) {
     113            Interpolate(column, tuple.Item1, tuple.Item2);
     114          }
     115        }
     116      });
     117    }
     118
     119    private List<Tuple<int, int>> GetStartAndEndingsForInterpolation(KeyValuePair<int, IList<int>> column) {
     120      List<Tuple<int, int>> startEndings = new List<Tuple<int, int>>();
     121      var rowIndices = column.Value;
     122      rowIndices = rowIndices.OrderBy(x => x).ToList();
     123      var count = rowIndices.Count;
     124      int start = int.MinValue;
     125      for (int i = 0; i < count; ++i) {
     126        if (start == int.MinValue) {
     127          start = indexOfPrevPresentValue(column.Key, rowIndices[i]);
     128        }
     129        if (i + 1 == count || (i + 1 < count && rowIndices[i + 1] - rowIndices[i] > 1)) {
     130          int next = indexOfNextPresentValue(column.Key, rowIndices[i]);
     131          if (start > 0 && next < preprocessingData.Rows) {
     132            startEndings.Add(new Tuple<int, int>(start, next));
     133          }
     134          start = int.MinValue;
     135        }
     136      }
     137      return startEndings;
     138    }
     139
     140    public void ReplaceIndicesBySmoothing(IDictionary<int, IList<int>> cells) {
     141      preprocessingData.InTransaction(() => {
     142        foreach (var column in cells) {
     143          int countValues = preprocessingData.Rows;
     144
    111145          foreach (int index in column.Value) {
    112146            // dont replace first or last values
     
    116150
    117151              // no neighbours found
    118               if (prevIndex < 0 && nextIndex >= countValues) {
     152              if (prevIndex < 0 || nextIndex >= countValues) {
    119153                continue;
    120154              }
    121155
    122               int valuesToInterpolate = nextIndex - prevIndex;
    123 
    124               if (preprocessingData.IsType<double>(column.Key)) {
    125                 double prev = preprocessingData.GetCell<double>(column.Key, prevIndex);
    126                 double next = preprocessingData.GetCell<double>(column.Key, nextIndex);
    127                 double interpolationStep = (next - prev) / valuesToInterpolate;
    128 
    129                 for (int i = prevIndex; i < nextIndex; ++i) {
    130                   double interpolated = prev + (interpolationStep * (i - prevIndex));
    131                   preprocessingData.SetCell<double>(column.Key, i, interpolated);
    132                 }
    133               } else if (preprocessingData.IsType<DateTime>(column.Key)) {
    134                 DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex);
    135                 DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex);
    136                 double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
    137 
    138                 for (int i = prevIndex; i < nextIndex; ++i) {
    139                   DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
    140                   preprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
    141                 }
    142               }
     156              Interpolate(column, prevIndex, nextIndex);
    143157            }
    144158          }
    145159        }
    146160      });
     161    }
     162
     163    private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex) {
     164      int valuesToInterpolate = nextIndex - prevIndex;
     165
     166      if (preprocessingData.IsType<double>(column.Key)) {
     167        double prev = preprocessingData.GetCell<double>(column.Key, prevIndex);
     168        double next = preprocessingData.GetCell<double>(column.Key, nextIndex);
     169        double interpolationStep = (next - prev) / valuesToInterpolate;
     170
     171        for (int i = prevIndex; i < nextIndex; ++i) {
     172          double interpolated = prev + (interpolationStep * (i - prevIndex));
     173          preprocessingData.SetCell<double>(column.Key, i, interpolated);
     174        }
     175      } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     176        DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex);
     177        DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex);
     178        double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
     179
     180        for (int i = prevIndex; i < nextIndex; ++i) {
     181          DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
     182          preprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
     183        }
     184      }
    147185    }
    148186
     
    232270    }
    233271
    234     public void ShuffleToIndices(IList<System.Tuple<int, int>> indices)
    235     {
    236       preprocessingData.InTransaction(() =>
    237       {
    238         for (int i = 0; i < preprocessingData.Columns; ++i)
    239         {
    240           if (preprocessingData.IsType<double>(i))
    241           {
     272    public void ShuffleToIndices(IList<System.Tuple<int, int>> indices) {
     273      preprocessingData.InTransaction(() => {
     274        for (int i = 0; i < preprocessingData.Columns; ++i) {
     275          if (preprocessingData.IsType<double>(i)) {
    242276            ShuffleToIndices<double>(i, indices);
    243           }
    244           else if (preprocessingData.IsType<string>(i))
    245           {
     277          } else if (preprocessingData.IsType<string>(i)) {
    246278            ShuffleToIndices<string>(i, indices);
    247           }
    248           else if (preprocessingData.IsType<DateTime>(i))
    249           {
     279          } else if (preprocessingData.IsType<DateTime>(i)) {
    250280            ShuffleToIndices<DateTime>(i, indices);
    251281          }
     
    268298    }
    269299
    270     private void ShuffleToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices)
    271     {
     300    private void ShuffleToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    272301      // process all columns equally
    273       foreach (Tuple<int, int> index in indices)
    274       {
     302      foreach (Tuple<int, int> index in indices) {
    275303        int originalIndex = index.Item1;
    276304        int replaceIndex = index.Item2;
     
    297325    public List<int> RowsWithMissingValuesGreater(double percent) {
    298326
    299       List<int> rows= new List<int>();
    300 
    301       for (int i = 0; i < preprocessingData.Rows; ++i)
    302       {
     327      List<int> rows = new List<int>();
     328
     329      for (int i = 0; i < preprocessingData.Rows; ++i) {
    303330        int missingCount = statisticsLogic.GetRowMissingValueCount(i);
    304         if (100f / preprocessingData.Columns * missingCount > percent)
    305         {
     331        if (100f / preprocessingData.Columns * missingCount > percent) {
    306332          rows.Add(i);
    307333        }
     
    328354      List<int> columns = new List<int>();
    329355      for (int i = 0; i < preprocessingData.Columns; ++i) {
    330         if (preprocessingData.IsType<double>(i) || preprocessingData.IsType<DateTime>(i))
    331         {
     356        if (preprocessingData.IsType<double>(i) || preprocessingData.IsType<DateTime>(i)) {
    332357          double columnVariance = statisticsLogic.GetVariance(i);
    333           if (columnVariance < variance)
    334           {
     358          if (columnVariance < variance) {
    335359            columns.Add(i);
    336360          }
     
    355379      rows.Sort();
    356380      rows.Reverse();
    357       preprocessingData.InTransaction(() =>
    358       {
    359         foreach (int row in rows)
    360         {
     381      preprocessingData.InTransaction(() => {
     382        foreach (int row in rows) {
    361383          preprocessingData.DeleteRow(row);
    362384        }
     
    367389      columns.Sort();
    368390      columns.Reverse();
    369       preprocessingData.InTransaction(() =>
    370       {
    371         foreach (int column in columns)
    372         {
     391      preprocessingData.InTransaction(() => {
     392        foreach (int column in columns) {
    373393          preprocessingData.DeleteColumn(column);
    374394        }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IManipulationLogic.cs

    r10811 r10820  
    2929    void ShuffleToIndices(IList<System.Tuple<int, int>> indices);
    3030    void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells, bool considerSelection = false);
    31     void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells);
    3231    void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false);
    3332    void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells, bool considerSelection = false);
    3433    void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false);
     34    void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells);
     35    void ReplaceIndicesBySmoothing(IDictionary<int, IList<int>> cells);
    3536    void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, string value);
    3637    void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value);
     
    4546
    4647    event DataPreprocessingChangedEventHandler Changed;
     48
     49
    4750  }
    4851}
Note: See TracChangeset for help on using the changeset viewer.