Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs @ 10613

Last change on this file since 10613 was 10612, checked in by rstoll, 11 years ago
  • added InTransaction to ITransactionalPreprocessingData and used it in ManipulationLogic
File size: 7.8 KB
RevLine 
[10539]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[10193]23using System.Collections.Generic;
24using System.Linq;
[10249]25using HeuristicLab.Data;
[10193]26
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Cell-level manipulation operations (replace, interpolate, shuffle, reorder)
  /// on an <see cref="ITransactionalPreprocessingData"/> instance. Operations that
  /// touch several cells are wrapped in <c>InTransaction</c> of the underlying data.
  /// </summary>
  public class ManipulationLogic : IManipulationLogic {
    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly IStatisticsLogic statisticInfo;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates the manipulation logic on top of the given collaborators.
    /// </summary>
    /// <param name="_prepocessingData">Data whose cells are read and written.</param>
    /// <param name="theSearchLogic">Used to decide whether a cell holds a missing value.</param>
    /// <param name="theStatisticsLogic">Supplies per-column statistics (average, median, min, max, most common value).</param>
    public ManipulationLogic(ITransactionalPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic) {
      preprocessingData = _prepocessingData;
      searchLogic = theSearchLogic;
      statisticInfo = theStatisticsLogic;
    }

    /// <summary>
    /// Writes <paramref name="value"/> into every listed row of one column.
    /// </summary>
    /// <typeparam name="T">Type of the value passed on to <c>SetCell</c>.</typeparam>
    /// <param name="columnIndex">Column to write to.</param>
    /// <param name="rowIndices">Rows to overwrite.</param>
    /// <param name="value">Replacement value.</param>
    public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
      foreach (int index in rowIndices) {
        preprocessingData.SetCell<T>(columnIndex, index, value);
      }
    }

    /// <summary>
    /// Replaces the given cells with the average of their column.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByAverageValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          // the statistic is fetched once per column, before any cell of it is changed
          double average = statisticInfo.GetAverage(column.Key);
          ReplaceIndicesByValue<double>(column.Key, column.Value, average);
        }
      });
    }

    /// <summary>
    /// Replaces the given cells with the median of their column.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByMedianValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          // the statistic is fetched once per column, before any cell of it is changed
          double median = statisticInfo.GetMedian(column.Key);
          ReplaceIndicesByValue<double>(column.Key, column.Value, median);
        }
      });
    }

    /// <summary>
    /// Replaces each of the given cells with its own random value drawn between
    /// the minimum and the maximum reported for the column.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByRandomValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        Random r = new Random();

        foreach (var column in cells) {
          double max = statisticInfo.GetMax<double>(column.Key);
          double min = statisticInfo.GetMin<double>(column.Key);
          double randMultiplier = (max - min);
          foreach (int index in column.Value) {
            // NextDouble() is in [0, 1), so the result is in [min, max)
            double rand = r.NextDouble() * randMultiplier + min;
            preprocessingData.SetCell<double>(column.Key, index, rand);
          }
        }
      });
    }

    /// <summary>
    /// Replaces the given cells by linear interpolation between the nearest
    /// non-missing value above and below them in the same column.
    /// </summary>
    /// <remarks>
    /// Cells in the first or last row, and cells for which no present value
    /// exists on both sides, are left untouched. When a cell is interpolated,
    /// every cell strictly between its two present neighbours is written, not
    /// only the requested one.
    /// </remarks>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByLinearInterpolationOfNeighbours(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          int countValues = preprocessingData.GetValues<double>(column.Key).Count();
          foreach (int index in column.Value) {
            // first and last row have a neighbour on one side only
            if (index <= 0 || index >= countValues - 1) {
              continue;
            }

            int prevIndex = indexOfPrevPresentValue(column.Key, index);
            int nextIndex = indexOfNextPresentValue(column.Key, index);

            // interpolation needs a present value on BOTH sides; -1 or
            // countValues means the search ran off the column
            if (prevIndex < 0 || nextIndex >= countValues) {
              continue;
            }

            double prev = preprocessingData.GetCell<double>(column.Key, prevIndex);
            double next = preprocessingData.GetCell<double>(column.Key, nextIndex);

            // number of equally sized steps from prev to next
            int valuesToInterpolate = nextIndex - prevIndex;
            double interpolationStep = (next - prev) / valuesToInterpolate;

            // fill only the gap; the present neighbours themselves stay as they are
            for (int i = prevIndex + 1; i < nextIndex; ++i) {
              double interpolated = prev + (interpolationStep * (i - prevIndex));
              preprocessingData.SetCell<double>(column.Key, i, interpolated);
            }
          }
        }
      });
    }

    /// <summary>
    /// Searches upwards from <paramref name="start"/> (exclusive) for the
    /// nearest row that is not a missing value.
    /// </summary>
    /// <returns>The row index found, or -1 if every row above is missing.</returns>
    private int indexOfPrevPresentValue(int columnIndex, int start) {
      int offset = start - 1;
      while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset--;
      }

      return offset;
    }

    /// <summary>
    /// Searches downwards from <paramref name="start"/> (exclusive) for the
    /// nearest row that is not a missing value.
    /// </summary>
    /// <returns>The row index found, or <c>preprocessingData.Rows</c> if every row below is missing.</returns>
    private int indexOfNextPresentValue(int columnIndex, int start) {
      int offset = start + 1;
      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset++;
      }

      return offset;
    }

    /// <summary>
    /// Replaces the given cells with the most common value of their column.
    /// Columns of type double, string and DateTime are supported.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    /// <exception cref="ArgumentException">A referenced column has none of the supported types.</exception>
    public void ReplaceIndicesByMostCommonValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            ReplaceIndicesByValue<double>(column.Key, column.Value, statisticInfo.GetMostCommonValue<double>(column.Key));
          } else if (preprocessingData.IsType<string>(column.Key)) {
            ReplaceIndicesByValue<string>(column.Key, column.Value, statisticInfo.GetMostCommonValue<string>(column.Key));
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticInfo.GetMostCommonValue<DateTime>(column.Key));
          } else {
            throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
          }
        }
      });
    }

    /// <summary>
    /// Randomly reassigns row values inside each of the given ranges, applying
    /// the same random (target row, source row) pairs to every column.
    /// </summary>
    /// <param name="ranges">Row ranges that are processed one after another.</param>
    public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
      // init random outside loop
      Random random = new Random();

      preprocessingData.InTransaction(() => {
        // process all given ranges - e.g. TrainingPartition, TestPartition
        foreach (IntRange range in ranges) {
          List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>();

          // generate random indices used for shuffling each column
          // NOTE(review): every target row i gets an independently drawn source
          // row, and reOrderToIndices copies from a snapshot instead of swapping,
          // so the result is not guaranteed to be a permutation (values can be
          // duplicated or dropped). Also confirm whether IntRange.End is
          // inclusive; if it is exclusive, row range.End lies outside the range.
          // Behaviour left unchanged until that is confirmed.
          for (int i = range.End; i > range.Start; --i) {
            int rand = random.Next(range.Start, i);
            shuffledIndices.Add(new Tuple<int, int>(i, rand));
          }

          ReOrderToIndices(shuffledIndices);
        }
      });
    }

    /// <summary>
    /// Reorders all rows so that row <c>i</c> receives the values of the row
    /// whose index is the <c>i</c>-th element of <paramref name="indices"/>.
    /// </summary>
    /// <param name="indices">Source row index for each target row, in target order.</param>
    public void ReOrderToIndices(IEnumerable<int> indices) {
      // single pass; the position in the sequence is the target row
      List<Tuple<int, int>> indicesTuple = indices
        .Select((sourceRow, targetRow) => new Tuple<int, int>(targetRow, sourceRow))
        .ToList();

      ReOrderToIndices(indicesTuple);
    }

    /// <summary>
    /// Applies the given (target row, source row) pairs to every column of type
    /// double, string or DateTime. Columns of any other type are not touched.
    /// </summary>
    /// <param name="indices">Pairs where Item1 is the row to overwrite and Item2 the row to copy from.</param>
    public void ReOrderToIndices(IList<System.Tuple<int, int>> indices) {
      preprocessingData.InTransaction(() => {
        for (int i = 0; i < preprocessingData.Columns; ++i) {
          if (preprocessingData.IsType<double>(i)) {
            reOrderToIndices<double>(i, indices);
          } else if (preprocessingData.IsType<string>(i)) {
            reOrderToIndices<string>(i, indices);
          } else if (preprocessingData.IsType<DateTime>(i)) {
            reOrderToIndices<DateTime>(i, indices);
          }
        }
      });
    }

    /// <summary>
    /// Applies the (target row, source row) pairs to a single column.
    /// </summary>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="columnIndex">Column to reorder.</param>
    /// <param name="indices">Pairs where Item1 is the row to overwrite and Item2 the row to copy from.</param>
    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
      // snapshot of the column, so reads are not affected by the writes below
      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));

      foreach (Tuple<int, int> index in indices) {
        int originalIndex = index.Item1;
        int replaceIndex = index.Item2;

        T replaceValue = originalData[replaceIndex];
        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.