Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs @ 10672

Last change on this file since 10672 was 10672, checked in by sbreuer, 10 years ago
  • change function headers in manipulationlogic to interface datatypes
  • implemented find and replace logic
File size: 10.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Data;
26
27namespace HeuristicLab.DataPreprocessing {
28  public class ManipulationLogic : IManipulationLogic {
// Collaborators supplied through the constructor; every operation in this class delegates to them.
private ITransactionalPreprocessingData preprocessingData;
private IStatisticsLogic statisticsLogic;
private ISearchLogic searchLogic;
private IDataGridLogic dataGridLogic;

/// <summary>
/// Creates the manipulation logic and stores the given collaborators for later use.
/// </summary>
/// <param name="_prepocessingData">Data container whose cells are read and overwritten.</param>
/// <param name="theSearchLogic">Used to ask whether a cell holds a missing value.</param>
/// <param name="theStatisticsLogic">Source of the per-column statistics (average, median, min, max, most common value).</param>
/// <param name="theDataGridLogic">Used to write string values into cells.</param>
public ManipulationLogic(ITransactionalPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic, IDataGridLogic theDataGridLogic) {
  this.preprocessingData = _prepocessingData;
  this.statisticsLogic = theStatisticsLogic;
  this.searchLogic = theSearchLogic;
  this.dataGridLogic = theDataGridLogic;
}
40
/// <summary>
/// Writes <paramref name="value"/> into every listed row of one column.
/// </summary>
/// <typeparam name="T">Type of the value passed on to SetCell.</typeparam>
/// <param name="columnIndex">Column whose cells are overwritten.</param>
/// <param name="rowIndices">Rows to overwrite, processed in enumeration order.</param>
/// <param name="value">Replacement value.</param>
public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
  foreach (int rowIndex in rowIndices) {
    preprocessingData.SetCell<T>(columnIndex, rowIndex, value);
  }
}
46
/// <summary>
/// Overwrites the selected cells of each column with the average reported by the statistics logic.
/// Columns that are neither double nor DateTime are left untouched.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
public void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells) {
  preprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;
      IList<int> rows = entry.Value;

      if (preprocessingData.IsType<double>(columnIndex)) {
        double numericAverage = statisticsLogic.GetAverage(columnIndex);
        ReplaceIndicesByValue<double>(columnIndex, rows, numericAverage);
        continue;
      }

      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        DateTime dateAverage = statisticsLogic.GetAverageDateTime(columnIndex);
        ReplaceIndicesByValue<DateTime>(columnIndex, rows, dateAverage);
      }
    }
  });
}
60
/// <summary>
/// Overwrites the selected cells of each column with the median reported by the statistics logic.
/// Columns that are neither double nor DateTime are left untouched.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells) {
  preprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;
      IList<int> rows = entry.Value;

      if (preprocessingData.IsType<double>(columnIndex)) {
        double numericMedian = statisticsLogic.GetMedian(columnIndex);
        ReplaceIndicesByValue<double>(columnIndex, rows, numericMedian);
        continue;
      }

      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        DateTime dateMedian = statisticsLogic.GetMedianDateTime(columnIndex);
        ReplaceIndicesByValue<DateTime>(columnIndex, rows, dateMedian);
      }
    }
  });
}
74
/// <summary>
/// Overwrites the selected cells with random values drawn between the column's minimum and maximum
/// as reported by the statistics logic. Every cell gets its own draw.
/// Columns that are neither double nor DateTime are left untouched.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells) {
  preprocessingData.InTransaction(() => {
    // one generator for the whole call
    Random generator = new Random();

    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;

      if (preprocessingData.IsType<double>(columnIndex)) {
        double upper = statisticsLogic.GetMax<double>(columnIndex);
        double lower = statisticsLogic.GetMin<double>(columnIndex);
        double width = (upper - lower);
        foreach (int rowIndex in entry.Value) {
          // NextDouble() is in [0, 1), so the result lies in [lower, upper)
          double drawn = generator.NextDouble() * width + lower;
          preprocessingData.SetCell<double>(columnIndex, rowIndex, drawn);
        }
        continue;
      }

      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        DateTime earliest = statisticsLogic.GetMin<DateTime>(columnIndex);
        DateTime latest = statisticsLogic.GetMax<DateTime>(columnIndex);
        double spanSeconds = (latest - earliest).TotalSeconds;
        foreach (int rowIndex in entry.Value) {
          // random offset in seconds, added on top of the earliest date
          double offsetSeconds = generator.NextDouble() * spanSeconds;
          preprocessingData.SetCell<DateTime>(columnIndex, rowIndex, earliest.AddSeconds(offsetSeconds));
        }
      }
    }
  });
}
100
/// <summary>
/// Fills the gap around each selected cell by linear interpolation between the nearest
/// non-missing value above and the nearest non-missing value below it.
/// </summary>
/// <remarks>
/// A selected cell is skipped when it is the first or last row of the column, or when a
/// present neighbour is missing on either side: interpolation needs both end points.
/// All cells strictly between the two neighbours are written; the neighbours themselves
/// are never overwritten. Columns that are neither double nor DateTime are left untouched.
/// </remarks>
/// <param name="cells">Maps a column index to the row indices to interpolate in that column.</param>
public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
  preprocessingData.InTransaction(() => {
    foreach (var column in cells) {
      // the column type cannot change between rows, so decide once per column
      bool isDouble = preprocessingData.IsType<double>(column.Key);
      bool isDateTime = !isDouble && preprocessingData.IsType<DateTime>(column.Key);
      if (!isDouble && !isDateTime) {
        continue;
      }

      int countValues = isDouble
        ? preprocessingData.GetValues<double>(column.Key).Count()
        : preprocessingData.GetValues<DateTime>(column.Key).Count();

      foreach (int index in column.Value) {
        // don't replace first or last values: they have a neighbour on one side only
        if (index <= 0 || index >= countValues - 1) {
          continue;
        }

        int prevIndex = indexOfPrevPresentValue(column.Key, index);
        int nextIndex = indexOfNextPresentValue(column.Key, index);

        // a neighbour is missing on at least one side; reading it would be out of range
        if (prevIndex < 0 || nextIndex >= countValues) {
          continue;
        }

        // number of steps between the two present neighbours (always >= 2 here)
        int valuesToInterpolate = nextIndex - prevIndex;

        if (isDouble) {
          double prev = preprocessingData.GetCell<double>(column.Key, prevIndex);
          double next = preprocessingData.GetCell<double>(column.Key, nextIndex);
          double interpolationStep = (next - prev) / valuesToInterpolate;

          // start after prevIndex so the present neighbour is not rewritten
          for (int i = prevIndex + 1; i < nextIndex; ++i) {
            double interpolated = prev + (interpolationStep * (i - prevIndex));
            preprocessingData.SetCell<double>(column.Key, i, interpolated);
          }
        } else {
          DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex);
          DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex);
          // DateTime columns are interpolated on the seconds between both neighbours
          double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;

          for (int i = prevIndex + 1; i < nextIndex; ++i) {
            DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
            preprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
          }
        }
      }
    }
  });
}
148
/// <summary>
/// Walks upwards from the row before <paramref name="start"/> until a cell is found that the
/// search logic does not report as missing.
/// </summary>
/// <param name="columnIndex">Column to scan.</param>
/// <param name="start">Row to start from; the scan begins at start - 1.</param>
/// <returns>Index of the found row, or a negative index when the scan ran past the first row.</returns>
private int indexOfPrevPresentValue(int columnIndex, int start) {
  int candidate = start - 1;
  for (; candidate >= 0; --candidate) {
    if (!searchLogic.IsMissingValue(columnIndex, candidate)) {
      break;
    }
  }

  return candidate;
}
157
/// <summary>
/// Walks downwards from the row after <paramref name="start"/> until a cell is found that the
/// search logic does not report as missing.
/// </summary>
/// <param name="columnIndex">Column to scan.</param>
/// <param name="start">Row to start from; the scan begins at start + 1.</param>
/// <returns>
/// Index of the found row, or an index not below preprocessingData.Rows when the scan ran past the last row.
/// </returns>
private int indexOfNextPresentValue(int columnIndex, int start) {
  int candidate = start + 1;
  for (; candidate < preprocessingData.Rows; ++candidate) {
    if (!searchLogic.IsMissingValue(columnIndex, candidate)) {
      break;
    }
  }

  return candidate;
}
166
/// <summary>
/// Overwrites the selected cells of each column with the most common value reported by the
/// statistics logic for that column.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <exception cref="ArgumentException">A column is neither double, string nor DateTime.</exception>
public void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells) {
  preprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;
      IList<int> rows = entry.Value;

      if (preprocessingData.IsType<double>(columnIndex)) {
        ReplaceIndicesByValue<double>(columnIndex, rows, statisticsLogic.GetMostCommonValue<double>(columnIndex));
        continue;
      }

      if (preprocessingData.IsType<string>(columnIndex)) {
        ReplaceIndicesByValue<string>(columnIndex, rows, statisticsLogic.GetMostCommonValue<string>(columnIndex));
        continue;
      }

      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        ReplaceIndicesByValue<DateTime>(columnIndex, rows, statisticsLogic.GetMostCommonValue<DateTime>(columnIndex));
        continue;
      }

      // none of the supported column types matched
      throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
    }
  });
}
182
/// <summary>
/// Shuffles the rows inside each given range; rows outside the ranges keep their position.
/// </summary>
/// <remarks>
/// Each range is shuffled with a Fisher-Yates permutation of its row indices, so every
/// original row appears exactly once afterwards. The previous implementation drew source rows
/// with replacement, which duplicated some rows and dropped others.
/// NOTE(review): range.End is treated as exclusive (rows Start .. End - 1), assuming
/// IntRange.Size == End - Start; confirm against IntRange.
/// </remarks>
/// <param name="ranges">Row ranges to shuffle independently, e.g. training and test partition.</param>
public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
  // init random outside loop
  Random random = new Random();

  preprocessingData.InTransaction(() => {
    foreach (IntRange range in ranges) {
      int first = range.Start;
      int count = range.End - first;

      // fewer than two rows: nothing to shuffle
      if (count < 2) {
        continue;
      }

      // permutation[k] is the row whose data moves to row (first + k)
      int[] permutation = Enumerable.Range(first, count).ToArray();
      for (int i = count - 1; i > 0; --i) {
        int j = random.Next(i + 1); // 0 <= j <= i
        int swap = permutation[i];
        permutation[i] = permutation[j];
        permutation[j] = swap;
      }

      // (target row, source row) pairs; sources are read from a snapshot taken per column
      List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>(count);
      for (int k = 0; k < count; ++k) {
        shuffledIndices.Add(new Tuple<int, int>(first + k, permutation[k]));
      }

      ReOrderToIndices(shuffledIndices);
    }
  });
}
202
/// <summary>
/// Reorders all rows so that row i receives the data of row indices[i].
/// </summary>
/// <remarks>
/// The sequence is enumerated exactly once. The previous version called Count() and
/// ElementAt(i) on every iteration, which re-enumerates non-list sequences each time.
/// </remarks>
/// <param name="indices">For each target row, in order starting at row 0, the source row to copy from.</param>
public void ReOrderToIndices(IEnumerable<int> indices) {
  List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();

  int targetRow = 0;
  foreach (int sourceRow in indices) {
    indicesTuple.Add(new Tuple<int, int>(targetRow, sourceRow));
    ++targetRow;
  }

  ReOrderToIndices(indicesTuple);
}
212
/// <summary>
/// Applies the given (target row, source row) pairs to every double, string and DateTime column
/// inside one transaction. Columns of any other type are skipped.
/// </summary>
/// <param name="indices">Pairs where Item1 is the row to overwrite and Item2 the row to copy from.</param>
public void ReOrderToIndices(IList<System.Tuple<int, int>> indices) {
  preprocessingData.InTransaction(() => {
    int columnCount = 0;
    for (int columnIndex = 0; columnIndex < (columnCount = preprocessingData.Columns); ++columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        reOrderToIndices<double>(columnIndex, indices);
        continue;
      }

      if (preprocessingData.IsType<string>(columnIndex)) {
        reOrderToIndices<string>(columnIndex, indices);
        continue;
      }

      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        reOrderToIndices<DateTime>(columnIndex, indices);
      }
    }
  });
}
226
/// <summary>
/// Applies the (target row, source row) pairs to a single column.
/// </summary>
/// <typeparam name="T">Value type of the column.</typeparam>
/// <param name="columnIndex">Column to reorder.</param>
/// <param name="indices">Pairs where Item1 is the row to overwrite and Item2 the row to copy from.</param>
private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
  // copy the column first so that sources are read from the state before any overwrite
  List<T> snapshot = preprocessingData.GetValues<T>(columnIndex).ToList();

  foreach (Tuple<int, int> pair in indices) {
    int targetRow = pair.Item1;
    int sourceRow = pair.Item2;

    preprocessingData.SetCell<T>(columnIndex, targetRow, snapshot[sourceRow]);
  }
}
240
/// <summary>
/// Writes the same string value into every selected cell through the data grid logic,
/// all inside one transaction.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="value">Textual value handed to the data grid logic for each cell.</param>
public void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, string value) {
  preprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;
      foreach (int rowIndex in entry.Value) {
        dataGridLogic.SetValue(value, columnIndex, rowIndex);
      }
    }
  });
}
250  }
251}
Note: See TracBrowser for help on using the repository browser.