Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs @ 10665

Last change on this file since 10665 was 10621, checked in by sbreuer, 11 years ago
  • disable toolstrip menu items that are not possible for strings if only string columns are selected
  • refactor interpolation
File size: 9.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Data;
26
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Cell-replacement and row-reordering operations that act on an
  /// <see cref="ITransactionalPreprocessingData"/> instance.
  /// Multi-cell operations are executed through <c>preprocessingData.InTransaction</c>.
  /// </summary>
  public class ManipulationLogic : IManipulationLogic {
    // All three collaborators are assigned once in the constructor and never replaced.
    private readonly ITransactionalPreprocessingData preprocessingData;
    private readonly IStatisticsLogic statisticsLogic;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates the manipulation logic on top of the given data and helper logics.
    /// </summary>
    /// <param name="_prepocessingData">Data whose cells are read and written.</param>
    /// <param name="theSearchLogic">Used to decide whether a cell is a missing value.</param>
    /// <param name="theStatisticsLogic">Supplies average, median, min, max and most common value per column.</param>
    public ManipulationLogic(ITransactionalPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic) {
      preprocessingData = _prepocessingData;
      searchLogic = theSearchLogic;
      statisticsLogic = theStatisticsLogic;
    }

    /// <summary>
    /// Writes <paramref name="value"/> into every given row of one column.
    /// </summary>
    /// <param name="columnIndex">Column to write to.</param>
    /// <param name="rowIndices">Rows of that column to overwrite.</param>
    /// <param name="value">Value stored in each addressed cell.</param>
    public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
      foreach (int index in rowIndices) {
        preprocessingData.SetCell<T>(columnIndex, index, value);
      }
    }

    /// <summary>
    /// Replaces the given cells by the average of their column.
    /// Only <c>double</c> and <c>DateTime</c> columns are handled; other columns are skipped.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByAverageValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            double average = statisticsLogic.GetAverage(column.Key);
            ReplaceIndicesByValue<double>(column.Key, column.Value, average);
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            DateTime average = statisticsLogic.GetAverageDateTime(column.Key);
            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average);
          }
        }
      });
    }

    /// <summary>
    /// Replaces the given cells by the median of their column.
    /// Only <c>double</c> and <c>DateTime</c> columns are handled; other columns are skipped.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByMedianValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            double median = statisticsLogic.GetMedian(column.Key);
            ReplaceIndicesByValue<double>(column.Key, column.Value, median);
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            DateTime median = statisticsLogic.GetMedianDateTime(column.Key);
            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median);
          }
        }
      });
    }

    /// <summary>
    /// Replaces every given cell by its own random value taken between the minimum
    /// and maximum reported by the statistics logic for the column.
    /// Only <c>double</c> and <c>DateTime</c> columns are handled; other columns are skipped.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByRandomValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        Random r = new Random();

        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            double max = statisticsLogic.GetMax<double>(column.Key);
            double min = statisticsLogic.GetMin<double>(column.Key);
            double randMultiplier = (max - min);
            foreach (int index in column.Value) {
              double rand = r.NextDouble() * randMultiplier + min;
              preprocessingData.SetCell<double>(column.Key, index, rand);
            }
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            DateTime min = statisticsLogic.GetMin<DateTime>(column.Key);
            DateTime max = statisticsLogic.GetMax<DateTime>(column.Key);
            // DateTime values are randomized as an offset in seconds from the minimum.
            double randMultiplier = (max - min).TotalSeconds;
            foreach (int index in column.Value) {
              double rand = r.NextDouble() * randMultiplier;
              preprocessingData.SetCell<DateTime>(column.Key, index, min.AddSeconds(rand));
            }
          }
        }
      });
    }

    /// <summary>
    /// Replaces the given cells by a linear interpolation between the nearest present
    /// (non-missing) value above and below them in the same column.
    /// </summary>
    /// <remarks>
    /// A cell is left untouched when it is in the first or last row, or when a present
    /// value is not found on both sides. When a cell is interpolated, every cell strictly
    /// between its two present neighbours is written as well, not only the selected one.
    /// Only <c>double</c> and <c>DateTime</c> columns are handled; other columns are skipped.
    /// </remarks>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    public void ReplaceIndicesByLinearInterpolationOfNeighbours(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          // Stays 0 for unsupported column types, which makes every index fail the range check below.
          int countValues = 0;
          if (preprocessingData.IsType<double>(column.Key)) {
            countValues = preprocessingData.GetValues<double>(column.Key).Count();
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            countValues = preprocessingData.GetValues<DateTime>(column.Key).Count();
          }

          foreach (int index in column.Value) {
            // The first and last rows have a neighbour on one side only, so they are never replaced.
            if (index <= 0 || index >= countValues - 1) {
              continue;
            }

            int prevIndex = indexOfPrevPresentValue(column.Key, index);
            int nextIndex = indexOfNextPresentValue(column.Key, index);

            // Interpolation needs a present value on BOTH sides; with only one of them
            // the reads below would address row -1 or a row past the end.
            if (prevIndex < 0 || nextIndex >= countValues) {
              continue;
            }

            // At least 2, because prevIndex < index < nextIndex.
            int valuesToInterpolate = nextIndex - prevIndex;

            if (preprocessingData.IsType<double>(column.Key)) {
              double prev = preprocessingData.GetCell<double>(column.Key, prevIndex);
              double next = preprocessingData.GetCell<double>(column.Key, nextIndex);
              double interpolationStep = (next - prev) / valuesToInterpolate;

              // Start after prevIndex: the neighbour itself already holds its value.
              for (int i = prevIndex + 1; i < nextIndex; ++i) {
                double interpolated = prev + (interpolationStep * (i - prevIndex));
                preprocessingData.SetCell<double>(column.Key, i, interpolated);
              }
            } else if (preprocessingData.IsType<DateTime>(column.Key)) {
              DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex);
              DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex);
              double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;

              for (int i = prevIndex + 1; i < nextIndex; ++i) {
                DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
                preprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
              }
            }
          }
        }
      });
    }

    /// <summary>
    /// Walks upwards from the row before <paramref name="start"/> until a cell is found
    /// that the search logic does not report as missing.
    /// </summary>
    /// <returns>The row index of that cell, or -1 when every row above is missing.</returns>
    private int indexOfPrevPresentValue(int columnIndex, int start) {
      int offset = start - 1;
      while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset--;
      }

      return offset;
    }

    /// <summary>
    /// Walks downwards from the row after <paramref name="start"/> until a cell is found
    /// that the search logic does not report as missing.
    /// </summary>
    /// <returns>
    /// The row index of that cell, or <c>preprocessingData.Rows</c> when every row below is missing.
    /// </returns>
    private int indexOfNextPresentValue(int columnIndex, int start) {
      int offset = start + 1;
      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset++;
      }

      return offset;
    }

    /// <summary>
    /// Replaces the given cells by the most common value of their column.
    /// </summary>
    /// <param name="cells">Maps a column index to the row indices to replace in that column.</param>
    /// <exception cref="ArgumentException">
    /// A column is neither of type <c>double</c>, <c>string</c> nor <c>DateTime</c>.
    /// </exception>
    public void ReplaceIndicesByMostCommonValue(Dictionary<int, List<int>> cells) {
      preprocessingData.InTransaction(() => {
        foreach (var column in cells) {
          if (preprocessingData.IsType<double>(column.Key)) {
            ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key));
          } else if (preprocessingData.IsType<string>(column.Key)) {
            ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key));
          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key));
          } else {
            throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
          }
        }
      });
    }

    /// <summary>
    /// Randomly permutes the rows inside each given range; rows of different ranges are
    /// never mixed, and all columns receive the same permutation.
    /// </summary>
    /// <remarks>
    /// The rows <c>range.Start</c> through <c>range.End</c> take part in the shuffle, which
    /// is the index span the earlier implementation touched.
    /// NOTE(review): this treats <c>IntRange.End</c> as inclusive. If it is meant to be
    /// exclusive, one row too many is included - confirm against IntRange's contract.
    /// </remarks>
    /// <param name="ranges">Row ranges to shuffle, e.g. a training partition.</param>
    public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
      // init random outside loop so that all ranges draw from the same sequence
      Random random = new Random();

      preprocessingData.InTransaction(() => {
        foreach (IntRange range in ranges) {
          int rowCount = range.End - range.Start + 1;
          if (rowCount <= 1) {
            // nothing to shuffle
            continue;
          }

          // Fisher-Yates shuffle of the source row indices. The re-ordering below COPIES
          // from a snapshot (target <- source), so every source row has to be used exactly
          // once; independently drawn sources would duplicate some rows and lose others.
          int[] sourceRows = Enumerable.Range(range.Start, rowCount).ToArray();
          for (int i = rowCount - 1; i > 0; --i) {
            int j = random.Next(i + 1); // 0 <= j <= i
            int swap = sourceRows[i];
            sourceRows[i] = sourceRows[j];
            sourceRows[j] = swap;
          }

          List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>(rowCount);
          for (int k = 0; k < rowCount; ++k) {
            shuffledIndices.Add(Tuple.Create(range.Start + k, sourceRows[k]));
          }

          ReOrderToIndices(shuffledIndices);
        }
      });
    }

    /// <summary>
    /// Re-orders the rows so that row <c>i</c> receives the former content of the row
    /// named by the <c>i</c>-th element of <paramref name="indices"/>.
    /// </summary>
    /// <param name="indices">Source row index for each target row, starting at target row 0.</param>
    /// <exception cref="ArgumentNullException"><paramref name="indices"/> is null.</exception>
    public void ReOrderToIndices(IEnumerable<int> indices) {
      if (indices == null) {
        throw new ArgumentNullException("indices");
      }

      List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();

      // Single pass: Count()/ElementAt(i) per iteration would re-enumerate the sequence each time.
      int targetRow = 0;
      foreach (int sourceRow in indices) {
        indicesTuple.Add(Tuple.Create(targetRow, sourceRow));
        targetRow++;
      }

      ReOrderToIndices(indicesTuple);
    }

    /// <summary>
    /// Applies the given (target row, source row) pairs to every <c>double</c>,
    /// <c>string</c> and <c>DateTime</c> column; columns of other types are skipped.
    /// </summary>
    /// <param name="indices">
    /// Pairs whose <c>Item1</c> is the row to write and whose <c>Item2</c> is the row to read from.
    /// </param>
    public void ReOrderToIndices(IList<System.Tuple<int, int>> indices) {
      preprocessingData.InTransaction(() => {
        for (int i = 0; i < preprocessingData.Columns; ++i) {
          if (preprocessingData.IsType<double>(i)) {
            reOrderToIndices<double>(i, indices);
          } else if (preprocessingData.IsType<string>(i)) {
            reOrderToIndices<string>(i, indices);
          } else if (preprocessingData.IsType<DateTime>(i)) {
            reOrderToIndices<DateTime>(i, indices);
          }
        }
      });
    }

    /// <summary>
    /// Applies the (target row, source row) pairs to a single column.
    /// </summary>
    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
      // Snapshot first: values are read from the state before any cell of this call was written.
      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));

      foreach (Tuple<int, int> index in indices) {
        int originalIndex = index.Item1;
        int replaceIndex = index.Item2;

        T replaceValue = originalData[replaceIndex];
        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.