Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs @ 10611

Last change on this file since 10611 was 10590, checked in by sbreuer, 10 years ago
  • enhanced selection of cells
File size: 7.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Data;
26
27namespace HeuristicLab.DataPreprocessing {
28  public class ManipulationLogic : IManipulationLogic {
29    private ITransactionalPreprocessingData preprocessingData;
30    private IStatisticsLogic statisticInfo;
31    private ISearchLogic searchLogic;
32
33    public ManipulationLogic(ITransactionalPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic) {
34      preprocessingData = _prepocessingData;
35      searchLogic = theSearchLogic;
36      statisticInfo = theStatisticsLogic;
37    }
38
39    public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
40      foreach (int index in rowIndices) {
41        preprocessingData.SetCell<T>(columnIndex, index, value);
42      }
43    }
44
45    public void ReplaceIndicesByAverageValue(Dictionary<int, List<int>> cells) {
46      preprocessingData.BeginTransaction();
47      foreach (var column in cells) {
48        double average = statisticInfo.GetAverage(column.Key);
49        ReplaceIndicesByValue<double>(column.Key, column.Value, average);
50      }
51      preprocessingData.EndTransaction();
52    }
53
54    public void ReplaceIndicesByMedianValue(Dictionary<int, List<int>> cells) {
55      preprocessingData.BeginTransaction();
56      foreach (var column in cells) {
57        double median = statisticInfo.GetMedian(column.Key);
58        ReplaceIndicesByValue<double>(column.Key, column.Value, median);
59      }
60      preprocessingData.EndTransaction();
61    }
62
63    public void ReplaceIndicesByRandomValue(Dictionary<int, List<int>> cells) {
64      preprocessingData.BeginTransaction();
65      Random r = new Random();
66
67      foreach (var column in cells) {
68        double max = statisticInfo.GetMax<double>(column.Key);
69        double min = statisticInfo.GetMin<double>(column.Key);
70        double randMultiplier = (max - min);
71        foreach (int index in column.Value) {
72          double rand = r.NextDouble() * randMultiplier + min;
73          preprocessingData.SetCell<double>(column.Key, index, rand);
74        }
75      }
76      preprocessingData.EndTransaction();
77    }
78
79    public void ReplaceIndicesByLinearInterpolationOfNeighbours(Dictionary<int, List<int>> cells) {
80      preprocessingData.BeginTransaction();
81      foreach (var column in cells) {
82        int countValues = preprocessingData.GetValues<double>(column.Key).Count();
83        foreach (int index in column.Value) {
84          // dont replace first or last values
85          if (index > 0 && index < countValues) {
86            int prevIndex = indexOfPrevPresentValue(column.Key, index);
87            int nextIndex = indexOfNextPresentValue(column.Key, index);
88
89            // no neighbours found
90            if (prevIndex < 0 && nextIndex >= countValues) {
91              continue;
92            }
93            double prev = preprocessingData.GetCell<double>(column.Key, prevIndex);
94            double next = preprocessingData.GetCell<double>(column.Key, nextIndex);
95
96            int valuesToInterpolate = nextIndex - prevIndex;
97
98            double interpolationStep = (prev + next) / valuesToInterpolate;
99
100            for (int i = prevIndex; i < nextIndex; ++i) {
101              double interpolated = prev + (interpolationStep * (i - prevIndex));
102              preprocessingData.SetCell<double>(column.Key, i, interpolated);
103            }
104          }
105        }
106      }
107      preprocessingData.EndTransaction();
108    }
109
110    private int indexOfPrevPresentValue(int columnIndex, int start) {
111      int offset = start - 1;
112      while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
113        offset--;
114      }
115
116      return offset;
117    }
118
119    private int indexOfNextPresentValue(int columnIndex, int start) {
120      int offset = start + 1;
121      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
122        offset++;
123      }
124
125      return offset;
126    }
127
128    public void ReplaceIndicesByMostCommonValue(Dictionary<int, List<int>> cells) {
129      preprocessingData.BeginTransaction();
130      foreach (var column in cells) {
131        if (preprocessingData.IsType<double>(column.Key)) {
132          ReplaceIndicesByValue<double>(column.Key, column.Value, statisticInfo.GetMostCommonValue<double>(column.Key));
133        } else if (preprocessingData.IsType<string>(column.Key)) {
134          ReplaceIndicesByValue<string>(column.Key, column.Value, statisticInfo.GetMostCommonValue<string>(column.Key));
135        } else if (preprocessingData.IsType<DateTime>(column.Key)) {
136          ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticInfo.GetMostCommonValue<DateTime>(column.Key));
137        } else {
138          throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
139        }
140      }
141      preprocessingData.EndTransaction();
142    }
143
144    public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
145      // init random outside loop
146      Random random = new Random();
147
148      preprocessingData.BeginTransaction();
149      // process all given ranges - e.g. TrainingPartition, Trainingpartition
150      foreach (IntRange range in ranges) {
151        List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>();
152
153        // generate random indices used for shuffeling each column
154        for (int i = range.End; i > range.Start; --i) {
155          int rand = random.Next(range.Start, i);
156          shuffledIndices.Add(new Tuple<int, int>(i, rand));
157        }
158
159        ReOrderToIndices(shuffledIndices);
160      }
161      preprocessingData.EndTransaction();
162    }
163
164    public void ReOrderToIndices(IEnumerable<int> indices) {
165      List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();
166
167      for (int i = 0; i < indices.Count(); ++i) {
168        indicesTuple.Add(new Tuple<int, int>(i, indices.ElementAt(i)));
169      }
170
171      ReOrderToIndices(indicesTuple);
172    }
173
174    public void ReOrderToIndices(IList<System.Tuple<int, int>> indices) {
175      preprocessingData.BeginTransaction();
176      for (int i = 0; i < preprocessingData.Columns; ++i) {
177        if (preprocessingData.IsType<double>(i)) {
178          reOrderToIndices<double>(i, indices);
179        } else if (preprocessingData.IsType<string>(i)) {
180          reOrderToIndices<string>(i, indices);
181        } else if (preprocessingData.IsType<DateTime>(i)) {
182          reOrderToIndices<DateTime>(i, indices);
183        }
184      }
185      preprocessingData.EndTransaction();
186    }
187
188    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
189
190      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
191
192      // process all columns equally
193      foreach (Tuple<int, int> index in indices) {
194        int originalIndex = index.Item1;
195        int replaceIndex = index.Item2;
196
197        T replaceValue = originalData.ElementAt<T>(replaceIndex);
198        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
199      }
200    }
201  }
202}
Note: See TracBrowser for help on using the repository browser.