Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs @ 10551

Last change on this file since 10551 was 10539, checked in by rstoll, 11 years ago

Added License notice

File size: 7.0 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
[10193]23using System.Collections.Generic;
24using System.Linq;
[10249]25using HeuristicLab.Data;
[10193]26
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Cell manipulation operations on an <see cref="IPreprocessingData"/> instance:
  /// replacing selected cells of a column by a fixed, statistical, random or
  /// interpolated value, and re-ordering / shuffling rows across all columns.
  /// </summary>
  public class ManipulationLogic : IManipulationLogic {
    private readonly IPreprocessingData preprocessingData;
    private readonly IStatisticsLogic statisticInfo;
    private readonly ISearchLogic searchLogic;

    // One generator per instance. Creating a new default-seeded Random on every call
    // can produce identical sequences when calls happen in quick succession.
    private readonly Random random = new Random();

    /// <summary>
    /// Creates the manipulation logic on top of the given data and helper logics.
    /// </summary>
    /// <param name="_prepocessingData">The data whose cells are read and written.</param>
    /// <param name="theSearchLogic">Used to decide whether a cell is a missing value.</param>
    /// <param name="theStatisticsLogic">Used to obtain average, median, min, max and most common value of a column.</param>
    public ManipulationLogic(IPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic) {
      preprocessingData = _prepocessingData;
      searchLogic = theSearchLogic;
      statisticInfo = theStatisticsLogic;
    }

    /// <summary>
    /// Writes <paramref name="value"/> into every given row of the column.
    /// </summary>
    /// <typeparam name="T">Type of the value to write.</typeparam>
    /// <param name="columnIndex">Index of the column to modify.</param>
    /// <param name="rowIndices">Row indices of the cells to overwrite.</param>
    /// <param name="value">The replacement value.</param>
    public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
      foreach (int index in rowIndices) {
        preprocessingData.SetCell<T>(columnIndex, index, value);
      }
    }

    /// <summary>
    /// Replaces the given cells by the column average reported by the statistics logic.
    /// </summary>
    /// <param name="columnIndex">Index of the column to modify.</param>
    /// <param name="rowIndices">Row indices of the cells to overwrite.</param>
    public void ReplaceIndicesByAverageValue(int columnIndex, IEnumerable<int> rowIndices) {
      double average = statisticInfo.GetAverage(columnIndex);
      ReplaceIndicesByValue<double>(columnIndex, rowIndices, average);
    }

    /// <summary>
    /// Replaces the given cells by the column median reported by the statistics logic.
    /// </summary>
    /// <param name="columnIndex">Index of the column to modify.</param>
    /// <param name="rowIndices">Row indices of the cells to overwrite.</param>
    public void ReplaceIndicesByMedianValue(int columnIndex, IEnumerable<int> rowIndices) {
      double median = statisticInfo.GetMedian(columnIndex);
      ReplaceIndicesByValue<double>(columnIndex, rowIndices, median);
    }

    /// <summary>
    /// Replaces each given cell by its own random value drawn between the
    /// column's minimum and maximum (as reported by the statistics logic).
    /// </summary>
    /// <param name="columnIndex">Index of the column to modify.</param>
    /// <param name="rowIndices">Row indices of the cells to overwrite.</param>
    public void ReplaceIndicesByRandomValue(int columnIndex, IEnumerable<int> rowIndices) {
      double max = statisticInfo.GetMax<double>(columnIndex);
      double min = statisticInfo.GetMin<double>(columnIndex);
      double randMultiplier = (max - min);
      foreach (int index in rowIndices) {
        double rand = random.NextDouble() * randMultiplier + min;
        preprocessingData.SetCell<double>(columnIndex, index, rand);
      }
    }

    /// <summary>
    /// Replaces the given cells by a linear interpolation between the nearest
    /// preceding and following non-missing values of the column.
    /// </summary>
    /// <remarks>
    /// The first and the last row are never replaced, and an index is skipped when
    /// it has no present value on either side. All cells strictly between the two
    /// neighbours are written, not only the requested index.
    /// </remarks>
    /// <param name="columnIndex">Index of the column to modify.</param>
    /// <param name="rowIndices">Row indices of the cells to interpolate.</param>
    public void ReplaceIndicesByLinearInterpolationOfNeighbours(int columnIndex, IEnumerable<int> rowIndices) {
      int countValues = preprocessingData.GetValues<double>(columnIndex).Count();
      foreach (int index in rowIndices) {
        // dont replace first or last values
        if (index <= 0 || index >= countValues - 1) {
          continue;
        }

        int prevIndex = indexOfPrevPresentValue(columnIndex, index);
        int nextIndex = indexOfNextPresentValue(columnIndex, index);

        // interpolation needs a present value on BOTH sides
        if (prevIndex < 0 || nextIndex >= countValues) {
          continue;
        }

        double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex);
        double next = preprocessingData.GetCell<double>(columnIndex, nextIndex);

        // nextIndex - prevIndex is at least 2 because prevIndex < index < nextIndex
        int valuesToInterpolate = nextIndex - prevIndex;
        double interpolationStep = (next - prev) / valuesToInterpolate;

        // the neighbours themselves keep their values; only the cells in between are written
        for (int i = prevIndex + 1; i < nextIndex; ++i) {
          double interpolated = prev + (interpolationStep * (i - prevIndex));
          preprocessingData.SetCell<double>(columnIndex, i, interpolated);
        }
      }
    }

    /// <summary>
    /// Returns the index of the closest row before <paramref name="start"/> that is
    /// not a missing value, or -1 if there is none.
    /// </summary>
    private int indexOfPrevPresentValue(int columnIndex, int start) {
      int offset = start - 1;
      while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset--;
      }

      return offset;
    }

    /// <summary>
    /// Returns the index of the closest row after <paramref name="start"/> that is
    /// not a missing value, or the row count if there is none.
    /// </summary>
    private int indexOfNextPresentValue(int columnIndex, int start) {
      int offset = start + 1;
      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
        offset++;
      }

      return offset;
    }

    /// <summary>
    /// Replaces the given cells by the most common value of the column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to modify.</param>
    /// <param name="rowIndices">Row indices of the cells to overwrite.</param>
    /// <exception cref="ArgumentException">
    /// The column is neither of type double, string nor DateTime.
    /// </exception>
    public void ReplaceIndicesByMostCommonValue(int columnIndex, IEnumerable<int> rowIndices) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        ReplaceIndicesByValue<double>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<double>(columnIndex));
      } else if (preprocessingData.IsType<string>(columnIndex)) {
        ReplaceIndicesByValue<string>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<string>(columnIndex));
      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
        ReplaceIndicesByValue<DateTime>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<DateTime>(columnIndex));
      } else {
        throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
      }
    }

    /// <summary>
    /// Randomly permutes the rows inside each given range. Rows never leave their
    /// range, and the same permutation is applied to every column so rows stay intact.
    /// </summary>
    /// <remarks>
    /// NOTE(review): each range is treated as half-open, i.e. rows
    /// <c>Start</c> .. <c>End - 1</c> are shuffled - confirm against IntRange's contract.
    /// </remarks>
    /// <param name="ranges">The row ranges to shuffle independently of each other.</param>
    public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
      // process all given ranges - presumably e.g. the training and the test partition
      foreach (IntRange range in ranges) {
        int start = range.Start;
        int length = range.End - start;
        if (length < 2) {
          // nothing to shuffle in an empty or single-row range
          continue;
        }

        // Fisher-Yates shuffle of the row indices, so every row of the range is used
        // exactly once as a source and no value is duplicated or lost.
        int[] sourceRows = Enumerable.Range(start, length).ToArray();
        for (int i = length - 1; i > 0; --i) {
          int j = random.Next(i + 1);
          int tmp = sourceRows[i];
          sourceRows[i] = sourceRows[j];
          sourceRows[j] = tmp;
        }

        List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>(length);
        for (int i = 0; i < length; ++i) {
          shuffledIndices.Add(Tuple.Create(start + i, sourceRows[i]));
        }

        ReOrderToIndices(shuffledIndices);
      }
    }

    /// <summary>
    /// Re-orders all rows so that row <c>i</c> afterwards holds the values that row
    /// <c>indices[i]</c> held before the call.
    /// </summary>
    /// <param name="indices">For each target row (by position), the source row index.</param>
    public void ReOrderToIndices(IEnumerable<int> indices) {
      List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();

      // single pass: avoids re-enumerating the sequence for every element
      int targetRow = 0;
      foreach (int sourceRow in indices) {
        indicesTuple.Add(Tuple.Create(targetRow, sourceRow));
        targetRow++;
      }

      ReOrderToIndices(indicesTuple);
    }

    /// <summary>
    /// Applies the given (target row, source row) assignments to every column of
    /// type double, string or DateTime. Columns of any other type are left untouched.
    /// </summary>
    /// <param name="indices">
    /// Pairs where Item1 is the row to write and Item2 is the row whose original
    /// value is copied there.
    /// </param>
    public void ReOrderToIndices(IList<System.Tuple<int, int>> indices) {
      for (int i = 0; i < preprocessingData.Columns; ++i) {
        if (preprocessingData.IsType<double>(i)) {
          reOrderToIndices<double>(i, indices);
        } else if (preprocessingData.IsType<string>(i)) {
          reOrderToIndices<string>(i, indices);
        } else if (preprocessingData.IsType<DateTime>(i)) {
          reOrderToIndices<DateTime>(i, indices);
        }
      }
    }

    /// <summary>
    /// Copies, for one column, the original value of each source row (Item2) into
    /// its target row (Item1).
    /// </summary>
    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
      // snapshot first, so every read sees the value from before the re-ordering started
      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));

      foreach (Tuple<int, int> index in indices) {
        int originalIndex = index.Item1;
        int replaceIndex = index.Item2;

        T replaceValue = originalData[replaceIndex];
        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.