Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs @ 10311

Last change on this file since 10311 was 10311, checked in by rstoll, 10 years ago
  • tuples for sorting were not added to the tuples list - bug fixed
  • Standard view for DataGridContentView changed
  • changed SetValues to take IList (was IEnumerable before)
File size: 6.3 KB
RevLine 
[10249]1using System;
[10193]2using System.Collections.Generic;
3using System.Linq;
[10249]4using HeuristicLab.Data;
[10308]5using System.Collections;
[10193]6
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Bulk manipulations of the cells held by an <see cref="IPreprocessingData"/> instance:
  /// replacing selected rows of a column by a fixed/derived value, interpolating gaps,
  /// and re-ordering or shuffling rows across all columns.
  /// </summary>
  public class PreprocessingDataManipulation : IPreprocessingDataManipulation {
    private readonly IPreprocessingData preprocessingData;
    private readonly IStatisticsLogic statisticInfo;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a manipulation helper that operates on the given data.
    /// </summary>
    /// <param name="_prepocessingData">Data container whose cells are read and written.</param>
    /// <param name="theSearchLogic">Used to decide whether a cell is a missing value.</param>
    /// <param name="theStatisticsLogic">Supplies average, median, min, max and most common value of a column.</param>
    public PreprocessingDataManipulation(IPreprocessingData _prepocessingData, ISearchLogic theSearchLogic, IStatisticsLogic theStatisticsLogic) {
      preprocessingData = _prepocessingData;
      searchLogic = theSearchLogic;
      statisticInfo = theStatisticsLogic;
    }

    /// <summary>
    /// Writes <paramref name="value"/> into every row listed in <paramref name="indices"/>
    /// of the column <paramref name="variableName"/>.
    /// </summary>
    public void ReplaceIndicesByValue<T>(string variableName, IEnumerable<int> indices, T value) {
      foreach (int index in indices) {
        preprocessingData.SetCell<T>(variableName, index, value);
      }
    }

    /// <summary>
    /// Replaces the given rows of the column by the column average reported by the statistics logic.
    /// </summary>
    public void ReplaceIndicesByAverageValue(string variableName, IEnumerable<int> indices) {
      double average = statisticInfo.GetAverage(variableName);
      ReplaceIndicesByValue<double>(variableName, indices, average);
    }

    /// <summary>
    /// Replaces the given rows of the column by the column median reported by the statistics logic.
    /// </summary>
    public void ReplaceIndicesByMedianValue(string variableName, IEnumerable<int> indices) {
      double median = statisticInfo.GetMedian(variableName);
      ReplaceIndicesByValue<double>(variableName, indices, median);
    }

    /// <summary>
    /// Replaces each of the given rows by an independently drawn random value that lies
    /// between the column's minimum and maximum as reported by the statistics logic.
    /// </summary>
    public void ReplaceIndicesByRandomValue(string variableName, IEnumerable<int> indices) {
      Random r = new Random();

      double max = statisticInfo.GetMax<double>(variableName);
      double min = statisticInfo.GetMin<double>(variableName);
      double randMultiplier = (max - min);
      foreach (int index in indices) {
        double rand = r.NextDouble() * randMultiplier + min;
        preprocessingData.SetCell<double>(variableName, index, rand);
      }
    }

    /// <summary>
    /// For each given row, fills the gap between the nearest non-missing value before it and
    /// the nearest non-missing value after it with linearly interpolated values.
    /// Rows at the very start or end of the column, and rows for which either neighbour
    /// cannot be found, are left untouched.
    /// </summary>
    public void ReplaceIndicesByLinearInterpolationOfNeighbours(string variableName, IEnumerable<int> indices) {
      int countValues = preprocessingData.GetValues<double>(variableName).Count();
      foreach (int index in indices) {
        // The first and the last row have only one neighbour, so they cannot be interpolated.
        if (index <= 0 || index >= countValues - 1) {
          continue;
        }

        int prevIndex = indexOfPrevPresentValue(variableName, index);
        int nextIndex = indexOfNextPresentValue(variableName, index);

        // Both neighbours are required; reading a cell at -1 or past the end is invalid.
        if (prevIndex < 0 || nextIndex >= countValues) {
          continue;
        }

        double prev = preprocessingData.GetCell<double>(variableName, prevIndex);
        double next = preprocessingData.GetCell<double>(variableName, nextIndex);

        int valuesToInterpolate = nextIndex - prevIndex;

        // Slope of the straight line running from (prevIndex, prev) to (nextIndex, next).
        double interpolationStep = (next - prev) / valuesToInterpolate;

        // Only the rows strictly between the two present neighbours are rewritten.
        for (int i = prevIndex + 1; i < nextIndex; ++i) {
          double interpolated = prev + (interpolationStep * (i - prevIndex));
          preprocessingData.SetCell<double>(variableName, i, interpolated);
        }
      }
    }

    /// <summary>
    /// Walks backwards from the row before <paramref name="start"/> and returns the first row
    /// that is not a missing value, or -1 if every preceding row is missing.
    /// </summary>
    private int indexOfPrevPresentValue(string variableName, int start) {
      int offset = start - 1;
      while (offset >= 0 && searchLogic.IsMissingValue(variableName, offset)) {
        offset--;
      }

      return offset;
    }

    /// <summary>
    /// Walks forwards from the row after <paramref name="start"/> and returns the first row
    /// that is not a missing value, or <c>preprocessingData.Rows</c> if every following row is missing.
    /// </summary>
    private int indexOfNextPresentValue(string variableName, int start) {
      int offset = start + 1;
      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(variableName, offset)) {
        offset++;
      }

      return offset;
    }

    /// <summary>
    /// Replaces the given rows of the column by the column's most common value.
    /// Supports columns of type double, string and DateTime.
    /// </summary>
    /// <exception cref="ArgumentException">The column has none of the supported types.</exception>
    public void ReplaceIndicesByMostCommonValue(string variableName, IEnumerable<int> indices) {
      if (preprocessingData.IsType<double>(variableName)) {
        ReplaceIndicesByValue<double>(variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));
      } else if (preprocessingData.IsType<string>(variableName)) {
        ReplaceIndicesByValue<string>(variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));
      } else if (preprocessingData.IsType<DateTime>(variableName)) {
        ReplaceIndicesByValue<DateTime>(variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));
      } else {
        throw new ArgumentException("column with index: " + variableName + " contains a non supported type.");
      }
    }

    /// <summary>
    /// Shuffles the rows inside each given range; every column receives the same permutation,
    /// so rows stay intact. Ranges are processed one after another.
    /// </summary>
    /// <remarks>
    /// NOTE(review): rows <c>Start</c> through <c>End</c> are shuffled, i.e. <c>End</c> is treated as
    /// inclusive, matching the index span the previous implementation touched. Confirm against
    /// the definition of IntRange.
    /// </remarks>
    public void ShuffleWithRanges(IEnumerable<IntRange> ranges) {
      // One generator for all ranges, so consecutive ranges do not get identical seeds.
      Random random = new Random();

      foreach (IntRange range in ranges) {
        int length = range.End - range.Start + 1;
        if (length <= 1) {
          continue; // nothing to shuffle
        }

        // sourceRows[k] is the row whose content ends up in row (range.Start + k).
        int[] sourceRows = Enumerable.Range(range.Start, length).ToArray();

        // Fisher-Yates: yields a true permutation, so no row is duplicated or lost.
        for (int i = length - 1; i > 0; --i) {
          int j = random.Next(i + 1);
          int swap = sourceRows[i];
          sourceRows[i] = sourceRows[j];
          sourceRows[j] = swap;
        }

        List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>(length);
        for (int k = 0; k < length; ++k) {
          shuffledIndices.Add(new Tuple<int, int>(range.Start + k, sourceRows[k]));
        }

        reOrderToIndices(shuffledIndices);
      }
    }

    /// <summary>
    /// Re-orders all columns so that row <c>i</c> receives the content of the row given by the
    /// <c>i</c>-th element of <paramref name="indices"/>.
    /// </summary>
    public void reOrderToIndices(IEnumerable<int> indices) {
      List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();

      // Single pass: avoids re-enumerating the sequence for every element.
      int targetRow = 0;
      foreach (int sourceRow in indices) {
        indicesTuple.Add(new Tuple<int, int>(targetRow, sourceRow));
        targetRow++;
      }

      reOrderToIndices(indicesTuple);
    }

    /// <summary>
    /// Applies the (target row, source row) pairs to every column of type double, string or
    /// DateTime. Columns of any other type are skipped.
    /// </summary>
    public void reOrderToIndices(IList<System.Tuple<int, int>> indices) {
      foreach (string variableName in preprocessingData.VariableNames) {
        if (preprocessingData.IsType<double>(variableName)) {
          reOrderToIndices<double>(variableName, indices);
        } else if (preprocessingData.IsType<string>(variableName)) {
          reOrderToIndices<string>(variableName, indices);
        } else if (preprocessingData.IsType<DateTime>(variableName)) {
          reOrderToIndices<DateTime>(variableName, indices);
        }
      }
    }

    /// <summary>
    /// For one column, writes into each target row (Item1) the value that the source row (Item2)
    /// held before this call started.
    /// </summary>
    private void reOrderToIndices<T>(string variableName, IList<Tuple<int, int>> indices) {
      // Snapshot first: later writes must not influence the values read for other pairs.
      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(variableName));

      foreach (Tuple<int, int> index in indices) {
        int originalIndex = index.Item1;
        int replaceIndex = index.Item2;

        T replaceValue = originalData[replaceIndex];
        preprocessingData.SetCell<T>(variableName, originalIndex, replaceValue);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.