Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/22/14 14:13:11 (10 years ago)
Author:
rstoll
Message:
  • Modified PreprocessingData to use columnIndex instead of variableName (which is faster and more convenient); marked the variableName-based methods as Obsolete
  • SearchLogic, DataGridLogic, StatisticLogic and PreprocessingDataManipulation have already been changed accordingly

*

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs

    r10311 r10367  
    33using System.Linq;
    44using HeuristicLab.Data;
    5 using System.Collections;
    65
    76namespace HeuristicLab.DataPreprocessing {
     
    1716    }
    1817
    19     public void ReplaceIndicesByValue<T>(string variableName, IEnumerable<int> indices, T value) {
    20       foreach (int index in indices) {
    21         preprocessingData.SetCell<T>(variableName, index, value);
     18    public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
     19      foreach (int index in rowIndices) {
     20        preprocessingData.SetCell<T>(columnIndex, index, value);
    2221      }
    2322    }
    2423
    25     public void ReplaceIndicesByAverageValue(string variableName, IEnumerable<int> indices) {
    26       double average = statisticInfo.GetAverage(variableName);
    27       ReplaceIndicesByValue<double>(variableName, indices, average);
     24    public void ReplaceIndicesByAverageValue(int columnIndex, IEnumerable<int> rowIndices) {
     25      double average = statisticInfo.GetAverage(columnIndex);
     26      ReplaceIndicesByValue<double>(columnIndex, rowIndices, average);
    2827    }
    2928
    30     public void ReplaceIndicesByMedianValue(string variableName, IEnumerable<int> indices) {
    31       double median = statisticInfo.GetMedian(variableName);
    32       ReplaceIndicesByValue<double>(variableName, indices, median);
     29    public void ReplaceIndicesByMedianValue(int columnIndex, IEnumerable<int> rowIndices) {
     30      double median = statisticInfo.GetMedian(columnIndex);
     31      ReplaceIndicesByValue<double>(columnIndex, rowIndices, median);
    3332    }
    3433
    35     public void ReplaceIndicesByRandomValue(string variableName, IEnumerable<int> indices) {
     34    public void ReplaceIndicesByRandomValue(int columnIndex, IEnumerable<int> rowIndices) {
    3635      Random r = new Random();
    3736
    38       double max = statisticInfo.GetMax<double>(variableName);
    39       double min = statisticInfo.GetMin<double>(variableName);
     37      double max = statisticInfo.GetMax<double>(columnIndex);
     38      double min = statisticInfo.GetMin<double>(columnIndex);
    4039      double randMultiplier = (max - min);
    41       foreach (int index in indices) {
     40      foreach (int index in rowIndices) {
    4241        double rand = r.NextDouble() * randMultiplier + min;
    43         preprocessingData.SetCell<double>(variableName, index, rand);
     42        preprocessingData.SetCell<double>(columnIndex, index, rand);
    4443      }
    4544    }
    4645
    47     public void ReplaceIndicesByLinearInterpolationOfNeighbours(string variableName, IEnumerable<int> indices) {
    48       int countValues = preprocessingData.GetValues<double>(variableName).Count();
    49       foreach (int index in indices) {
     46    public void ReplaceIndicesByLinearInterpolationOfNeighbours(int columnIndex, IEnumerable<int> rowIndices) {
     47      int countValues = preprocessingData.GetValues<double>(columnIndex).Count();
     48      foreach (int index in rowIndices) {
    5049        // dont replace first or last values
    5150        if (index > 0 && index < countValues) {
    52           int prevIndex = indexOfPrevPresentValue(variableName, index);
    53           int nextIndex = indexOfNextPresentValue(variableName, index);
     51          int prevIndex = indexOfPrevPresentValue(columnIndex, index);
     52          int nextIndex = indexOfNextPresentValue(columnIndex, index);
    5453
    5554          // no neighbours found
     
    5756            continue;
    5857          }
    59           double prev = preprocessingData.GetCell<double>(variableName, prevIndex);
    60           double next = preprocessingData.GetCell<double>(variableName, nextIndex);
     58          double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex);
     59          double next = preprocessingData.GetCell<double>(columnIndex, nextIndex);
    6160
    6261          int valuesToInterpolate = nextIndex - prevIndex;
     
    6665          for (int i = prevIndex; i < nextIndex; ++i) {
    6766            double interpolated = prev + (interpolationStep * (i - prevIndex));
    68             preprocessingData.SetCell<double>(variableName, i, interpolated);
     67            preprocessingData.SetCell<double>(columnIndex, i, interpolated);
    6968          }
    7069        }
     
    7271    }
    7372
    74     private int indexOfPrevPresentValue(string variableName, int start) {
     73    private int indexOfPrevPresentValue(int columnIndex, int start) {
    7574      int offset = start - 1;
    76       while (offset >= 0 && searchLogic.IsMissingValue(variableName, offset)) {
     75      while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
    7776        offset--;
    7877      }
     
    8180    }
    8281
    83     private int indexOfNextPresentValue(string variableName, int start) {
     82    private int indexOfNextPresentValue(int columnIndex, int start) {
    8483      int offset = start + 1;
    85       while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(variableName, offset)) {
     84      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
    8685        offset++;
    8786      }
     
    9089    }
    9190
    92     public void ReplaceIndicesByMostCommonValue(string variableName, IEnumerable<int> indices) {
    93       if (preprocessingData.IsType<double>(variableName)) {
    94         ReplaceIndicesByValue<double>(variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));
    95       } else if (preprocessingData.IsType<string>(variableName)) {
    96         ReplaceIndicesByValue<string>(variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));
    97       } else if (preprocessingData.IsType<DateTime>(variableName)) {
    98         ReplaceIndicesByValue<DateTime>(variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));
     91    public void ReplaceIndicesByMostCommonValue(int columnIndex, IEnumerable<int> rowIndices) {
     92      if (preprocessingData.IsType<double>(columnIndex)) {
     93        ReplaceIndicesByValue<double>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<double>(columnIndex));
     94      } else if (preprocessingData.IsType<string>(columnIndex)) {
     95        ReplaceIndicesByValue<string>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<string>(columnIndex));
     96      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     97        ReplaceIndicesByValue<DateTime>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<DateTime>(columnIndex));
    9998      } else {
    100         throw new ArgumentException("column with index: " + variableName + " contains a non supported type.");
     99        throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
    101100      }
    102101    }
     
    131130
    132131    public void reOrderToIndices(IList<System.Tuple<int, int>> indices) {
    133       foreach (string variableName in preprocessingData.VariableNames) {
    134         if (preprocessingData.IsType<double>(variableName)) {
    135           reOrderToIndices<double>(variableName, indices);
    136         } else if (preprocessingData.IsType<string>(variableName)) {
    137           reOrderToIndices<string>(variableName, indices);
    138         } else if (preprocessingData.IsType<DateTime>(variableName)) {
    139           reOrderToIndices<DateTime>(variableName, indices);
     132      for (int i = 0; i < preprocessingData.Columns; ++i) {
     133        if (preprocessingData.IsType<double>(i)) {
     134          reOrderToIndices<double>(i, indices);
     135        } else if (preprocessingData.IsType<string>(i)) {
     136          reOrderToIndices<string>(i, indices);
     137        } else if (preprocessingData.IsType<DateTime>(i)) {
     138          reOrderToIndices<DateTime>(i, indices);
    140139        }
    141140      }
    142141    }
    143142
    144     private void reOrderToIndices<T>(string variableName, IList<Tuple<int, int>> indices) {
     143    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    145144
    146       List<T> originalData = new List<T>(preprocessingData.GetValues<T>(variableName));
     145      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
    147146
    148147      // process all columns equally
     
    152151
    153152        T replaceValue = originalData.ElementAt<T>(replaceIndex);
    154         preprocessingData.SetCell<T>(variableName, originalIndex, replaceValue);
     153        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
    155154      }
    156155    }
Note: See TracChangeset for help on using the changeset viewer.