Changeset 10367


Ignore:
Timestamp:
01/22/14 14:13:11 (6 years ago)
Author:
rstoll
Message:
  • Modified PreprocessingData: it now uses columnIndex instead of variableName (which is faster and more convenient); marked the variableName-based methods as Obsolete
  • Already updated SearchLogic, DataGridLogic, StatisticsLogic, and PreprocessingDataManipulation accordingly

*

Location:
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/HeuristicLab.DataPreprocessing-3.3.csproj

    r10319 r10367  
    219219    </ProjectReference>
    220220  </ItemGroup>
     221  <ItemGroup>
     222    <EmbeddedResource Include="Views\StatisticsView.resx">
     223      <DependentUpon>StatisticsView.cs</DependentUpon>
     224    </EmbeddedResource>
     225  </ItemGroup>
    221226  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
    222227  <PropertyGroup>
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/DataGridLogic.cs

    r10248 r10367  
    3232    }
    3333
     34    public string GetColumnTypeAsString(int columnIndex) {
     35      if (preprocessingData.IsType<double>(columnIndex)) {
     36        return "double";
     37      } else if (preprocessingData.IsType<string>(columnIndex)) {
     38        return "string";
     39      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     40        return "DateTime";
     41      }
     42      return "Unknown Type";
     43    }
     44
    3445    public IEnumerable<string> RowNames {
    3546      get {
     
    4354      }
    4455      bool valid = false;
    45       string variableName = preprocessingData.GetVariableName(columnIndex);
    46       if (preprocessingData.IsType<double>(variableName)) {
     56      if (preprocessingData.IsType<double>(columnIndex)) {
    4757        double val;
    4858        valid = double.TryParse(value, out val);
     
    5161          errorMessage = "Invalid Value (Valid Value Format: \"" + FormatPatterns.GetDoubleFormatPattern() + "\")";
    5262        }
    53       } else if (preprocessingData.IsType<string>(variableName)) {
     63      } else if (preprocessingData.IsType<string>(columnIndex)) {
    5464        valid = value != null;
    5565        errorMessage = string.Empty;
     
    5767          errorMessage = "Invalid Value (string must not be null)";
    5868        }
    59       } else if (preprocessingData.IsType<DateTime>(variableName)) {
     69      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
    6070        DateTime date;
    6171        valid = DateTime.TryParse(value, out date);
     
    6575        }
    6676      } else {
    67         throw new ArgumentException("column with variableName: " + variableName + " contains a non supported type.");
     77        throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
    6878      }
    6979
     
    7282
    7383    public string GetValue(int rowIndex, int columnIndex) {
    74       return preprocessingData.GetCellAsString(preprocessingData.GetVariableName(columnIndex), rowIndex);
     84      return preprocessingData.GetCellAsString(columnIndex, rowIndex);
    7585    }
    7686
    7787    public bool SetValue(string value, int rowIndex, int columnIndex) {
    78       string variableName = preprocessingData.GetVariableName(columnIndex);
    7988      bool valid = false;
    80       if (preprocessingData.IsType<double>(variableName)) {
     89      if (preprocessingData.IsType<double>(columnIndex)) {
    8190        double val;
    8291        valid = double.TryParse(value, out val);
    8392        if (valid) {
    84           preprocessingData.SetCell<double>(variableName, rowIndex, val);
     93          preprocessingData.SetCell<double>(columnIndex, rowIndex, val);
    8594        }
    86       } else if (preprocessingData.IsType<string>(variableName)) {
     95      } else if (preprocessingData.IsType<string>(columnIndex)) {
    8796        valid = value != null;
    8897        if (valid) {
    89           preprocessingData.SetCell<string>(variableName, rowIndex, value);
     98          preprocessingData.SetCell<string>(columnIndex, rowIndex, value);
    9099        }
    91       } else if (preprocessingData.IsType<DateTime>(variableName)) {
     100      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
    92101        DateTime date;
    93102        valid = DateTime.TryParse(value, out date);
    94103        if (valid) {
    95           preprocessingData.SetCell<DateTime>(variableName, rowIndex, date);
     104          preprocessingData.SetCell<DateTime>(columnIndex, rowIndex, date);
    96105        }
    97106      } else {
    98         throw new ArgumentException("column with variableName: " + variableName + " contains a non supported type.");
     107        throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
    99108      }
    100109
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs

    r10341 r10367  
    3333  public class PreprocessingData : NamedItem, IPreprocessingData {
    3434
    35     private IDictionary<string, IList> variableValues;
     35    private IDictionary<int, IList> variableValues;
    3636
    3737    private IList<string> variableNames;
    38 
    39     private IDictionary<string, int> variableNameIndices;
    4038
    4139    private double trainingToTestRatio;
     
    4341    private PreprocessingData(PreprocessingData original, Cloner cloner)
    4442      : base(original, cloner) {
    45       variableValues = new Dictionary<string, IList>(variableValues);
    46       variableNameIndices = new Dictionary<string, int>(variableNameIndices);
     43      variableValues = new Dictionary<int, IList>(original.variableValues);
    4744    }
    4845
     
    5350      variableNames = new List<string>(problemData.Dataset.VariableNames);
    5451      // create dictionary from variable name to index
    55       variableNameIndices = new Dictionary<string, int>();
    56       var variableNamesList = problemData.Dataset.VariableNames.ToList();
    57       for (int i = 0; i < variableNamesList.Count; i++) {
    58         variableNameIndices.Add(variableNamesList[i], i);
    59       }
    60 
    61       // copy values
    62       variableValues = new Dictionary<string, IList>();
     52
     53      int columnIndex = 0;
     54      variableValues = new Dictionary<int, IList>();
    6355      foreach (var variableName in problemData.Dataset.VariableNames) {
    6456        if (problemData.Dataset.IsType<double>(variableName)) {
    65           variableValues[variableName] = problemData.Dataset.GetDoubleValues(variableName).ToList();
     57          variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
    6658        } else if (problemData.Dataset.IsType<string>(variableName)) {
    67           variableValues[variableName] = CreateColumn<string>(problemData.Dataset, variableNameIndices[variableName], x => x);
     59          variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
    6860        } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
    69           variableValues[variableName] = CreateColumn<DateTime>(problemData.Dataset, variableNameIndices[variableName], x => DateTime.Parse(x));
     61          variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
    7062        } else {
    7163          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
    7264        }
     65        ++columnIndex;
    7366      }
    7467
     
    7972      var list = new List<T>(ds.Rows);
    8073      for (int row = 0; row < ds.Rows; ++row) {
    81         list.Add(selector(ds.GetValue(row, column))); 
     74        list.Add(selector(ds.GetValue(row, column)));
    8275      }
    8376      return list;
     
    9487    #region IPreprocessingData Members
    9588
    96     public T GetCell<T>(string variableName, int row) {
    97       return (T)variableValues[variableName][row];
    98     }
    99 
    100     public void SetCell<T>(string variableName, int row, T value) {
    101       variableValues[variableName][row] = value;
    102     }
    103 
    104     public string GetCellAsString(string variableName, int row) {
    105       return variableValues[variableName][row].ToString();
    106     }
    107 
     89    [Obsolete("use the index based variant, is faster")]
     90    public T GetCell<T>(string variableName, int rowIndex) {
     91      return GetCell<T>(GetColumnIndex(variableName), rowIndex);
     92    }
     93
     94    public T GetCell<T>(int columnIndex, int rowIndex) {
     95      return (T)variableValues[columnIndex][rowIndex];
     96    }
     97
     98    [Obsolete("use the index based variant, is faster")]
     99    public void SetCell<T>(string variableName, int rowIndex, T value) {
     100      SetCell<T>(GetColumnIndex(variableName), rowIndex, value);
     101    }
     102
     103    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
     104      variableValues[columnIndex][rowIndex] = value;
     105    }
     106
     107    [Obsolete("use the index based variant, is faster")]
     108    public string GetCellAsString(string variableName, int rowIndex) {
     109      return GetCellAsString(GetColumnIndex(variableName), rowIndex);
     110    }
     111
     112    public string GetCellAsString(int columnIndex, int rowIndex) {
     113      return variableValues[columnIndex][rowIndex].ToString();
     114
     115    }
     116
     117    [Obsolete("use the index based variant, is faster")]
    108118    public IList<T> GetValues<T>(string variableName) {
    109       // TODO: test if cast is valid
    110       return (IList<T>) variableValues[variableName];
    111     }
    112 
     119      return GetValues<T>(GetColumnIndex(variableName));
     120    }
     121
     122    public IList<T> GetValues<T>(int columnIndex) {
     123      return (IList<T>)variableValues[columnIndex];
     124    }
     125
     126    [Obsolete("use the index based variant, is faster")]
    113127    public void SetValues<T>(string variableName, IList<T> values) {
    114       if(IsType<T>(variableName)){
    115         variableValues[variableName] = (IList) values;
    116       }else{
    117         throw new ArgumentException("The datatype of column " + variableName + " must be of type " + variableValues[variableName].GetType().Name + " but was " + typeof(T).Name);
     128      SetValues<T>(GetColumnIndex(variableName), values);
     129
     130    }
     131    public void SetValues<T>(int columnIndex, IList<T> values) {
     132      if (IsType<T>(columnIndex)) {
     133        variableValues[columnIndex] = (IList)values;
     134      } else {
     135        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
    118136      }
    119137    }
     
    134152
    135153    public void InsertColumn<T>(string variableName, int columnIndex) {
    136       variableValues.Add(variableName, new List<T>(Rows));
    137       variableNameIndices.Add(variableName, columnIndex);
     154      variableValues.Add(columnIndex, new List<T>(Rows));
    138155      variableNames.Insert(columnIndex, variableName);
    139156    }
    140157
     158    public void DeleteColumn(int columnIndex) {
     159      variableValues.Remove(columnIndex);
     160      variableNames.RemoveAt(columnIndex);
     161    }
     162
     163    [Obsolete("use the index based variant, is faster")]
    141164    public void DeleteColumn(string variableName) {
    142       variableValues.Remove(variableName);
    143       variableNames.RemoveAt(variableNameIndices[variableName]);
    144       variableNameIndices.Remove(variableName);
     165      DeleteColumn(GetColumnIndex(variableName));
    145166    }
    146167
     
    157178    }
    158179
     180    [Obsolete("use the index based variant, is faster")]
    159181    public string GetVariableName(int columnIndex) {
    160182      return variableNames[columnIndex];
    161183    }
    162 
     184    public int GetColumnIndex(string variableName) {
     185      return variableNames.IndexOf(variableName);
     186    }
     187
     188    [Obsolete("use the index based variant, is faster")]
    163189    public bool IsType<T>(string variableName) {
    164       return variableValues[variableName] is List<T>;
     190      return IsType<T>(GetColumnIndex(variableName));
     191
     192    }
     193    public bool IsType<T>(int columnIndex) {
     194      return variableValues[columnIndex] is List<T>;
    165195    }
    166196
     
    170200
    171201    public int Rows {
    172       get { return variableValues[variableNames[0]].Count; }
     202      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
    173203    }
    174204
    175205    public Dataset ExportToDataset() {
    176206      IList<IList> values = new List<IList>();
    177       foreach (var variable in VariableNames) {
    178         values.Add(variableValues[variable]);
     207
     208      for (int i = 0; i < Columns; ++i) {
     209        values.Add(variableValues[i]);
    179210      }
    180211
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs

    r10311 r10367  
    33using System.Linq;
    44using HeuristicLab.Data;
    5 using System.Collections;
    65
    76namespace HeuristicLab.DataPreprocessing {
     
    1716    }
    1817
    19     public void ReplaceIndicesByValue<T>(string variableName, IEnumerable<int> indices, T value) {
    20       foreach (int index in indices) {
    21         preprocessingData.SetCell<T>(variableName, index, value);
     18    public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) {
     19      foreach (int index in rowIndices) {
     20        preprocessingData.SetCell<T>(columnIndex, index, value);
    2221      }
    2322    }
    2423
    25     public void ReplaceIndicesByAverageValue(string variableName, IEnumerable<int> indices) {
    26       double average = statisticInfo.GetAverage(variableName);
    27       ReplaceIndicesByValue<double>(variableName, indices, average);
     24    public void ReplaceIndicesByAverageValue(int columnIndex, IEnumerable<int> rowIndices) {
     25      double average = statisticInfo.GetAverage(columnIndex);
     26      ReplaceIndicesByValue<double>(columnIndex, rowIndices, average);
    2827    }
    2928
    30     public void ReplaceIndicesByMedianValue(string variableName, IEnumerable<int> indices) {
    31       double median = statisticInfo.GetMedian(variableName);
    32       ReplaceIndicesByValue<double>(variableName, indices, median);
     29    public void ReplaceIndicesByMedianValue(int columnIndex, IEnumerable<int> rowIndices) {
     30      double median = statisticInfo.GetMedian(columnIndex);
     31      ReplaceIndicesByValue<double>(columnIndex, rowIndices, median);
    3332    }
    3433
    35     public void ReplaceIndicesByRandomValue(string variableName, IEnumerable<int> indices) {
     34    public void ReplaceIndicesByRandomValue(int columnIndex, IEnumerable<int> rowIndices) {
    3635      Random r = new Random();
    3736
    38       double max = statisticInfo.GetMax<double>(variableName);
    39       double min = statisticInfo.GetMin<double>(variableName);
     37      double max = statisticInfo.GetMax<double>(columnIndex);
     38      double min = statisticInfo.GetMin<double>(columnIndex);
    4039      double randMultiplier = (max - min);
    41       foreach (int index in indices) {
     40      foreach (int index in rowIndices) {
    4241        double rand = r.NextDouble() * randMultiplier + min;
    43         preprocessingData.SetCell<double>(variableName, index, rand);
     42        preprocessingData.SetCell<double>(columnIndex, index, rand);
    4443      }
    4544    }
    4645
    47     public void ReplaceIndicesByLinearInterpolationOfNeighbours(string variableName, IEnumerable<int> indices) {
    48       int countValues = preprocessingData.GetValues<double>(variableName).Count();
    49       foreach (int index in indices) {
     46    public void ReplaceIndicesByLinearInterpolationOfNeighbours(int columnIndex, IEnumerable<int> rowIndices) {
     47      int countValues = preprocessingData.GetValues<double>(columnIndex).Count();
     48      foreach (int index in rowIndices) {
    5049        // dont replace first or last values
    5150        if (index > 0 && index < countValues) {
    52           int prevIndex = indexOfPrevPresentValue(variableName, index);
    53           int nextIndex = indexOfNextPresentValue(variableName, index);
     51          int prevIndex = indexOfPrevPresentValue(columnIndex, index);
     52          int nextIndex = indexOfNextPresentValue(columnIndex, index);
    5453
    5554          // no neighbours found
     
    5756            continue;
    5857          }
    59           double prev = preprocessingData.GetCell<double>(variableName, prevIndex);
    60           double next = preprocessingData.GetCell<double>(variableName, nextIndex);
     58          double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex);
     59          double next = preprocessingData.GetCell<double>(columnIndex, nextIndex);
    6160
    6261          int valuesToInterpolate = nextIndex - prevIndex;
     
    6665          for (int i = prevIndex; i < nextIndex; ++i) {
    6766            double interpolated = prev + (interpolationStep * (i - prevIndex));
    68             preprocessingData.SetCell<double>(variableName, i, interpolated);
     67            preprocessingData.SetCell<double>(columnIndex, i, interpolated);
    6968          }
    7069        }
     
    7271    }
    7372
    74     private int indexOfPrevPresentValue(string variableName, int start) {
     73    private int indexOfPrevPresentValue(int columnIndex, int start) {
    7574      int offset = start - 1;
    76       while (offset >= 0 && searchLogic.IsMissingValue(variableName, offset)) {
     75      while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) {
    7776        offset--;
    7877      }
     
    8180    }
    8281
    83     private int indexOfNextPresentValue(string variableName, int start) {
     82    private int indexOfNextPresentValue(int columnIndex, int start) {
    8483      int offset = start + 1;
    85       while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(variableName, offset)) {
     84      while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) {
    8685        offset++;
    8786      }
     
    9089    }
    9190
    92     public void ReplaceIndicesByMostCommonValue(string variableName, IEnumerable<int> indices) {
    93       if (preprocessingData.IsType<double>(variableName)) {
    94         ReplaceIndicesByValue<double>(variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));
    95       } else if (preprocessingData.IsType<string>(variableName)) {
    96         ReplaceIndicesByValue<string>(variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));
    97       } else if (preprocessingData.IsType<DateTime>(variableName)) {
    98         ReplaceIndicesByValue<DateTime>(variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));
     91    public void ReplaceIndicesByMostCommonValue(int columnIndex, IEnumerable<int> rowIndices) {
     92      if (preprocessingData.IsType<double>(columnIndex)) {
     93        ReplaceIndicesByValue<double>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<double>(columnIndex));
     94      } else if (preprocessingData.IsType<string>(columnIndex)) {
     95        ReplaceIndicesByValue<string>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<string>(columnIndex));
     96      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     97        ReplaceIndicesByValue<DateTime>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<DateTime>(columnIndex));
    9998      } else {
    100         throw new ArgumentException("column with index: " + variableName + " contains a non supported type.");
     99        throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
    101100      }
    102101    }
     
    131130
    132131    public void reOrderToIndices(IList<System.Tuple<int, int>> indices) {
    133       foreach (string variableName in preprocessingData.VariableNames) {
    134         if (preprocessingData.IsType<double>(variableName)) {
    135           reOrderToIndices<double>(variableName, indices);
    136         } else if (preprocessingData.IsType<string>(variableName)) {
    137           reOrderToIndices<string>(variableName, indices);
    138         } else if (preprocessingData.IsType<DateTime>(variableName)) {
    139           reOrderToIndices<DateTime>(variableName, indices);
     132      for (int i = 0; i < preprocessingData.Columns; ++i) {
     133        if (preprocessingData.IsType<double>(i)) {
     134          reOrderToIndices<double>(i, indices);
     135        } else if (preprocessingData.IsType<string>(i)) {
     136          reOrderToIndices<string>(i, indices);
     137        } else if (preprocessingData.IsType<DateTime>(i)) {
     138          reOrderToIndices<DateTime>(i, indices);
    140139        }
    141140      }
    142141    }
    143142
    144     private void reOrderToIndices<T>(string variableName, IList<Tuple<int, int>> indices) {
     143    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    145144
    146       List<T> originalData = new List<T>(preprocessingData.GetValues<T>(variableName));
     145      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
    147146
    148147      // process all columns equally
     
    152151
    153152        T replaceValue = originalData.ElementAt<T>(replaceIndex);
    154         preprocessingData.SetCell<T>(variableName, originalIndex, replaceValue);
     153        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
    155154      }
    156155    }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/SearchLogic.cs

    r10236 r10367  
    1515      var dic = new Dictionary<string, IEnumerable<int>>();
    1616      foreach (string variableName in preprocessingData.VariableNames) {
    17         dic.Add(variableName, GetMissingValueIndices(variableName));
     17        dic.Add(variableName, GetMissingValueIndices(preprocessingData.GetColumnIndex(variableName)));
    1818      }
    1919      return dic;
    2020    }
    2121
    22     public bool IsMissingValue(string variableName, int rowIndex) {
    23       if (preprocessingData.IsType<double>(variableName)) {
    24         return double.IsNaN(preprocessingData.GetCell<double>(variableName, rowIndex));
    25       } else if (preprocessingData.IsType<string>(variableName)) {
    26         return string.IsNullOrEmpty(preprocessingData.GetCell<string>(variableName, rowIndex));
    27       } else if (preprocessingData.IsType<DateTime>(variableName)) {
    28         return preprocessingData.GetCell<DateTime>(variableName, rowIndex).Equals(DateTime.MinValue);
     22    public bool IsMissingValue(int columnIndex, int rowIndex) {
     23      if (preprocessingData.IsType<double>(columnIndex)) {
     24        return double.IsNaN(preprocessingData.GetCell<double>(columnIndex, rowIndex));
     25      } else if (preprocessingData.IsType<string>(columnIndex)) {
     26        return string.IsNullOrEmpty(preprocessingData.GetCell<string>(columnIndex, rowIndex));
     27      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     28        return preprocessingData.GetCell<DateTime>(columnIndex, rowIndex).Equals(DateTime.MinValue);
    2929      } else {
    30         throw new ArgumentException("cell in column with variableName: " + variableName + " and row index " + rowIndex + " contains a non supported type.");
     30        throw new ArgumentException("cell in column " + columnIndex + " and row index " + rowIndex + " contains a non supported type.");
    3131      }
    3232    }
    3333
    34     public IEnumerable<int> GetMissingValueIndices(string variableName) {
    35       if (preprocessingData.IsType<double>(variableName)) {
    36         return preprocessingData.GetValues<double>(variableName).Select((s, i) => new { i, s }).Where(t => double.IsNaN(t.s)).Select(t => t.i);
    37       } else if (preprocessingData.IsType<string>(variableName)) {
    38         return preprocessingData.GetValues<string>(variableName).Select((s, i) => new { i, s }).Where(t => string.IsNullOrEmpty(t.s)).Select(t => t.i);
    39       } else if (preprocessingData.IsType<DateTime>(variableName)) {
    40         return preprocessingData.GetValues<DateTime>(variableName).Select((s, i) => new { i, s }).Where(t => t.s.Equals(DateTime.MinValue)).Select(t => t.i);
     34    public IEnumerable<int> GetMissingValueIndices(int columnIndex) {
     35      if (preprocessingData.IsType<double>(columnIndex)) {
     36        return preprocessingData.GetValues<double>(columnIndex).Select((s, i) => new { i, s }).Where(t => double.IsNaN(t.s)).Select(t => t.i);
     37      } else if (preprocessingData.IsType<string>(columnIndex)) {
     38        return preprocessingData.GetValues<string>(columnIndex).Select((s, i) => new { i, s }).Where(t => string.IsNullOrEmpty(t.s)).Select(t => t.i);
     39      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     40        return preprocessingData.GetValues<DateTime>(columnIndex).Select((s, i) => new { i, s }).Where(t => t.s.Equals(DateTime.MinValue)).Select(t => t.i);
    4141      } else {
    42         throw new ArgumentException("column with variableName: " + variableName + " contains a non supported type.");
     42        throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
    4343      }
    4444    }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsContent.cs

    r10313 r10367  
    1414
    1515    private readonly IStatisticsLogic statisticsLogic;
    16     public StatisticsContent(IStatisticsLogic theStatisticsLogic)
     16    private readonly IDataGridLogic dataGridLogic;
     17    public StatisticsContent(IStatisticsLogic theStatisticsLogic, IDataGridLogic theDataGridLogic)
    1718    {
    1819      statisticsLogic = theStatisticsLogic;
     20      dataGridLogic = theDataGridLogic;
    1921    }
    2022
     
    2729    public IStatisticsLogic StatisticsLogic
    2830    {
    29       get
    30       {
    31         return statisticsLogic;
    32       }
     31      get { return statisticsLogic; }
     32    }
     33
     34    public IDataGridLogic DataDridLogic {
     35      get { return dataGridLogic; }
    3336    }
    3437
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs

    r10249 r10367  
    3939    public int GetMissingValueCount() {
    4040      int count = 0;
    41       foreach (var variableName in preprocessingData.VariableNames) {
    42         count += GetMissingValueCount(variableName);
     41      for(int i = 0; i < preprocessingData.Columns; ++i){
     42        count += GetMissingValueCount(i);
    4343      }
    4444      return count;
    4545    }
    4646
    47     public int GetMissingValueCount(string variableName) {
    48       return searchLogic.GetMissingValueIndices(variableName).Count();
     47    public int GetMissingValueCount(int columnIndex) {
     48      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    4949    }
    5050
    51     public T GetMin<T>(string variableName) where T : IComparable<T> {
    52       return preprocessingData.GetValues<T>(variableName).Min();
     51    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
     52      return preprocessingData.GetValues<T>(columnIndex).Min();
    5353    }
    5454
    55     public T GetMax<T>(string variableName) where T : IComparable<T> {
    56       return preprocessingData.GetValues<T>(variableName).Max();
     55    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
     56      return preprocessingData.GetValues<T>(columnIndex).Max();
    5757    }
    5858
    59     public double GetMedian(string variableName) {
     59    public double GetMedian(int columnIndex) {
    6060      double median = double.NaN;
    61       if (preprocessingData.IsType<double>(variableName)) {
    62         median = preprocessingData.GetValues<double>(variableName).Median();
     61      if (preprocessingData.IsType<double>(columnIndex)) {
     62        median = preprocessingData.GetValues<double>(columnIndex).Median();
    6363      }
    6464      return median;
    6565    }
    6666
    67     public double GetAverage(string variableName) {
     67    public double GetAverage(int columnIndex) {
    6868      double avg = double.NaN;
    69       if (preprocessingData.IsType<double>(variableName)) {
    70         avg = preprocessingData.GetValues<double>(variableName).Average();
     69      if (preprocessingData.IsType<double>(columnIndex)) {
     70        avg = preprocessingData.GetValues<double>(columnIndex).Average();
    7171      }
    7272      return avg;
    7373    }
    7474
    75     public T GetMostCommonValue<T>(string variableName) {
    76       return preprocessingData.GetValues<T>(variableName)
     75    public T GetMostCommonValue<T>(int columnIndex) {
     76      return preprocessingData.GetValues<T>(columnIndex)
    7777                              .GroupBy(x => x)
    7878                              .OrderByDescending(g => g.Count())
     
    8282
    8383
    84     public double GetStandardDeviation(string variableName) {
     84    public double GetStandardDeviation(int columnIndex) {
    8585      double stdDev = double.NaN;
    86       if (preprocessingData.IsType<double>(variableName)) {
    87         stdDev = preprocessingData.GetValues<double>(variableName).StandardDeviation();
     86      if (preprocessingData.IsType<double>(columnIndex)) {
     87        stdDev = preprocessingData.GetValues<double>(columnIndex).StandardDeviation();
    8888      }
    8989      return stdDev;
    9090    }
    9191
    92     public double GetVariance(string variableName) {
     92    public double GetVariance(int columnIndex) {
    9393      double stdDev = double.NaN;
    94       if (preprocessingData.IsType<double>(variableName)) {
    95         stdDev = preprocessingData.GetValues<double>(variableName).Variance();
     94      if (preprocessingData.IsType<double>(columnIndex)) {
     95        stdDev = preprocessingData.GetValues<double>(columnIndex).Variance();
    9696      }
    9797      return stdDev;
    9898    }
    9999
    100     public int GetDifferentValuesCount<T>(string variableName) {
    101       return preprocessingData.GetValues<T>(variableName).GroupBy(x => x).Count();
     100    public int GetDifferentValuesCount<T>(int columnIndex) {
     101      return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count();
    102102    }
    103103
    104104    public int GetRowMissingValueCount(int rowIndex) {
    105105      int count = 0;
    106       foreach (var variableName in preprocessingData.VariableNames) {
    107         if (searchLogic.IsMissingValue(variableName, rowIndex)) {
     106       for(int i = 0; i < preprocessingData.Columns; ++i){
     107        if (searchLogic.IsMissingValue(i, rowIndex)) {
    108108          ++count;
    109109        }
     
    111111      return count;
    112112    }
     113
     114
     115    public string GetVariableName(int columnIndex) {
     116      return preprocessingData.GetVariableName(columnIndex);
     117    }
     118
     119    public bool IsType<T>(int columnIndex) {
     120      return preprocessingData.IsType<T>(columnIndex);
     121    }
    113122  }
    114123}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IDataGridLogic.cs

    r10246 r10367  
    33  public interface IDataGridLogic {
    44    IEnumerable<string> ColumnNames { get; }
     5    string GetColumnTypeAsString(int columnIndex);
    56    IEnumerable<string> RowNames { get; }
    67    int Columns { get; }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IPreprocessingData.cs

    r10311 r10367  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using HeuristicLab.Core;
     
    2829
    2930  public interface IPreprocessingData : INamedItem {
    30     T GetCell<T>(string variableName, int row);
    31     void SetCell<T>(string variableName, int row, T value);
    32     string GetCellAsString(string variableName, int row);
     31    [Obsolete("use the index based variant, is faster")]
     32    T GetCell<T>(string variableName, int rowIndex);
     33    T GetCell<T>(int columnIndex, int rowIndex);
    3334
     35    [Obsolete("use the index based variant, is faster")]
     36    void SetCell<T>(string variableName, int rowIndex, T value);
     37    void SetCell<T>(int columnIndex, int rowIndex, T value);
     38
     39    [Obsolete("use the index based variant, is faster")]
     40    string GetCellAsString(string variableName, int rowIndex);
     41    string GetCellAsString(int columnIndex, int rowIndex);
     42
     43    [Obsolete("use the index based variant, is faster")]
    3444    IList<T> GetValues<T>(string variableName);
     45    IList<T> GetValues<T>(int columnIndex);
     46
     47    [Obsolete("use the index based variant, is faster")]
    3548    void SetValues<T>(string variableName, IList<T> values);
     49    void SetValues<T>(int columnIndex, IList<T> values);
    3650
    3751    void InsertRow(int rowIndex);
     
    3953
    4054    void InsertColumn<T>(string variableName, int columnIndex);
     55
     56    [Obsolete("use the index based variant, is faster")]
    4157    void DeleteColumn(string variableName);
     58    void DeleteColumn(int columnIndex);
    4259
    4360    IntRange TrainingPartition { get; }
     
    4663    IEnumerable<string> VariableNames { get; }
    4764    string GetVariableName(int columnIndex);
     65    int GetColumnIndex(string variableName);
     66
     67    [Obsolete("use the index based variant, is faster")]
    4868    bool IsType<T>(string variableName);
     69    bool IsType<T>(int columnIndex);
     70
    4971
    5072    int Columns { get; }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IPreprocessingDataManipulation.cs

    r10256 r10367  
    55    void reOrderToIndices(IEnumerable<int> indices);
    66    void reOrderToIndices(IList<Tuple<int, int>> indices);
    7     void ReplaceIndicesByAverageValue(string variableName, IEnumerable<int> indices);
    8     void ReplaceIndicesByLinearInterpolationOfNeighbours(string variableName, IEnumerable<int> indices);
    9     void ReplaceIndicesByMedianValue(string variableName, IEnumerable<int> indices);
    10     void ReplaceIndicesByMostCommonValue(string variableName, IEnumerable<int> indices);
    11     void ReplaceIndicesByRandomValue(string variableName, IEnumerable<int> indices);
    12     void ReplaceIndicesByValue<T>(string variableName, IEnumerable<int> indices, T value);
     7    void ReplaceIndicesByAverageValue(int columnIndex, IEnumerable<int> rowIndices);
     8    void ReplaceIndicesByLinearInterpolationOfNeighbours(int columnIndex, IEnumerable<int> rowIndices);
     9    void ReplaceIndicesByMedianValue(int columnIndex, IEnumerable<int> rowIndices);
     10    void ReplaceIndicesByMostCommonValue(int columnIndex, IEnumerable<int> rowIndices);
     11    void ReplaceIndicesByRandomValue(int columnIndex, IEnumerable<int> rowIndices);
     12    void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value);
    1313    void ShuffleWithRanges(IEnumerable<HeuristicLab.Data.IntRange> ranges);
    1414  }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/ISearchLogic.cs

    r10236 r10367  
    1717    /// </summary>
    1818    /// <returns></returns>
    19     IEnumerable<int> GetMissingValueIndices(string variableName);
     19    IEnumerable<int> GetMissingValueIndices(int columnIndex);
    2020
    21     bool IsMissingValue(string variableName, int rowIndex);
     21    bool IsMissingValue(int columnIndex, int rowIndex);
    2222  }
    2323}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IStatisticsLogic.cs

    r10249 r10367  
    99    int GetNominalColumnCount();
    1010    int GetMissingValueCount();
    11     int GetMissingValueCount(string variableName);
     11    int GetMissingValueCount(int columnIndex);
    1212    int GetRowMissingValueCount(int rowIndex);
    13     T GetMin<T>(string variableName) where T : IComparable<T>;
    14     T GetMax<T>(string variableName) where T : IComparable<T>;
    15     double GetMedian(string variableName);
    16     double GetAverage(string variableName);
    17     T GetMostCommonValue<T>(string variableName);
    18     double GetStandardDeviation(string variableName);
    19     double GetVariance(string variableName);
    20     int GetDifferentValuesCount<T>(string variableName);
     13    T GetMin<T>(int columnIndex) where T : IComparable<T>;
     14    T GetMax<T>(int columnIndex) where T : IComparable<T>;
     15    double GetMedian(int columnIndex);
     16    double GetAverage(int columnIndex);
     17    T GetMostCommonValue<T>(int columnIndex);
     18    double GetStandardDeviation(int columnIndex);
     19    double GetVariance(int columnIndex);
     20    int GetDifferentValuesCount<T>(int columnIndex);
     21    bool IsType<T>(int columnIndex);
    2122  }
    2223}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Views/DataPreprocessingView.cs

    r10344 r10367  
    4545      IPreprocessingData data = Content.Data;
    4646      ISearchLogic searchLogic = new SearchLogic(data);
    47       dataGridContent = new DataGridContent(new DataGridLogic(data), new PreprocessingDataManipulation(data, searchLogic, new StatisticsLogic(data, searchLogic)));
    48       statisticsContent = new StatisticsContent(new StatisticsLogic(data, searchLogic));
     47      var dataGridLogic = new DataGridLogic(data);
     48      dataGridContent = new DataGridContent(dataGridLogic, new PreprocessingDataManipulation(data, searchLogic, new StatisticsLogic(data, searchLogic)));
     49      statisticsContent = new StatisticsContent(new StatisticsLogic(data, searchLogic), dataGridLogic);
    4950      filterContent = new FilterContent(new FilterLogic());
    5051      tranformationContent = new TransformationContent(new TransformationLogic());
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Views/StatisticsView.Designer.cs

    r10316 r10367  
    3434      this.lblNominalColumns = new System.Windows.Forms.Label();
    3535      this.txtMissingValuesTotal = new System.Windows.Forms.Label();
    36       this.dataGridView1 = new System.Windows.Forms.DataGridView();
    37       ((System.ComponentModel.ISupportInitialize)(this.dataGridView1)).BeginInit();
     36      this.dataGridView = new System.Windows.Forms.DataGridView();
     37      ((System.ComponentModel.ISupportInitialize)(this.dataGridView)).BeginInit();
    3838      this.SuspendLayout();
    3939      //
     
    128128      this.txtMissingValuesTotal.Text = "102";
    129129      //
    130       // dataGridView1
     130      // dataGridView
    131131      //
    132       this.dataGridView1.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
    133       this.dataGridView1.Location = new System.Drawing.Point(5, 117);
    134       this.dataGridView1.Name = "dataGridView1";
    135       this.dataGridView1.Size = new System.Drawing.Size(531, 278);
    136       this.dataGridView1.TabIndex = 4;
     132      this.dataGridView.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
     133      this.dataGridView.Location = new System.Drawing.Point(5, 117);
     134      this.dataGridView.Name = "dataGridView";
     135      this.dataGridView.Size = new System.Drawing.Size(531, 278);
     136      this.dataGridView.TabIndex = 4;
     137      this.dataGridView.CellValueNeeded += new System.Windows.Forms.DataGridViewCellValueEventHandler(this.dataGridView_CellValueNeeded);
    137138      //
    138139      // StatisticsView
     
    140141      this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
    141142      this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
    142       this.Controls.Add(this.dataGridView1);
     143      this.Controls.Add(this.dataGridView);
    143144      this.Controls.Add(this.lblNominalColumns);
    144145      this.Controls.Add(this.lblNumericColumns);
     
    153154      this.Name = "StatisticsView";
    154155      this.Size = new System.Drawing.Size(549, 408);
    155       ((System.ComponentModel.ISupportInitialize)(this.dataGridView1)).EndInit();
     156      ((System.ComponentModel.ISupportInitialize)(this.dataGridView)).EndInit();
    156157      this.ResumeLayout(false);
    157158      this.PerformLayout();
     
    171172    private System.Windows.Forms.Label lblNominalColumns;
    172173    private System.Windows.Forms.Label txtMissingValuesTotal;
    173     private System.Windows.Forms.DataGridView dataGridView1;
     174    private System.Windows.Forms.DataGridView dataGridView;
    174175  }
    175176}
Note: See TracChangeset for help on using the changeset viewer.