Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/22/14 14:13:11 (10 years ago)
Author:
rstoll
Message:
  • Modified PreprocessingData: it now uses columnIndex instead of variableName (which is faster and more convenient); the variableName-based methods are marked as Obsolete
  • SearchLogic, DataGridLogic, StatisticLogic and PreprocessingDataManipulation have already been changed as well

*

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs

    r10341 r10367  
    3333  public class PreprocessingData : NamedItem, IPreprocessingData {
    3434
    35     private IDictionary<string, IList> variableValues;
     35    private IDictionary<int, IList> variableValues;
    3636
    3737    private IList<string> variableNames;
    38 
    39     private IDictionary<string, int> variableNameIndices;
    4038
    4139    private double trainingToTestRatio;
     
    4341    private PreprocessingData(PreprocessingData original, Cloner cloner)
    4442      : base(original, cloner) {
    45       variableValues = new Dictionary<string, IList>(variableValues);
    46       variableNameIndices = new Dictionary<string, int>(variableNameIndices);
     43      variableValues = new Dictionary<int, IList>(original.variableValues);
    4744    }
    4845
     
    5350      variableNames = new List<string>(problemData.Dataset.VariableNames);
    5451      // create dictionary from variable name to index
    55       variableNameIndices = new Dictionary<string, int>();
    56       var variableNamesList = problemData.Dataset.VariableNames.ToList();
    57       for (int i = 0; i < variableNamesList.Count; i++) {
    58         variableNameIndices.Add(variableNamesList[i], i);
    59       }
    60 
    61       // copy values
    62       variableValues = new Dictionary<string, IList>();
     52
     53      int columnIndex = 0;
     54      variableValues = new Dictionary<int, IList>();
    6355      foreach (var variableName in problemData.Dataset.VariableNames) {
    6456        if (problemData.Dataset.IsType<double>(variableName)) {
    65           variableValues[variableName] = problemData.Dataset.GetDoubleValues(variableName).ToList();
     57          variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
    6658        } else if (problemData.Dataset.IsType<string>(variableName)) {
    67           variableValues[variableName] = CreateColumn<string>(problemData.Dataset, variableNameIndices[variableName], x => x);
     59          variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
    6860        } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
    69           variableValues[variableName] = CreateColumn<DateTime>(problemData.Dataset, variableNameIndices[variableName], x => DateTime.Parse(x));
     61          variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
    7062        } else {
    7163          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
    7264        }
     65        ++columnIndex;
    7366      }
    7467
     
    7972      var list = new List<T>(ds.Rows);
    8073      for (int row = 0; row < ds.Rows; ++row) {
    81         list.Add(selector(ds.GetValue(row, column))); 
     74        list.Add(selector(ds.GetValue(row, column)));
    8275      }
    8376      return list;
     
    9487    #region IPreprocessingData Members
    9588
    96     public T GetCell<T>(string variableName, int row) {
    97       return (T)variableValues[variableName][row];
    98     }
    99 
    100     public void SetCell<T>(string variableName, int row, T value) {
    101       variableValues[variableName][row] = value;
    102     }
    103 
    104     public string GetCellAsString(string variableName, int row) {
    105       return variableValues[variableName][row].ToString();
    106     }
    107 
     89    [Obsolete("use the index based variant, is faster")]
     90    public T GetCell<T>(string variableName, int rowIndex) {
     91      return GetCell<T>(GetColumnIndex(variableName), rowIndex);
     92    }
     93
     94    public T GetCell<T>(int columnIndex, int rowIndex) {
     95      return (T)variableValues[columnIndex][rowIndex];
     96    }
     97
     98    [Obsolete("use the index based variant, is faster")]
     99    public void SetCell<T>(string variableName, int rowIndex, T value) {
     100      SetCell<T>(GetColumnIndex(variableName), rowIndex, value);
     101    }
     102
     103    public void SetCell<T>(int columnIndex, int rowIndex, T value) {
     104      variableValues[columnIndex][rowIndex] = value;
     105    }
     106
     107    [Obsolete("use the index based variant, is faster")]
     108    public string GetCellAsString(string variableName, int rowIndex) {
     109      return GetCellAsString(GetColumnIndex(variableName), rowIndex);
     110    }
     111
     112    public string GetCellAsString(int columnIndex, int rowIndex) {
     113      return variableValues[columnIndex][rowIndex].ToString();
     114
     115    }
     116
     117    [Obsolete("use the index based variant, is faster")]
    108118    public IList<T> GetValues<T>(string variableName) {
    109       // TODO: test if cast is valid
    110       return (IList<T>) variableValues[variableName];
    111     }
    112 
     119      return GetValues<T>(GetColumnIndex(variableName));
     120    }
     121
     122    public IList<T> GetValues<T>(int columnIndex) {
     123      return (IList<T>)variableValues[columnIndex];
     124    }
     125
     126    [Obsolete("use the index based variant, is faster")]
    113127    public void SetValues<T>(string variableName, IList<T> values) {
    114       if(IsType<T>(variableName)){
    115         variableValues[variableName] = (IList) values;
    116       }else{
    117         throw new ArgumentException("The datatype of column " + variableName + " must be of type " + variableValues[variableName].GetType().Name + " but was " + typeof(T).Name);
     128      SetValues<T>(GetColumnIndex(variableName), values);
     129
     130    }
     131    public void SetValues<T>(int columnIndex, IList<T> values) {
     132      if (IsType<T>(columnIndex)) {
     133        variableValues[columnIndex] = (IList)values;
     134      } else {
     135        throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
    118136      }
    119137    }
     
    134152
    135153    public void InsertColumn<T>(string variableName, int columnIndex) {
    136       variableValues.Add(variableName, new List<T>(Rows));
    137       variableNameIndices.Add(variableName, columnIndex);
     154      variableValues.Add(columnIndex, new List<T>(Rows));
    138155      variableNames.Insert(columnIndex, variableName);
    139156    }
    140157
     158    public void DeleteColumn(int columnIndex) {
     159      variableValues.Remove(columnIndex);
     160      variableNames.RemoveAt(columnIndex);
     161    }
     162
     163    [Obsolete("use the index based variant, is faster")]
    141164    public void DeleteColumn(string variableName) {
    142       variableValues.Remove(variableName);
    143       variableNames.RemoveAt(variableNameIndices[variableName]);
    144       variableNameIndices.Remove(variableName);
     165      DeleteColumn(GetColumnIndex(variableName));
    145166    }
    146167
     
    157178    }
    158179
     180    [Obsolete("use the index based variant, is faster")]
    159181    public string GetVariableName(int columnIndex) {
    160182      return variableNames[columnIndex];
    161183    }
    162 
     184    public int GetColumnIndex(string variableName) {
     185      return variableNames.IndexOf(variableName);
     186    }
     187
     188    [Obsolete("use the index based variant, is faster")]
    163189    public bool IsType<T>(string variableName) {
    164       return variableValues[variableName] is List<T>;
     190      return IsType<T>(GetColumnIndex(variableName));
     191
     192    }
     193    public bool IsType<T>(int columnIndex) {
     194      return variableValues[columnIndex] is List<T>;
    165195    }
    166196
     
    170200
    171201    public int Rows {
    172       get { return variableValues[variableNames[0]].Count; }
     202      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
    173203    }
    174204
    175205    public Dataset ExportToDataset() {
    176206      IList<IList> values = new List<IList>();
    177       foreach (var variable in VariableNames) {
    178         values.Add(variableValues[variable]);
     207
     208      for (int i = 0; i < Columns; ++i) {
     209        values.Add(variableValues[i]);
    179210      }
    180211
Note: See TracChangeset for help on using the changeset viewer.