Changeset 10367 for branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs
- Timestamp: 01/22/14 14:13:11 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs
  r10341 r10367
    33   33    public class PreprocessingData : NamedItem, IPreprocessingData {
    34   34
-   35           private IDictionary< string, IList> variableValues;
+        35      private IDictionary<int, IList> variableValues;
    36   36
    37   37      private IList<string> variableNames;
-   38
-   39           private IDictionary<string, int> variableNameIndices;
    40   38
    41   39      private double trainingToTestRatio;
  … …
    43   41      private PreprocessingData(PreprocessingData original, Cloner cloner)
    44   42        : base(original, cloner) {
-   45             variableValues = new Dictionary<string, IList>(variableValues);
-   46             variableNameIndices = new Dictionary<string, int>(variableNameIndices);
+        43        variableValues = new Dictionary<int, IList>(original.variableValues);
    47   44      }
    48   45
  … …
    53   50        variableNames = new List<string>(problemData.Dataset.VariableNames);
    54   51        // create dictionary from variable name to index
-   55             variableNameIndices = new Dictionary<string, int>();
-   56             var variableNamesList = problemData.Dataset.VariableNames.ToList();
-   57             for (int i = 0; i < variableNamesList.Count; i++) {
-   58               variableNameIndices.Add(variableNamesList[i], i);
-   59             }
-   60
-   61             // copy values
-   62             variableValues = new Dictionary<string, IList>();
+        52
+        53        int columnIndex = 0;
+        54        variableValues = new Dictionary<int, IList>();
    63   55        foreach (var variableName in problemData.Dataset.VariableNames) {
    64   56          if (problemData.Dataset.IsType<double>(variableName)) {
-   65                 variableValues[ variableName] = problemData.Dataset.GetDoubleValues(variableName).ToList();
+        57            variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
    66   58          } else if (problemData.Dataset.IsType<string>(variableName)) {
-   67                 variableValues[ variableName] = CreateColumn<string>(problemData.Dataset, variableNameIndices[variableName], x => x);
+        59            variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
    68   60          } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
-   69                 variableValues[ variableName] = CreateColumn<DateTime>(problemData.Dataset, variableNameIndices[variableName], x => DateTime.Parse(x));
+        61            variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
    70   62          } else {
    71   63            throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
    72   64          }
+        65          ++columnIndex;
    73   66        }
    74   67
  … …
    79   72        var list = new List<T>(ds.Rows);
    80   73        for (int row = 0; row < ds.Rows; ++row) {
-   81               list.Add(selector(ds.GetValue(row, column)));
+        74          list.Add(selector(ds.GetValue(row, column)));
    82   75        }
    83   76        return list;
  … …
    94   87      #region IPreprocessingData Members
    95   88
-   96           public T GetCell<T>(string variableName, int row) {
-   97             return (T)variableValues[variableName][row];
-   98           }
-   99
-  100           public void SetCell<T>(string variableName, int row, T value) {
-  101             variableValues[variableName][row] = value;
-  102           }
-  103
-  104           public string GetCellAsString(string variableName, int row) {
-  105             return variableValues[variableName][row].ToString();
-  106           }
-  107
+        89      [Obsolete("use the index based variant, is faster")]
+        90      public T GetCell<T>(string variableName, int rowIndex) {
+        91        return GetCell<T>(GetColumnIndex(variableName), rowIndex);
+        92      }
+        93
+        94      public T GetCell<T>(int columnIndex, int rowIndex) {
+        95        return (T)variableValues[columnIndex][rowIndex];
+        96      }
+        97
+        98      [Obsolete("use the index based variant, is faster")]
+        99      public void SetCell<T>(string variableName, int rowIndex, T value) {
+       100        SetCell<T>(GetColumnIndex(variableName), rowIndex, value);
+       101      }
+       102
+       103      public void SetCell<T>(int columnIndex, int rowIndex, T value) {
+       104        variableValues[columnIndex][rowIndex] = value;
+       105      }
+       106
+       107      [Obsolete("use the index based variant, is faster")]
+       108      public string GetCellAsString(string variableName, int rowIndex) {
+       109        return GetCellAsString(GetColumnIndex(variableName), rowIndex);
+       110      }
+       111
+       112      public string GetCellAsString(int columnIndex, int rowIndex) {
+       113        return variableValues[columnIndex][rowIndex].ToString();
+       114
+       115      }
+       116
+       117      [Obsolete("use the index based variant, is faster")]
   108  118      public IList<T> GetValues<T>(string variableName) {
-  109             // TODO: test if cast is valid
-  110             return (IList<T>) variableValues[variableName];
-  111           }
-  112
+       119        return GetValues<T>(GetColumnIndex(variableName));
+       120      }
+       121
+       122      public IList<T> GetValues<T>(int columnIndex) {
+       123        return (IList<T>)variableValues[columnIndex];
+       124      }
+       125
+       126      [Obsolete("use the index based variant, is faster")]
   113  127      public void SetValues<T>(string variableName, IList<T> values) {
-  114             if(IsType<T>(variableName)){
-  115               variableValues[variableName] = (IList) values;
-  116             }else{
-  117               throw new ArgumentException("The datatype of column " + variableName + " must be of type " + variableValues[variableName].GetType().Name + " but was " + typeof(T).Name);
+       128        SetValues<T>(GetColumnIndex(variableName), values);
+       129
+       130      }
+       131      public void SetValues<T>(int columnIndex, IList<T> values) {
+       132        if (IsType<T>(columnIndex)) {
+       133          variableValues[columnIndex] = (IList)values;
+       134        } else {
+       135          throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
   118  136        }
   119  137      }
  … …
   134  152
   135  153      public void InsertColumn<T>(string variableName, int columnIndex) {
-  136             variableValues.Add(variableName, new List<T>(Rows));
-  137             variableNameIndices.Add(variableName, columnIndex);
+       154        variableValues.Add(columnIndex, new List<T>(Rows));
   138  155        variableNames.Insert(columnIndex, variableName);
   139  156      }
   140  157
+       158      public void DeleteColumn(int columnIndex) {
+       159        variableValues.Remove(columnIndex);
+       160        variableNames.RemoveAt(columnIndex);
+       161      }
+       162
+       163      [Obsolete("use the index based variant, is faster")]
   141  164      public void DeleteColumn(string variableName) {
-  142             variableValues.Remove(variableName);
-  143             variableNames.RemoveAt(variableNameIndices[variableName]);
-  144             variableNameIndices.Remove(variableName);
+       165        DeleteColumn(GetColumnIndex(variableName));
   145  166      }
   146  167
  … …
   157  178      }
   158  179
+       180      [Obsolete("use the index based variant, is faster")]
   159  181      public string GetVariableName(int columnIndex) {
   160  182        return variableNames[columnIndex];
   161  183      }
-  162
+       184      public int GetColumnIndex(string variableName) {
+       185        return variableNames.IndexOf(variableName);
+       186      }
+       187
+       188      [Obsolete("use the index based variant, is faster")]
   163  189      public bool IsType<T>(string variableName) {
-  164             return variableValues[variableName] is List<T>;
+       190        return IsType<T>(GetColumnIndex(variableName));
+       191
+       192      }
+       193      public bool IsType<T>(int columnIndex) {
+       194        return variableValues[columnIndex] is List<T>;
   165  195      }
   166  196
  … …
   170  200
   171  201      public int Rows {
-  172             get { return variableValues [variableNames[0]].Count; }
+       202        get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
   173  203      }
   174  204
   175  205      public Dataset ExportToDataset() {
   176  206        IList<IList> values = new List<IList>();
-  177             foreach (var variable in VariableNames) {
-  178               values.Add(variableValues[variable]);
+       207
+       208        for (int i = 0; i < Columns; ++i) {
+       209          values.Add(variableValues[i]);
   179  210        }
   180  211
Note: See TracChangeset for help on using the changeset viewer.