Changeset 10586 for branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/TransactionalPreprocessingData.cs
- Timestamp: 03/12/14 17:03:45 (11 years ago)
- File: 1 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/TransactionalPreprocessingData.cs
r10585 r10586 23 23 using System.Collections; 24 24 using System.Collections.Generic; 25 using System.Linq;26 25 using HeuristicLab.Common; 27 26 using HeuristicLab.Core; 28 using HeuristicLab.Data;29 27 using HeuristicLab.Problems.DataAnalysis; 30 28 … … 46 44 47 45 [Item("PreprocessingData", "Represents data used for preprocessing.")] 48 public class PreprocessingData : NamedItem, IPreprocessingData {46 public class TransactionalPreprocessingData : PreprocessingData, ITransactionalPreprocessingData { 49 47 50 48 private const int MAX_UNDO_DEPTH = 5; 51 49 52 private IDictionary<int, IList> variableValues;53 54 private IList<string> variableNames;55 56 private double trainingToTestRatio;57 58 50 private IList<PDSnapshot> undoHistory; 59 51 60 //TODO: refactor extract Transaction logic in a own class61 52 private int transactionDepth = 0; 62 53 63 private PreprocessingData(PreprocessingData original, Cloner cloner)54 private TransactionalPreprocessingData(TransactionalPreprocessingData original, Cloner cloner) 64 55 : base(original, cloner) { 65 variableValues = CopyVariableValues(original.variableValues);66 variableNames = new List<string>(original.variableNames);67 trainingToTestRatio = original.trainingToTestRatio;68 56 undoHistory = new List<PDSnapshot>(); 69 57 } 70 58 71 public PreprocessingData(IDataAnalysisProblemData problemData) 72 : base() { 73 Name = "-"; 74 75 variableNames = new List<string>(problemData.Dataset.VariableNames); 76 // create dictionary from variable name to index 77 78 int columnIndex = 0; 79 variableValues = new Dictionary<int, IList>(); 80 foreach (var variableName in problemData.Dataset.VariableNames) { 81 if (problemData.Dataset.IsType<double>(variableName)) { 82 variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList(); 83 } else if (problemData.Dataset.IsType<string>(variableName)) { 84 variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x); 85 } else if 
(problemData.Dataset.IsType<DateTime>(variableName)) { 86 variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x)); 87 } else { 88 throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>"); 89 } 90 ++columnIndex; 91 } 92 93 trainingToTestRatio = (double)problemData.TrainingPartition.Size / Math.Max(problemData.Dataset.Rows, double.Epsilon); 59 public TransactionalPreprocessingData(IDataAnalysisProblemData problemData) 60 : base(problemData) { 94 61 undoHistory = new List<PDSnapshot>(); 95 }96 97 private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {98 var list = new List<T>(ds.Rows);99 for (int row = 0; row < ds.Rows; ++row) {100 list.Add(selector(ds.GetValue(row, column)));101 }102 return list;103 }104 105 private IDictionary<int, IList> CopyVariableValues(IDictionary<int, IList> original) {106 var copy = new Dictionary<int, IList>(variableValues);107 for (int i = 0; i < original.Count; i++) {108 variableValues[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);109 }110 return copy;111 62 } 112 63 … … 131 82 132 83 public override IDeepCloneable Clone(Cloner cloner) { 133 return new PreprocessingData(this, cloner);84 return new TransactionalPreprocessingData(this, cloner); 134 85 } 135 86 136 87 #endregion 137 88 138 #region IPreprocessingData Members89 #region Overridden IPreprocessingData Members 139 90 140 public T GetCell<T>(int columnIndex, int rowIndex) { 141 return (T)variableValues[columnIndex][rowIndex]; 142 } 143 144 145 public void SetCell<T>(int columnIndex, int rowIndex, T value) { 91 public override void SetCell<T>(int columnIndex, int rowIndex, T value) { 146 92 SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex); 147 variableValues[columnIndex][rowIndex] = value;93 base.SetCell<T>(columnIndex, rowIndex, value); 148 94 if 
(transactionDepth <= 0) 149 95 OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex); 150 96 } 151 97 152 153 public string GetCellAsString(int columnIndex, int rowIndex) { 154 return variableValues[columnIndex][rowIndex].ToString(); 155 } 156 157 158 [Obsolete("use the index based variant, is faster")] 159 public IList<T> GetValues<T>(string variableName) { 160 return GetValues<T>(GetColumnIndex(variableName)); 161 } 162 163 public IList<T> GetValues<T>(int columnIndex) { 164 return (IList<T>)variableValues[columnIndex]; 165 } 166 167 public void SetValues<T>(int columnIndex, IList<T> values) { 168 if (IsType<T>(columnIndex)) { 169 SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1); 170 variableValues[columnIndex] = (IList)values; 171 } else { 172 throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name); 173 } 98 public override void SetValues<T>(int columnIndex, IList<T> values) { 99 SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1); 100 base.SetValues<T>(columnIndex, values); 174 101 if (transactionDepth <= 0) 175 102 OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1); 176 103 } 177 104 178 public void InsertRow(int rowIndex) {105 public override void InsertRow(int rowIndex) { 179 106 SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex); 180 foreach (IList column in variableValues.Values) { 181 Type type = column.GetType().GetGenericArguments()[0]; 182 column.Insert(rowIndex, type.IsValueType ? 
Activator.CreateInstance(type) : null); 183 } 107 base.InsertRow(rowIndex); 184 108 if (transactionDepth <= 0) 185 109 OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex); 186 110 } 187 111 188 public void DeleteRow(int rowIndex) {112 public override void DeleteRow(int rowIndex) { 189 113 SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex); 190 foreach (IList column in variableValues.Values) { 191 column.RemoveAt(rowIndex); 192 } 114 base.DeleteRow(rowIndex); 193 115 if (transactionDepth <= 0) 194 116 OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex); 195 117 } 196 118 197 public void InsertColumn<T>(string variableName, int columnIndex) {119 public override void InsertColumn<T>(string variableName, int columnIndex) { 198 120 SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1); 199 variableValues.Add(columnIndex, new List<T>(Rows)); 200 variableNames.Insert(columnIndex, variableName); 121 base.InsertColumn<T>(variableName, columnIndex); 201 122 if (transactionDepth <= 0) 202 123 OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1); 203 124 } 204 125 205 public void DeleteColumn(int columnIndex) {126 public override void DeleteColumn(int columnIndex) { 206 127 SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1); 207 variableValues.Remove(columnIndex); 208 variableNames.RemoveAt(columnIndex); 128 base.DeleteColumn(columnIndex); 209 129 if (transactionDepth <= 0) 210 130 OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1); 211 131 } 212 132 213 public IntRange TrainingPartition { 214 get { return new IntRange(0, (int)(Rows * trainingToTestRatio)); } 215 } 133 #endregion 216 134 217 public IntRange TestPartition { 218 get { return new IntRange((int)(Rows * trainingToTestRatio), Rows); } 219 } 220 221 public string GetVariableName(int columnIndex) { 222 return variableNames[columnIndex]; 223 } 224 225 public IEnumerable<string> 
VariableNames { 226 get { return variableNames; } 227 } 228 229 public int GetColumnIndex(string variableName) { 230 return variableNames.IndexOf(variableName); 231 } 232 233 public bool IsType<T>(int columnIndex) { 234 return variableValues[columnIndex] is List<T>; 235 } 236 237 public int Columns { 238 get { return variableNames.Count; } 239 } 240 241 public int Rows { 242 get { return variableValues.Count > 0 ? variableValues[0].Count : 0; } 243 } 244 245 public Dataset ExportToDataset() { 246 IList<IList> values = new List<IList>(); 247 248 for (int i = 0; i < Columns; ++i) { 249 values.Add(variableValues[i]); 250 } 251 252 var dataset = new Dataset(variableNames, values); 253 return dataset; 254 } 135 #region TransactionalPreprocessingData members 255 136 256 137 public event DataPreprocessingChangedEventHandler Changed;
Note: See TracChangeset for help on using the changeset viewer.