Changeset 10185
- Timestamp: 12/04/13 13:51:07 (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs
r10182 r10185
 23  23     using System.Collections;
 24  24     using System.Collections.Generic;
     25  +  using System.Linq;
     26  +  using HeuristicLab.Common;
 25  27     using HeuristicLab.Core;
 26  28     using HeuristicLab.DataPreprocessing;
  …   …
 31  33       public class PreprocessingData : NamedItem, IPreprocessingData {
 32  34
 33      -      private Dictionary<string, IList> variableValues;
     35  +      private IDictionary<string, IList> variableValues;
 34  36
     37  +      private IDictionary<string, int> variableNameIndices;
     38  +
     39  +      private double trainingToTestRatio;
     40  +
     41  +      private PreprocessingData(PreprocessingData original, Cloner cloner)
     42  +        : base(original, cloner) {
     43  +        variableValues = new Dictionary<string, IList>(variableValues);
     44  +        variableNameIndices = new Dictionary<string, int>(variableNameIndices);
     45  +      }
     46  +
 35  47         public PreprocessingData(IDataAnalysisProblemData problemData)
 36  48           : base() {
 37  49           Name = "-";
 38      -        foreach (var s in problemData.Dataset.VariableNames) {
 39      -          if (problemData.Dataset.IsType<double>(s)) {
 40  50
 41      -          } else if (problemData.Dataset.IsType<string>(s)) {
 42      -
 43      -          } else if (problemData.Dataset.IsType<DateTime>(s)) {
 44      -
     51  +        // create dictionary from variable name to index
     52  +        variableNameIndices = new Dictionary<string, int>();
     53  +        var variableNamesList = problemData.Dataset.VariableNames.ToList();
     54  +        for (int i = 0; i < variableNamesList.Count; i++) {
     55  +          variableNameIndices.Add(variableNamesList[i], i);
     56  +        }
     57  +
     58  +        // copy values
     59  +        variableValues = new Dictionary<string, IList>();
     60  +        foreach (var variableName in problemData.Dataset.VariableNames) {
     61  +          if (problemData.Dataset.IsType<double>(variableName)) {
     62  +            variableValues[variableName] = problemData.Dataset.GetDoubleValues(variableName).ToList();
     63  +          } else if (problemData.Dataset.IsType<string>(variableName)) {
     64  +            variableValues[variableName] = CreateColumn<string>(problemData.Dataset, variableNameIndices[variableName], x => x);
     65  +          } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
     66  +            variableValues[variableName] = CreateColumn<DateTime>(problemData.Dataset, variableNameIndices[variableName], x => DateTime.Parse(x));
 45  67             } else {
 46      -            throw new ArgumentException("The datatype of column " + s + "is of TODO");
     68  +            throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
 47  69             }
 48  70           }
 49      -        throw new NotImplementedException();
     71  +
     72  +        trainingToTestRatio = (double)problemData.TrainingPartition.Size / problemData.TestPartition.Size;
     73  +      }
     74  +
     75  +      private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
     76  +        var list = new List<T>(ds.Rows);
     77  +        for (int row = 0; row < ds.Rows; row++) {
     78  +          list[row] = selector(ds.GetValue(row, column));
     79  +        }
     80  +        return list;
 50  81         }
 51  82
 52  83         #region NamedItem abstract Member Implementations
 53  84
 54      -      public override Common.IDeepCloneable Clone(Common.Cloner cloner) {
 55      -        throw new NotImplementedException();
     85  +      public override IDeepCloneable Clone(Cloner cloner) {
     86  +        return new PreprocessingData(this, cloner);
 56  87         }
 57  88
Note: See TracChangeset for help on using the changeset viewer.