- Timestamp:
- 01/22/14 14:13:11 (11 years ago)
- Location:
- branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/DataGridLogic.cs
r10248 r10367 32 32 } 33 33 34 public string GetColumnTypeAsString(int columnIndex) { 35 if (preprocessingData.IsType<double>(columnIndex)) { 36 return "double"; 37 } else if (preprocessingData.IsType<string>(columnIndex)) { 38 return "string"; 39 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 40 return "DateTime"; 41 } 42 return "Unknown Type"; 43 } 44 34 45 public IEnumerable<string> RowNames { 35 46 get { … … 43 54 } 44 55 bool valid = false; 45 string variableName = preprocessingData.GetVariableName(columnIndex); 46 if (preprocessingData.IsType<double>(variableName)) { 56 if (preprocessingData.IsType<double>(columnIndex)) { 47 57 double val; 48 58 valid = double.TryParse(value, out val); … … 51 61 errorMessage = "Invalid Value (Valid Value Format: \"" + FormatPatterns.GetDoubleFormatPattern() + "\")"; 52 62 } 53 } else if (preprocessingData.IsType<string>( variableName)) {63 } else if (preprocessingData.IsType<string>(columnIndex)) { 54 64 valid = value != null; 55 65 errorMessage = string.Empty; … … 57 67 errorMessage = "Invalid Value (string must not be null)"; 58 68 } 59 } else if (preprocessingData.IsType<DateTime>( variableName)) {69 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 60 70 DateTime date; 61 71 valid = DateTime.TryParse(value, out date); … … 65 75 } 66 76 } else { 67 throw new ArgumentException("column with variableName: " + variableName+ " contains a non supported type.");77 throw new ArgumentException("column " + columnIndex + " contains a non supported type."); 68 78 } 69 79 … … 72 82 73 83 public string GetValue(int rowIndex, int columnIndex) { 74 return preprocessingData.GetCellAsString( preprocessingData.GetVariableName(columnIndex), rowIndex);84 return preprocessingData.GetCellAsString(columnIndex, rowIndex); 75 85 } 76 86 77 87 public bool SetValue(string value, int rowIndex, int columnIndex) { 78 string variableName = preprocessingData.GetVariableName(columnIndex);79 88 bool valid = false; 80 if (preprocessingData.IsType<double>( variableName)) {89 if (preprocessingData.IsType<double>(columnIndex)) { 81 90 double val; 82 91 valid = double.TryParse(value, out val); 83 92 if (valid) { 84 preprocessingData.SetCell<double>( variableName, rowIndex, val);93 preprocessingData.SetCell<double>(columnIndex, rowIndex, val); 85 94 } 86 } else if (preprocessingData.IsType<string>( variableName)) {95 } else if (preprocessingData.IsType<string>(columnIndex)) { 87 96 valid = value != null; 88 97 if (valid) { 89 preprocessingData.SetCell<string>( variableName, rowIndex, value);98 preprocessingData.SetCell<string>(columnIndex, rowIndex, value); 90 99 } 91 } else if (preprocessingData.IsType<DateTime>( variableName)) {100 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 92 101 DateTime date; 93 102 valid = DateTime.TryParse(value, out date); 94 103 if (valid) { 95 preprocessingData.SetCell<DateTime>( variableName, rowIndex, date);104 preprocessingData.SetCell<DateTime>(columnIndex, rowIndex, date); 96 105 } 97 106 } else { 98 throw new ArgumentException("column with variableName: " + variableName+ " contains a non supported type.");107 throw new ArgumentException("column " + columnIndex + " contains a non supported type."); 99 108 } 100 109 -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs
r10341 r10367 33 33 public class PreprocessingData : NamedItem, IPreprocessingData { 34 34 35 private IDictionary< string, IList> variableValues;35 private IDictionary<int, IList> variableValues; 36 36 37 37 private IList<string> variableNames; 38 39 private IDictionary<string, int> variableNameIndices;40 38 41 39 private double trainingToTestRatio; … … 43 41 private PreprocessingData(PreprocessingData original, Cloner cloner) 44 42 : base(original, cloner) { 45 variableValues = new Dictionary<string, IList>(variableValues); 46 variableNameIndices = new Dictionary<string, int>(variableNameIndices); 43 variableValues = new Dictionary<int, IList>(original.variableValues); 47 44 } 48 45 … … 53 50 variableNames = new List<string>(problemData.Dataset.VariableNames); 54 51 // create dictionary from variable name to index 55 variableNameIndices = new Dictionary<string, int>(); 56 var variableNamesList = problemData.Dataset.VariableNames.ToList(); 57 for (int i = 0; i < variableNamesList.Count; i++) { 58 variableNameIndices.Add(variableNamesList[i], i); 59 } 60 61 // copy values 62 variableValues = new Dictionary<string, IList>(); 52 53 int columnIndex = 0; 54 variableValues = new Dictionary<int, IList>(); 63 55 foreach (var variableName in problemData.Dataset.VariableNames) { 64 56 if (problemData.Dataset.IsType<double>(variableName)) { 65 variableValues[ variableName] = problemData.Dataset.GetDoubleValues(variableName).ToList();57 variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList(); 66 58 } else if (problemData.Dataset.IsType<string>(variableName)) { 67 variableValues[ variableName] = CreateColumn<string>(problemData.Dataset, variableNameIndices[variableName], x => x);59 variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x); 68 60 } else if (problemData.Dataset.IsType<DateTime>(variableName)) { 69 variableValues[ variableName] = CreateColumn<DateTime>(problemData.Dataset, variableNameIndices[variableName], x => DateTime.Parse(x));61 variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x)); 70 62 } else { 71 63 throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>"); 72 64 } 65 ++columnIndex; 73 66 } 74 67 … … 79 72 var list = new List<T>(ds.Rows); 80 73 for (int row = 0; row < ds.Rows; ++row) { 81 list.Add(selector(ds.GetValue(row, column))); 74 list.Add(selector(ds.GetValue(row, column))); 82 75 } 83 76 return list; … … 94 87 #region IPreprocessingData Members 95 88 96 public T GetCell<T>(string variableName, int row) { 97 return (T)variableValues[variableName][row]; 98 } 99 100 public void SetCell<T>(string variableName, int row, T value) { 101 variableValues[variableName][row] = value; 102 } 103 104 public string GetCellAsString(string variableName, int row) { 105 return variableValues[variableName][row].ToString(); 106 } 107 89 [Obsolete("use the index based variant, is faster")] 90 public T GetCell<T>(string variableName, int rowIndex) { 91 return GetCell<T>(GetColumnIndex(variableName), rowIndex); 92 } 93 94 public T GetCell<T>(int columnIndex, int rowIndex) { 95 return (T)variableValues[columnIndex][rowIndex]; 96 } 97 98 [Obsolete("use the index based variant, is faster")] 99 public void SetCell<T>(string variableName, int rowIndex, T value) { 100 SetCell<T>(GetColumnIndex(variableName), rowIndex, value); 101 } 102 103 public void SetCell<T>(int columnIndex, int rowIndex, T value) { 104 variableValues[columnIndex][rowIndex] = value; 105 } 106 107 [Obsolete("use the index based variant, is faster")] 108 public string GetCellAsString(string variableName, int rowIndex) { 109 return GetCellAsString(GetColumnIndex(variableName), rowIndex); 110 } 111 112 public string GetCellAsString(int columnIndex, int rowIndex) { 113 return variableValues[columnIndex][rowIndex].ToString(); 114 115 } 116 117 [Obsolete("use the index based variant, is faster")] 108 118 public IList<T> GetValues<T>(string variableName) { 109 // TODO: test if cast is valid 110 return (IList<T>) variableValues[variableName]; 111 } 112 119 return GetValues<T>(GetColumnIndex(variableName)); 120 } 121 122 public IList<T> GetValues<T>(int columnIndex) { 123 return (IList<T>)variableValues[columnIndex]; 124 } 125 126 [Obsolete("use the index based variant, is faster")] 113 127 public void SetValues<T>(string variableName, IList<T> values) { 114 if(IsType<T>(variableName)){ 115 variableValues[variableName] = (IList) values; 116 }else{ 117 throw new ArgumentException("The datatype of column " + variableName + " must be of type " + variableValues[variableName].GetType().Name + " but was " + typeof(T).Name); 128 SetValues<T>(GetColumnIndex(variableName), values); 129 130 } 131 public void SetValues<T>(int columnIndex, IList<T> values) { 132 if (IsType<T>(columnIndex)) { 133 variableValues[columnIndex] = (IList)values; 134 } else { 135 throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name); 118 136 } 119 137 } … … 134 152 135 153 public void InsertColumn<T>(string variableName, int columnIndex) { 136 variableValues.Add(variableName, new List<T>(Rows)); 137 variableNameIndices.Add(variableName, columnIndex); 154 variableValues.Add(columnIndex, new List<T>(Rows)); 138 155 variableNames.Insert(columnIndex, variableName); 139 156 } 140 157 158 public void DeleteColumn(int columnIndex) { 159 variableValues.Remove(columnIndex); 160 variableNames.RemoveAt(columnIndex); 161 } 162 163 [Obsolete("use the index based variant, is faster")] 141 164 public void DeleteColumn(string variableName) { 142 variableValues.Remove(variableName); 143 variableNames.RemoveAt(variableNameIndices[variableName]); 144 variableNameIndices.Remove(variableName); 165 DeleteColumn(GetColumnIndex(variableName)); 145 166 } 146 167 … … 157 178 } 158 179 180 [Obsolete("use the index based variant, is faster")] 159 181 public string GetVariableName(int columnIndex) { 160 182 return variableNames[columnIndex]; 161 183 } 162 184 public int GetColumnIndex(string variableName) { 185 return variableNames.IndexOf(variableName); 186 } 187 188 [Obsolete("use the index based variant, is faster")] 163 189 public bool IsType<T>(string variableName) { 164 return variableValues[variableName] is List<T>; 190 return IsType<T>(GetColumnIndex(variableName)); 191 192 } 193 public bool IsType<T>(int columnIndex) { 194 return variableValues[columnIndex] is List<T>; 165 195 } 166 196 … … 170 200 171 201 public int Rows { 172 get { return variableValues [variableNames[0]].Count; }202 get { return variableValues.Count > 0 ? variableValues[0].Count : 0; } 173 203 } 174 204 175 205 public Dataset ExportToDataset() { 176 206 IList<IList> values = new List<IList>(); 177 foreach (var variable in VariableNames) { 178 values.Add(variableValues[variable]); 207 208 for (int i = 0; i < Columns; ++i) { 209 values.Add(variableValues[i]); 179 210 } 180 211 -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs
r10311 r10367 3 3 using System.Linq; 4 4 using HeuristicLab.Data; 5 using System.Collections;6 5 7 6 namespace HeuristicLab.DataPreprocessing { … … 17 16 } 18 17 19 public void ReplaceIndicesByValue<T>( string variableName, IEnumerable<int> indices, T value) {20 foreach (int index in indices) {21 preprocessingData.SetCell<T>( variableName, index, value);18 public void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value) { 19 foreach (int index in rowIndices) { 20 preprocessingData.SetCell<T>(columnIndex, index, value); 22 21 } 23 22 } 24 23 25 public void ReplaceIndicesByAverageValue( string variableName, IEnumerable<int> indices) {26 double average = statisticInfo.GetAverage( variableName);27 ReplaceIndicesByValue<double>( variableName, indices, average);24 public void ReplaceIndicesByAverageValue(int columnIndex, IEnumerable<int> rowIndices) { 25 double average = statisticInfo.GetAverage(columnIndex); 26 ReplaceIndicesByValue<double>(columnIndex, rowIndices, average); 28 27 } 29 28 30 public void ReplaceIndicesByMedianValue( string variableName, IEnumerable<int> indices) {31 double median = statisticInfo.GetMedian( variableName);32 ReplaceIndicesByValue<double>( variableName, indices, median);29 public void ReplaceIndicesByMedianValue(int columnIndex, IEnumerable<int> rowIndices) { 30 double median = statisticInfo.GetMedian(columnIndex); 31 ReplaceIndicesByValue<double>(columnIndex, rowIndices, median); 33 32 } 34 33 35 public void ReplaceIndicesByRandomValue( string variableName, IEnumerable<int> indices) {34 public void ReplaceIndicesByRandomValue(int columnIndex, IEnumerable<int> rowIndices) { 36 35 Random r = new Random(); 37 36 38 double max = statisticInfo.GetMax<double>( variableName);39 double min = statisticInfo.GetMin<double>( variableName);37 double max = statisticInfo.GetMax<double>(columnIndex); 38 double min = statisticInfo.GetMin<double>(columnIndex); 40 39 double randMultiplier = (max - min); 41 foreach (int index in indices) {40 foreach (int index in rowIndices) { 42 41 double rand = r.NextDouble() * randMultiplier + min; 43 preprocessingData.SetCell<double>( variableName, index, rand);42 preprocessingData.SetCell<double>(columnIndex, index, rand); 44 43 } 45 44 } 46 45 47 public void ReplaceIndicesByLinearInterpolationOfNeighbours( string variableName, IEnumerable<int> indices) {48 int countValues = preprocessingData.GetValues<double>( variableName).Count();49 foreach (int index in indices) {46 public void ReplaceIndicesByLinearInterpolationOfNeighbours(int columnIndex, IEnumerable<int> rowIndices) { 47 int countValues = preprocessingData.GetValues<double>(columnIndex).Count(); 48 foreach (int index in rowIndices) { 50 49 // dont replace first or last values 51 50 if (index > 0 && index < countValues) { 52 int prevIndex = indexOfPrevPresentValue( variableName, index);53 int nextIndex = indexOfNextPresentValue( variableName, index);51 int prevIndex = indexOfPrevPresentValue(columnIndex, index); 52 int nextIndex = indexOfNextPresentValue(columnIndex, index); 54 53 55 54 // no neighbours found … … 57 56 continue; 58 57 } 59 double prev = preprocessingData.GetCell<double>( variableName, prevIndex);60 double next = preprocessingData.GetCell<double>( variableName, nextIndex);58 double prev = preprocessingData.GetCell<double>(columnIndex, prevIndex); 59 double next = preprocessingData.GetCell<double>(columnIndex, nextIndex); 61 60 62 61 int valuesToInterpolate = nextIndex - prevIndex; … … 66 65 for (int i = prevIndex; i < nextIndex; ++i) { 67 66 double interpolated = prev + (interpolationStep * (i - prevIndex)); 68 preprocessingData.SetCell<double>( variableName, i, interpolated);67 preprocessingData.SetCell<double>(columnIndex, i, interpolated); 69 68 } 70 69 } … … 72 71 } 73 72 74 private int indexOfPrevPresentValue( string variableName, int start) {73 private int indexOfPrevPresentValue(int columnIndex, int start) { 75 74 int offset = start - 1; 76 while (offset >= 0 && searchLogic.IsMissingValue( variableName, offset)) {75 while (offset >= 0 && searchLogic.IsMissingValue(columnIndex, offset)) { 77 76 offset--; 78 77 } … … 81 80 } 82 81 83 private int indexOfNextPresentValue( string variableName, int start) {82 private int indexOfNextPresentValue(int columnIndex, int start) { 84 83 int offset = start + 1; 85 while (offset < preprocessingData.Rows && searchLogic.IsMissingValue( variableName, offset)) {84 while (offset < preprocessingData.Rows && searchLogic.IsMissingValue(columnIndex, offset)) { 86 85 offset++; 87 86 } … … 90 89 } 91 90 92 public void ReplaceIndicesByMostCommonValue( string variableName, IEnumerable<int> indices) {93 if (preprocessingData.IsType<double>( variableName)) {94 ReplaceIndicesByValue<double>( variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));95 } else if (preprocessingData.IsType<string>( variableName)) {96 ReplaceIndicesByValue<string>( variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));97 } else if (preprocessingData.IsType<DateTime>( variableName)) {98 ReplaceIndicesByValue<DateTime>( variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));91 public void ReplaceIndicesByMostCommonValue(int columnIndex, IEnumerable<int> rowIndices) { 92 if (preprocessingData.IsType<double>(columnIndex)) { 93 ReplaceIndicesByValue<double>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<double>(columnIndex)); 94 } else if (preprocessingData.IsType<string>(columnIndex)) { 95 ReplaceIndicesByValue<string>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<string>(columnIndex)); 96 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 97 ReplaceIndicesByValue<DateTime>(columnIndex, rowIndices, statisticInfo.GetMostCommonValue<DateTime>(columnIndex)); 99 98 } else { 100 throw new ArgumentException("column with index: " + variableName+ " contains a non supported type.");99 throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type."); 101 100 } 102 101 } … … 131 130 132 131 public void reOrderToIndices(IList<System.Tuple<int, int>> indices) { 133 for each (string variableName in preprocessingData.VariableNames) {134 if (preprocessingData.IsType<double>( variableName)) {135 reOrderToIndices<double>( variableName, indices);136 } else if (preprocessingData.IsType<string>( variableName)) {137 reOrderToIndices<string>( variableName, indices);138 } else if (preprocessingData.IsType<DateTime>( variableName)) {139 reOrderToIndices<DateTime>( variableName, indices);132 for (int i = 0; i < preprocessingData.Columns; ++i) { 133 if (preprocessingData.IsType<double>(i)) { 134 reOrderToIndices<double>(i, indices); 135 } else if (preprocessingData.IsType<string>(i)) { 136 reOrderToIndices<string>(i, indices); 137 } else if (preprocessingData.IsType<DateTime>(i)) { 138 reOrderToIndices<DateTime>(i, indices); 140 139 } 141 140 } 142 141 } 143 142 144 private void reOrderToIndices<T>( string variableName, IList<Tuple<int, int>> indices) {143 private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) { 145 144 146 List<T> originalData = new List<T>(preprocessingData.GetValues<T>( variableName));145 List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex)); 147 146 148 147 // process all columns equally … … 152 151 153 152 T replaceValue = originalData.ElementAt<T>(replaceIndex); 154 preprocessingData.SetCell<T>( variableName, originalIndex, replaceValue);153 preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue); 155 154 } 156 155 } -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/SearchLogic.cs
r10236 r10367 15 15 var dic = new Dictionary<string, IEnumerable<int>>(); 16 16 foreach (string variableName in preprocessingData.VariableNames) { 17 dic.Add(variableName, GetMissingValueIndices( variableName));17 dic.Add(variableName, GetMissingValueIndices(preprocessingData.GetColumnIndex(variableName))); 18 18 } 19 19 return dic; 20 20 } 21 21 22 public bool IsMissingValue( string variableName, int rowIndex) {23 if (preprocessingData.IsType<double>( variableName)) {24 return double.IsNaN(preprocessingData.GetCell<double>( variableName, rowIndex));25 } else if (preprocessingData.IsType<string>( variableName)) {26 return string.IsNullOrEmpty(preprocessingData.GetCell<string>( variableName, rowIndex));27 } else if (preprocessingData.IsType<DateTime>( variableName)) {28 return preprocessingData.GetCell<DateTime>( variableName, rowIndex).Equals(DateTime.MinValue);22 public bool IsMissingValue(int columnIndex, int rowIndex) { 23 if (preprocessingData.IsType<double>(columnIndex)) { 24 return double.IsNaN(preprocessingData.GetCell<double>(columnIndex, rowIndex)); 25 } else if (preprocessingData.IsType<string>(columnIndex)) { 26 return string.IsNullOrEmpty(preprocessingData.GetCell<string>(columnIndex, rowIndex)); 27 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 28 return preprocessingData.GetCell<DateTime>(columnIndex, rowIndex).Equals(DateTime.MinValue); 29 29 } else { 30 throw new ArgumentException("cell in column with variableName: " + variableName+ " and row index " + rowIndex + " contains a non supported type.");30 throw new ArgumentException("cell in column " + columnIndex + " and row index " + rowIndex + " contains a non supported type."); 31 31 } 32 32 } 33 33 34 public IEnumerable<int> GetMissingValueIndices( string variableName) {35 if (preprocessingData.IsType<double>( variableName)) {36 return preprocessingData.GetValues<double>( variableName).Select((s, i) => new { i, s }).Where(t => double.IsNaN(t.s)).Select(t => t.i);37 } else if (preprocessingData.IsType<string>( variableName)) {38 return preprocessingData.GetValues<string>( variableName).Select((s, i) => new { i, s }).Where(t => string.IsNullOrEmpty(t.s)).Select(t => t.i);39 } else if (preprocessingData.IsType<DateTime>( variableName)) {40 return preprocessingData.GetValues<DateTime>( variableName).Select((s, i) => new { i, s }).Where(t => t.s.Equals(DateTime.MinValue)).Select(t => t.i);34 public IEnumerable<int> GetMissingValueIndices(int columnIndex) { 35 if (preprocessingData.IsType<double>(columnIndex)) { 36 return preprocessingData.GetValues<double>(columnIndex).Select((s, i) => new { i, s }).Where(t => double.IsNaN(t.s)).Select(t => t.i); 37 } else if (preprocessingData.IsType<string>(columnIndex)) { 38 return preprocessingData.GetValues<string>(columnIndex).Select((s, i) => new { i, s }).Where(t => string.IsNullOrEmpty(t.s)).Select(t => t.i); 39 } else if (preprocessingData.IsType<DateTime>(columnIndex)) { 40 return preprocessingData.GetValues<DateTime>(columnIndex).Select((s, i) => new { i, s }).Where(t => t.s.Equals(DateTime.MinValue)).Select(t => t.i); 41 41 } else { 42 throw new ArgumentException("column with variableName: " + variableName+ " contains a non supported type.");42 throw new ArgumentException("column " + columnIndex + " contains a non supported type."); 43 43 } 44 44 } -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsContent.cs
r10313 r10367 14 14 15 15 private readonly IStatisticsLogic statisticsLogic; 16 public StatisticsContent(IStatisticsLogic theStatisticsLogic) 16 private readonly IDataGridLogic dataGridLogic; 17 public StatisticsContent(IStatisticsLogic theStatisticsLogic, IDataGridLogic theDataGridLogic) 17 18 { 18 19 statisticsLogic = theStatisticsLogic; 20 dataGridLogic = theDataGridLogic; 19 21 } 20 22 … … 27 29 public IStatisticsLogic StatisticsLogic 28 30 { 29 get 30 { 31 return statisticsLogic; 32 } 31 get { return statisticsLogic; } 32 } 33 34 public IDataGridLogic DataDridLogic { 35 get { return dataGridLogic; } 33 36 } 34 37 -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs
r10249 r10367 39 39 public int GetMissingValueCount() { 40 40 int count = 0; 41 for each (var variableName in preprocessingData.VariableNames){42 count += GetMissingValueCount( variableName);41 for(int i = 0; i < preprocessingData.Columns; ++i){ 42 count += GetMissingValueCount(i); 43 43 } 44 44 return count; 45 45 } 46 46 47 public int GetMissingValueCount( string variableName) {48 return searchLogic.GetMissingValueIndices( variableName).Count();47 public int GetMissingValueCount(int columnIndex) { 48 return searchLogic.GetMissingValueIndices(columnIndex).Count(); 49 49 } 50 50 51 public T GetMin<T>( string variableName) where T : IComparable<T> {52 return preprocessingData.GetValues<T>( variableName).Min();51 public T GetMin<T>(int columnIndex) where T : IComparable<T> { 52 return preprocessingData.GetValues<T>(columnIndex).Min(); 53 53 } 54 54 55 public T GetMax<T>( string variableName) where T : IComparable<T> {56 return preprocessingData.GetValues<T>( variableName).Max();55 public T GetMax<T>(int columnIndex) where T : IComparable<T> { 56 return preprocessingData.GetValues<T>(columnIndex).Max(); 57 57 } 58 58 59 public double GetMedian( string variableName) {59 public double GetMedian(int columnIndex) { 60 60 double median = double.NaN; 61 if (preprocessingData.IsType<double>( variableName)) {62 median = preprocessingData.GetValues<double>( variableName).Median();61 if (preprocessingData.IsType<double>(columnIndex)) { 62 median = preprocessingData.GetValues<double>(columnIndex).Median(); 63 63 } 64 64 return median; 65 65 } 66 66 67 public double GetAverage( string variableName) {67 public double GetAverage(int columnIndex) { 68 68 double avg = double.NaN; 69 if (preprocessingData.IsType<double>( variableName)) {70 avg = preprocessingData.GetValues<double>( variableName).Average();69 if (preprocessingData.IsType<double>(columnIndex)) { 70 avg = preprocessingData.GetValues<double>(columnIndex).Average(); 71 71 } 72 72 return avg; 73 73 } 74 74 75 public T GetMostCommonValue<T>( string variableName) {76 return preprocessingData.GetValues<T>( variableName)75 public T GetMostCommonValue<T>(int columnIndex) { 76 return preprocessingData.GetValues<T>(columnIndex) 77 77 .GroupBy(x => x) 78 78 .OrderByDescending(g => g.Count()) … … 82 82 83 83 84 public double GetStandardDeviation( string variableName) {84 public double GetStandardDeviation(int columnIndex) { 85 85 double stdDev = double.NaN; 86 if (preprocessingData.IsType<double>( variableName)) {87 stdDev = preprocessingData.GetValues<double>( variableName).StandardDeviation();86 if (preprocessingData.IsType<double>(columnIndex)) { 87 stdDev = preprocessingData.GetValues<double>(columnIndex).StandardDeviation(); 88 88 } 89 89 return stdDev; 90 90 } 91 91 92 public double GetVariance( string variableName) {92 public double GetVariance(int columnIndex) { 93 93 double stdDev = double.NaN; 94 if (preprocessingData.IsType<double>( variableName)) {95 stdDev = preprocessingData.GetValues<double>( variableName).Variance();94 if (preprocessingData.IsType<double>(columnIndex)) { 95 stdDev = preprocessingData.GetValues<double>(columnIndex).Variance(); 96 96 } 97 97 return stdDev; 98 98 } 99 99 100 public int GetDifferentValuesCount<T>( string variableName) {101 return preprocessingData.GetValues<T>( variableName).GroupBy(x => x).Count();100 public int GetDifferentValuesCount<T>(int columnIndex) { 101 return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count(); 102 102 } 103 103 104 104 public int GetRowMissingValueCount(int rowIndex) { 105 105 int count = 0; 106 foreach (var variableName in preprocessingData.VariableNames){107 if (searchLogic.IsMissingValue( variableName, rowIndex)) {106 for(int i = 0; i < preprocessingData.Columns; ++i){ 107 if (searchLogic.IsMissingValue(i, rowIndex)) { 108 108 ++count; 109 109 } … … 111 111 return count; 112 112 } 113 114 115 public string GetVariableName(int columnIndex) { 116 return preprocessingData.GetVariableName(columnIndex); 117 } 118 119 public bool IsType<T>(int columnIndex) { 120 return preprocessingData.IsType<T>(columnIndex); 121 } 113 122 } 114 123 }
Note: See TracChangeset
for help on using the changeset viewer.