- Timestamp: 05/07/14 12:47:54 (10 years ago)
- Location: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
- Files: 13 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ChartLogic.cs
```diff
--- r10803
+++ r10809
@@ -37,5 +37,5 @@
 
 public DataRow CreateDataRow(string variableName, DataRowVisualProperties.DataRowChartType chartType) {
-  IList<double> values = preprocessingData.GetValues<double>(variableName);
+  IList<double> values = preprocessingData.GetValues<double>(variableName, false);
   DataRow row = new DataRow(variableName, "", values);
   row.VisualProperties.ChartType = chartType;
@@ -44,5 +44,5 @@
 
 public DataRow CreateDataRowRange(string variableName,int start, int end, DataRowVisualProperties.DataRowChartType chartType) {
-  IList<double> values = preprocessingData.GetValues<double>(variableName);
+  IList<double> values = preprocessingData.GetValues<double>(variableName, false);
   IList<double> valuesRange = new List<double>();
   for (int i = 0; i < values.Count; i++) {
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/DataGridContent.cs
```diff
--- r10807
+++ r10809
@@ -45,5 +45,5 @@
 }
 
-public IManipulationLogic PreprocessingDataManipulation{
+public IManipulationLogic ManipulationLogic {
   get { return manipulationLogic; }
 }
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/FilteredPreprocessingData.cs
```diff
--- r10804
+++ r10809
@@ -41,10 +41,10 @@
 }
 
-public IList<T> GetValues<T>(string variableName) {
-  return ActiveData.GetValues<T>(variableName);
+public IList<T> GetValues<T>(string variableName, bool considerSelection) {
+  return ActiveData.GetValues<T>(variableName, considerSelection);
 }
 
-public IList<T> GetValues<T>(int columnIndex) {
-  return ActiveData.GetValues<T>(columnIndex);
+public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
+  return ActiveData.GetValues<T>(columnIndex, considerSelection);
 }
 
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs
```diff
--- r10737
+++ r10809
@@ -45,12 +45,12 @@
 }
 
-public void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells) {
-  preprocessingData.InTransaction(() => {
-    foreach (var column in cells) {
-      if (preprocessingData.IsType<double>(column.Key)) {
-        double average = statisticsLogic.GetAverage(column.Key);
+public void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
+  preprocessingData.InTransaction(() => {
+    foreach (var column in cells) {
+      if (preprocessingData.IsType<double>(column.Key)) {
+        double average = statisticsLogic.GetAverage(column.Key, considerSelection);
         ReplaceIndicesByValue<double>(column.Key, column.Value, average);
       } else if (preprocessingData.IsType<DateTime>(column.Key)) {
-        DateTime average = statisticsLogic.GetAverageDateTime(column.Key);
+        DateTime average = statisticsLogic.GetAverageDateTime(column.Key, considerSelection);
         ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average);
       }
@@ -59,12 +59,12 @@
 }
 
-public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells) {
-  preprocessingData.InTransaction(() => {
-    foreach (var column in cells) {
-      if (preprocessingData.IsType<double>(column.Key)) {
-        double median = statisticsLogic.GetMedian(column.Key);
+public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
+  preprocessingData.InTransaction(() => {
+    foreach (var column in cells) {
+      if (preprocessingData.IsType<double>(column.Key)) {
+        double median = statisticsLogic.GetMedian(column.Key, considerSelection);
         ReplaceIndicesByValue<double>(column.Key, column.Value, median);
       } else if (preprocessingData.IsType<DateTime>(column.Key)) {
-        DateTime median = statisticsLogic.GetMedianDateTime(column.Key);
+        DateTime median = statisticsLogic.GetMedianDateTime(column.Key, considerSelection);
         ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median);
       }
@@ -73,5 +73,5 @@
 }
 
-public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells) {
+public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
   preprocessingData.InTransaction(() => {
     Random r = new Random();
@@ -79,6 +79,6 @@
     foreach (var column in cells) {
       if (preprocessingData.IsType<double>(column.Key)) {
-        double max = statisticsLogic.GetMax<double>(column.Key);
-        double min = statisticsLogic.GetMin<double>(column.Key);
+        double max = statisticsLogic.GetMax<double>(column.Key, considerSelection);
+        double min = statisticsLogic.GetMin<double>(column.Key, considerSelection);
         double randMultiplier = (max - min);
         foreach (int index in column.Value) {
@@ -87,6 +87,6 @@
         }
       } else if (preprocessingData.IsType<DateTime>(column.Key)) {
-        DateTime min = statisticsLogic.GetMin<DateTime>(column.Key);
-        DateTime max = statisticsLogic.GetMax<DateTime>(column.Key);
+        DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, considerSelection);
+        DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, considerSelection);
         double randMultiplier = (max - min).TotalSeconds;
         foreach (int index in column.Value) {
@@ -104,7 +104,7 @@
 int countValues = 0;
 if (preprocessingData.IsType<double>(column.Key)) {
-  countValues = preprocessingData.GetValues<double>(column.Key).Count();
-} else if (preprocessingData.IsType<DateTime>(column.Key)) {
-  countValues = preprocessingData.GetValues<DateTime>(column.Key).Count();
+  countValues = preprocessingData.GetValues<double>(column.Key, false).Count();
+} else if (preprocessingData.IsType<DateTime>(column.Key)) {
+  countValues = preprocessingData.GetValues<DateTime>(column.Key, false).Count();
 }
 
@@ -165,13 +165,13 @@
 }
 
-public void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells) {
-  preprocessingData.InTransaction(() => {
-    foreach (var column in cells) {
-      if (preprocessingData.IsType<double>(column.Key)) {
-        ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key));
+public void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
+  preprocessingData.InTransaction(() => {
+    foreach (var column in cells) {
+      if (preprocessingData.IsType<double>(column.Key)) {
+        ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, considerSelection));
       } else if (preprocessingData.IsType<string>(column.Key)) {
-        ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key));
-      } else if (preprocessingData.IsType<DateTime>(column.Key)) {
-        ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key));
+        ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, considerSelection));
+      } else if (preprocessingData.IsType<DateTime>(column.Key)) {
+        ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, considerSelection));
       } else {
         throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
@@ -256,5 +256,5 @@
 private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
 
-  List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
+  List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex, false));
 
   // process all columns equally
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs
```diff
--- r10804
+++ r10809
@@ -122,10 +122,18 @@
 
 [Obsolete("use the index based variant, is faster")]
-public IList<T> GetValues<T>(string variableName) {
-  return GetValues<T>(GetColumnIndex(variableName));
-}
-
-public IList<T> GetValues<T>(int columnIndex) {
-  return (IList<T>)variableValues[columnIndex];
+public IList<T> GetValues<T>(string variableName, bool considerSelection) {
+  return GetValues<T>(GetColumnIndex(variableName), considerSelection);
+}
+
+public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
+  if (considerSelection) {
+    var list = new List<T>();
+    foreach (var rowIdx in currentSelection[columnIndex]) {
+      list.Add((T)variableValues[columnIndex][rowIdx]);
+    }
+    return list;
+  } else {
+    return (IList<T>)variableValues[columnIndex];
+  }
 }
 
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/SearchLogic.cs
```diff
--- r10776
+++ r10809
@@ -30,5 +30,5 @@
 
 private Dictionary<int, IList<int>> MissingValueIndicies { get; set; }
-private Dictionary<int, IEnumerable> ValuesWithoutNaN { get; set; }
+private Dictionary<int, IList> ValuesWithoutNaN { get; set; }
 
 public SearchLogic(ITransactionalPreprocessingData thePreprocessingData) {
@@ -36,5 +36,5 @@
 
   MissingValueIndicies = new Dictionary<int, IList<int>>();
-  ValuesWithoutNaN = new Dictionary<int, IEnumerable>();
+  ValuesWithoutNaN = new Dictionary<int, IList>();
 
   preprocessingData.Changed += preprocessingData_Changed;
@@ -58,5 +58,5 @@
   case DataPreprocessingChangedEventType.Transformation:
     MissingValueIndicies = new Dictionary<int, IList<int>>();
-    ValuesWithoutNaN = new Dictionary<int, IEnumerable>();
+    ValuesWithoutNaN = new Dictionary<int, IList>();
     break;
 }
@@ -110,22 +110,30 @@
 }
 
-public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex)
+public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection)
 {
-  if (!ValuesWithoutNaN.ContainsKey(columnIndex))
-  {
+  if (considerSelection) {
+    var selectedRows = preprocessingData.GetSelection()[columnIndex];
+
     List<T> values = new List<T>();
-
-    for (int row = 0; row < preprocessingData.Rows; ++row)
-    {
-      if (!IsMissingValue(columnIndex, row))
-      {
-        values.Add(preprocessingData.GetCell<T>(columnIndex, row));
+    foreach (var rowIdx in selectedRows) {
+      if (!IsMissingValue(columnIndex, rowIdx)) {
+        values.Add(preprocessingData.GetCell<T>(columnIndex, rowIdx));
       }
     }
+    return values;
+  } else {
+    if (!ValuesWithoutNaN.ContainsKey(columnIndex)) {
+      List<T> values = new List<T>();
 
-    ValuesWithoutNaN[columnIndex] = values;
+      for (int row = 0; row < preprocessingData.Rows; ++row) {
+        if (!IsMissingValue(columnIndex, row)) {
+          values.Add(preprocessingData.GetCell<T>(columnIndex, row));
+        }
+      }
+
+      ValuesWithoutNaN[columnIndex] = values;
+    }
+    return (IEnumerable<T>)ValuesWithoutNaN[columnIndex];
   }
-
-  return (IEnumerable<T>)ValuesWithoutNaN[columnIndex];
 }
 }
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs
```diff
--- r10663
+++ r10809
@@ -73,50 +73,50 @@
 }
 
-public T GetMin<T>(int columnIndex) where T : IComparable<T> {
-  return preprocessingData.GetValues<T>(columnIndex).Min();
+public T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
+  return preprocessingData.GetValues<T>(columnIndex, considerSelection).Min();
 }
 
-public T GetMax<T>(int columnIndex) where T : IComparable<T> {
-  return preprocessingData.GetValues<T>(columnIndex).Max();
+public T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
+  return preprocessingData.GetValues<T>(columnIndex, considerSelection).Max();
 }
 
-public double GetMedian(int columnIndex) {
+public double GetMedian(int columnIndex, bool considerSelection) {
   double median = double.NaN;
   if (preprocessingData.IsType<double>(columnIndex)) {
-    median = GetValuesWithoutNaN<double>(columnIndex).Median();
+    median = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Median();
   }
   return median;
 }
 
-public double GetAverage(int columnIndex) {
+public double GetAverage(int columnIndex, bool considerSelection) {
   double avg = double.NaN;
   if (preprocessingData.IsType<double>(columnIndex)) {
-    avg = GetValuesWithoutNaN<double>(columnIndex).Average();
+    avg = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Average();
   }
   return avg;
 }
 
-public DateTime GetMedianDateTime(int columnIndex) {
+public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) {
   DateTime median = new DateTime();
   if (preprocessingData.IsType<DateTime>(columnIndex)) {
-    median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
+    median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Median());
   }
   return median;
 }
 
-public DateTime GetAverageDateTime(int columnIndex) {
+public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) {
   DateTime avg = new DateTime();
   if (preprocessingData.IsType<DateTime>(columnIndex)) {
-    avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
+    avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Average());
   }
   return avg;
 }
 
-public T GetMostCommonValue<T>(int columnIndex) {
-  var t = preprocessingData.GetValues<T>(columnIndex);
+public T GetMostCommonValue<T>(int columnIndex, bool considerSelection) {
+  var t = preprocessingData.GetValues<T>(columnIndex, considerSelection);
   var t2 = t.GroupBy(x => x);
   var t3 = t2.Select(g => g.Key);
 
-  return preprocessingData.GetValues<T>(columnIndex)
+  return preprocessingData.GetValues<T>(columnIndex, considerSelection)
     .GroupBy(x => x)
     .OrderByDescending(g => g.Count())
@@ -129,7 +129,7 @@
   double stdDev = double.NaN;
   if (preprocessingData.IsType<double>(columnIndex)) {
-    stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
+    stdDev = GetValuesWithoutNaN<double>(columnIndex, false).StandardDeviation();
   } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
-    stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
+    stdDev = GetDateTimeAsSeconds(columnIndex, false).StandardDeviation();
   }
   return stdDev;
@@ -139,7 +139,7 @@
   double variance = double.NaN;
   if (preprocessingData.IsType<double>(columnIndex)) {
-    variance = preprocessingData.GetValues<double>(columnIndex).Variance();
+    variance = preprocessingData.GetValues<double>(columnIndex, false).Variance();
   } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
-    variance = GetDateTimeAsSeconds(columnIndex).Variance();
+    variance = GetDateTimeAsSeconds(columnIndex, false).Variance();
   }
   return variance;
@@ -147,5 +147,5 @@
 
 public int GetDifferentValuesCount<T>(int columnIndex) {
-  return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count();
+  return preprocessingData.GetValues<T>(columnIndex, false).GroupBy(x => x).Count();
 }
 
@@ -179,10 +179,10 @@
 }
 
-private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
-  return GetValuesWithoutNaN<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
+private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection) {
+  return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
 }
 
-private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex) {
-  return searchLogic.GetValuesWithoutNaN<T>(columnIndex);
+private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection) {
+  return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
 }
 
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IDataGridContent.cs
```diff
--- r10636
+++ r10809
@@ -25,5 +25,5 @@
 public interface IDataGridContent : IStringConvertibleMatrix {
   IDataGridLogic DataGridLogic { get; }
-  IManipulationLogic PreprocessingDataManipulation{ get; }
+  IManipulationLogic ManipulationLogic { get; }
   IFilterLogic FilterLogic { get; }
 
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IManipulationLogic.cs
```diff
--- r10737
+++ r10809
@@ -28,9 +28,9 @@
 void ReOrderToIndices(IList<Tuple<int, int>> indices);
 void ShuffleToIndices(IList<System.Tuple<int, int>> indices);
-void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells);
+void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells, bool considerSelection);
 void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells);
-void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells);
-void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells);
-void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells);
+void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection);
+void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells, bool considerSelection);
+void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection);
 void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, string value);
 void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value);
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IPreprocessingData.cs
```diff
--- r10804
+++ r10809
@@ -37,6 +37,6 @@
 
 [Obsolete("use the index based variant, is faster")]
-IList<T> GetValues<T>(string variableName);
-IList<T> GetValues<T>(int columnIndex);
+IList<T> GetValues<T>(string variableName, bool considerSelection);
+IList<T> GetValues<T>(int columnIndex, bool considerSelection);
 
 void SetValues<T>(int columnIndex, IList<T> values);
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/ISearchLogic.cs
```diff
--- r10776
+++ r10809
@@ -39,5 +39,5 @@
     bool IsMissingValue(int columnIndex, int rowIndex);
 
-    IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex);
+    IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection);
   }
 }
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IStatisticsLogic.cs
```diff
--- r10551
+++ r10809
@@ -33,15 +33,15 @@
 int GetRowMissingValueCount(int rowIndex);
 
-T GetMin<T>(int columnIndex) where T : IComparable<T>;
-T GetMax<T>(int columnIndex) where T : IComparable<T>;
+T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T>;
+T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T>;
 
-double GetMedian(int columnIndex);
-double GetAverage(int columnIndex);
-DateTime GetMedianDateTime(int columnIndex);
-DateTime GetAverageDateTime(int columnIndex);
+double GetMedian(int columnIndex, bool considerSelection);
+double GetAverage(int columnIndex, bool considerSelection);
+DateTime GetMedianDateTime(int columnIndex, bool considerSelection);
+DateTime GetAverageDateTime(int columnIndex, bool considerSelection);
 
 double GetStandardDeviation(int columnIndex);
 double GetVariance(int columnIndex);
-T GetMostCommonValue<T>(int columnIndex);
+T GetMostCommonValue<T>(int columnIndex, bool considerSelection);
 int GetDifferentValuesCount<T>(int columnIndex);
 
```
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/PreprocessingTransformator.cs
```diff
--- r10786
+++ r10809
@@ -54,5 +54,5 @@
 int colIndex = preprocessingData.GetColumnIndex(transformation.Column);
 
-var originalData = preprocessingData.GetValues<double>(colIndex);
+var originalData = preprocessingData.GetValues<double>(colIndex, false);
 var transformedData = ApplyDoubleTransformation(transformation, originalData, out success);
 if (!success) return;
```
Note: See TracChangeset for help on using the changeset viewer.