Changeset 11157 for stable/HeuristicLab.DataPreprocessing
- Timestamp: 07/09/14 13:08:40 (10 years ago)
- Location: stable
- Files: 14 edited, 1 copied
Legend:
- Unmodified
- Added
- Removed
stable
- Property svn:mergeinfo changed
-
stable/HeuristicLab.DataPreprocessing/3.4/HeuristicLab.DataPreprocessing-3.4.csproj
r11114 r11157 167 167 <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> 168 168 <PropertyGroup> 169 <PreBuildEvent >set Path=%25Path%25;$(ProjectDir);$(SolutionDir)169 <PreBuildEvent Condition=" '$(OS)' == 'Windows_NT' ">set Path=%25Path%25;$(ProjectDir);$(SolutionDir) 170 170 set ProjectDir=$(ProjectDir) 171 171 set SolutionDir=$(SolutionDir) 172 172 set Outdir=$(Outdir) 173 173 174 call PreBuildEvent.cmd</PreBuildEvent> 174 call PreBuildEvent.cmd 175 </PreBuildEvent> 176 <PreBuildEvent Condition=" '$(OS)' != 'Windows_NT' "> 177 export ProjectDir=$(ProjectDir) 178 export SolutionDir=$(SolutionDir) 179 180 $SolutionDir/PreBuildEvent.sh 181 </PreBuildEvent> 175 182 </PropertyGroup> 176 183 <!-- To modify your build process, add your task inside one of the targets below and uncomment it. -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/Filter/ComparisonFilter.cs
r10999 r11157 95 95 for (int row = 0; row < ConstrainedValue.Rows; ++row) { 96 96 object item = null; 97 if (ConstrainedValue. IsType<double>(constraintColumn)) {97 if (ConstrainedValue.VariableHasType<double>(constraintColumn)) { 98 98 item = new HeuristicLab.Data.DoubleValue(ConstrainedValue.GetCell<double>(ConstraintColumn, row)); 99 } else if (ConstrainedValue. IsType<DateTime>(constraintColumn)) {99 } else if (ConstrainedValue.VariableHasType<DateTime>(constraintColumn)) { 100 100 item = new HeuristicLab.Data.DateTimeValue(ConstrainedValue.GetCell<DateTime>(ConstraintColumn, row)); 101 101 } else { -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/FilteredPreprocessingData.cs
r11068 r11157 133 133 } 134 134 135 public bool IsType<T>(int columnIndex) {136 return originalData. IsType<T>(columnIndex);135 public bool VariableHasType<T>(int columnIndex) { 136 return originalData.VariableHasType<T>(columnIndex); 137 137 } 138 138 … … 156 156 originalData.InTransaction(() => { 157 157 for (int i = 0; i < filteredData.Columns; ++i) { 158 if (filteredData. IsType<double>(i)) {158 if (filteredData.VariableHasType<double>(i)) { 159 159 originalData.SetValues<double>(i, filteredData.GetValues<double>(i)); 160 } else if (filteredData. IsType<string>(i)) {160 } else if (filteredData.VariableHasType<string>(i)) { 161 161 originalData.SetValues<string>(i, filteredData.GetValues<string>(i)); 162 } else if (filteredData. IsType<DateTime>(i)) {162 } else if (filteredData.VariableHasType<DateTime>(i)) { 163 163 originalData.SetValues<DateTime>(i, filteredData.GetValues<DateTime>(i)); 164 164 } else { -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/HistogramContent.cs
r10992 r11157 61 61 //only return variable names from type double 62 62 for (int i = 0; i < PreprocessingData.Columns; ++i) { 63 if (PreprocessingData. IsType<double>(i)) {63 if (PreprocessingData.VariableHasType<double>(i)) { 64 64 double distinctValueCount = PreprocessingData.GetValues<double>(i).GroupBy(x => x).Count(); 65 65 bool distinctValuesOk = distinctValueCount <= MAX_DISTINCT_VALUES_FOR_CLASSIFCATION; -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/ManipulationLogic.cs
r11070 r11157 54 54 preprocessingData.InTransaction(() => { 55 55 foreach (var column in cells) { 56 if (preprocessingData. IsType<double>(column.Key)) {56 if (preprocessingData.VariableHasType<double>(column.Key)) { 57 57 double average = statisticsLogic.GetAverage(column.Key, considerSelection); 58 58 ReplaceIndicesByValue<double>(column.Key, column.Value, average); 59 } else if (preprocessingData. IsType<DateTime>(column.Key)) {59 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 60 60 DateTime average = statisticsLogic.GetAverageDateTime(column.Key, considerSelection); 61 61 ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average); … … 68 68 preprocessingData.InTransaction(() => { 69 69 foreach (var column in cells) { 70 if (preprocessingData. IsType<double>(column.Key)) {70 if (preprocessingData.VariableHasType<double>(column.Key)) { 71 71 double median = statisticsLogic.GetMedian(column.Key, considerSelection); 72 72 ReplaceIndicesByValue<double>(column.Key, column.Value, median); 73 } else if (preprocessingData. IsType<DateTime>(column.Key)) {73 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 74 74 DateTime median = statisticsLogic.GetMedianDateTime(column.Key, considerSelection); 75 75 ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median); … … 84 84 85 85 foreach (var column in cells) { 86 if (preprocessingData. IsType<double>(column.Key)) {86 if (preprocessingData.VariableHasType<double>(column.Key)) { 87 87 double max = statisticsLogic.GetMax<double>(column.Key, considerSelection); 88 88 double min = statisticsLogic.GetMin<double>(column.Key, considerSelection); … … 92 92 preprocessingData.SetCell<double>(column.Key, index, rand); 93 93 } 94 } else if (preprocessingData. 
IsType<DateTime>(column.Key)) {94 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 95 95 DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, considerSelection); 96 96 DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, considerSelection); … … 109 109 foreach (var column in cells) { 110 110 int countValues = 0; 111 if (preprocessingData. IsType<double>(column.Key)) {111 if (preprocessingData.VariableHasType<double>(column.Key)) { 112 112 countValues = preprocessingData.GetValues<double>(column.Key).Count(); 113 } else if (preprocessingData. IsType<DateTime>(column.Key)) {113 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 114 114 countValues = preprocessingData.GetValues<DateTime>(column.Key).Count(); 115 115 } … … 170 170 int valuesToInterpolate = nextIndex - prevIndex; 171 171 172 if (preprocessingData. IsType<double>(column.Key)) {172 if (preprocessingData.VariableHasType<double>(column.Key)) { 173 173 double prev = preprocessingData.GetCell<double>(column.Key, prevIndex); 174 174 double next = preprocessingData.GetCell<double>(column.Key, nextIndex); … … 179 179 preprocessingData.SetCell<double>(column.Key, i, interpolated); 180 180 } 181 } else if (preprocessingData. IsType<DateTime>(column.Key)) {181 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 182 182 DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex); 183 183 DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex); … … 212 212 preprocessingData.InTransaction(() => { 213 213 foreach (var column in cells) { 214 if (preprocessingData. IsType<double>(column.Key)) {214 if (preprocessingData.VariableHasType<double>(column.Key)) { 215 215 ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, considerSelection)); 216 } else if (preprocessingData. 
IsType<string>(column.Key)) {216 } else if (preprocessingData.VariableHasType<string>(column.Key)) { 217 217 ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, considerSelection)); 218 } else if (preprocessingData. IsType<DateTime>(column.Key)) {218 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 219 219 ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, considerSelection)); 220 220 } else { … … 265 265 preprocessingData.InTransaction(() => { 266 266 for (int i = 0; i < preprocessingData.Columns; ++i) { 267 if (preprocessingData. IsType<double>(i)) {267 if (preprocessingData.VariableHasType<double>(i)) { 268 268 reOrderToIndices<double>(i, indices); 269 } else if (preprocessingData. IsType<string>(i)) {269 } else if (preprocessingData.VariableHasType<string>(i)) { 270 270 reOrderToIndices<string>(i, indices); 271 } else if (preprocessingData. IsType<DateTime>(i)) {271 } else if (preprocessingData.VariableHasType<DateTime>(i)) { 272 272 reOrderToIndices<DateTime>(i, indices); 273 273 } … … 279 279 preprocessingData.InTransaction(() => { 280 280 for (int i = 0; i < preprocessingData.Columns; ++i) { 281 if (preprocessingData. IsType<double>(i)) {281 if (preprocessingData.VariableHasType<double>(i)) { 282 282 ShuffleToIndices<double>(i, indices); 283 } else if (preprocessingData. IsType<string>(i)) {283 } else if (preprocessingData.VariableHasType<string>(i)) { 284 284 ShuffleToIndices<string>(i, indices); 285 } else if (preprocessingData. IsType<DateTime>(i)) {285 } else if (preprocessingData.VariableHasType<DateTime>(i)) { 286 286 ShuffleToIndices<DateTime>(i, indices); 287 287 } … … 360 360 List<int> columns = new List<int>(); 361 361 for (int i = 0; i < preprocessingData.Columns; ++i) { 362 if (preprocessingData. 
IsType<double>(i) || preprocessingData.IsType<DateTime>(i)) {362 if (preprocessingData.VariableHasType<double>(i) || preprocessingData.VariableHasType<DateTime>(i)) { 363 363 double columnVariance = statisticsLogic.GetVariance(i); 364 364 if (columnVariance < variance) { -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/PreprocessingData.cs
r11068 r11157 52 52 var doubleVariableNames = new List<string>(); 53 53 for (int i = 0; i < Columns; ++i) { 54 if ( IsType<double>(i)) {54 if (VariableHasType<double>(i)) { 55 55 doubleVariableNames.Add(variableNames[i]); 56 56 } … … 100 100 variableValues = new List<IList>(); 101 101 foreach (var variableName in problemData.Dataset.VariableNames) { 102 if (dataset. IsType<double>(variableName)) {102 if (dataset.VariableHasType<double>(variableName)) { 103 103 variableValues.Insert(columnIndex, dataset.GetDoubleValues(variableName).ToList()); 104 } else if (dataset. IsType<string>(variableName)) {104 } else if (dataset.VariableHasType<string>(variableName)) { 105 105 variableValues.Insert(columnIndex, dataset.GetStringValues(variableName).ToList()); 106 } else if (dataset. IsType<DateTime>(variableName)) {106 } else if (dataset.VariableHasType<DateTime>(variableName)) { 107 107 variableValues.Insert(columnIndex, dataset.GetDateTimeValues(variableName).ToList()); 108 108 } else { … … 171 171 public abstract int GetColumnIndex(string variableName); 172 172 173 public abstract bool IsType<T>(int columnIndex);173 public abstract bool VariableHasType<T>(int columnIndex); 174 174 175 175 [Obsolete("use the index based variant, is faster")] -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/SearchLogic.cs
r11002 r11157 85 85 86 86 public bool IsMissingValue(int columnIndex, int rowIndex) { 87 if (preprocessingData. IsType<double>(columnIndex)) {87 if (preprocessingData.VariableHasType<double>(columnIndex)) { 88 88 return double.IsNaN(preprocessingData.GetCell<double>(columnIndex, rowIndex)); 89 } else if (preprocessingData. IsType<string>(columnIndex)) {89 } else if (preprocessingData.VariableHasType<string>(columnIndex)) { 90 90 return string.IsNullOrEmpty(preprocessingData.GetCell<string>(columnIndex, rowIndex)); 91 } else if (preprocessingData. IsType<DateTime>(columnIndex)) {91 } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) { 92 92 return preprocessingData.GetCell<DateTime>(columnIndex, rowIndex).Equals(DateTime.MinValue); 93 93 } else { … … 98 98 public IList<int> GetMissingValueIndices(int columnIndex) { 99 99 if (!MissingValueIndicies.ContainsKey(columnIndex)){ 100 if (preprocessingData. IsType<double>(columnIndex)) {100 if (preprocessingData.VariableHasType<double>(columnIndex)) { 101 101 MissingValueIndicies[columnIndex] = GetMissingValueIndices<double>(columnIndex); 102 } else if (preprocessingData. IsType<string>(columnIndex)) {102 } else if (preprocessingData.VariableHasType<string>(columnIndex)) { 103 103 MissingValueIndicies[columnIndex] = GetMissingValueIndices<string>(columnIndex); 104 } else if (preprocessingData. IsType<DateTime>(columnIndex)) {104 } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) { 105 105 MissingValueIndicies[columnIndex] = GetMissingValueIndices<DateTime>(columnIndex); 106 106 } else { -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/StatisticsLogic.cs
r10812 r11157 49 49 50 50 for (int i = 0; i < preprocessingData.Columns; ++i) { 51 if (preprocessingData. IsType<double>(i)) {51 if (preprocessingData.VariableHasType<double>(i)) { 52 52 ++count; 53 53 } … … 82 82 public double GetMedian(int columnIndex, bool considerSelection) { 83 83 double median = double.NaN; 84 if (preprocessingData. IsType<double>(columnIndex)) {84 if (preprocessingData.VariableHasType<double>(columnIndex)) { 85 85 median = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Median(); 86 86 } … … 90 90 public double GetAverage(int columnIndex, bool considerSelection) { 91 91 double avg = double.NaN; 92 if (preprocessingData. IsType<double>(columnIndex)) {92 if (preprocessingData.VariableHasType<double>(columnIndex)) { 93 93 avg = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Average(); 94 94 } … … 98 98 public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) { 99 99 DateTime median = new DateTime(); 100 if (preprocessingData. IsType<DateTime>(columnIndex)) {100 if (preprocessingData.VariableHasType<DateTime>(columnIndex)) { 101 101 median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Median()); 102 102 } … … 106 106 public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) { 107 107 DateTime avg = new DateTime(); 108 if (preprocessingData. IsType<DateTime>(columnIndex)) {108 if (preprocessingData.VariableHasType<DateTime>(columnIndex)) { 109 109 avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Average()); 110 110 } … … 127 127 public double GetStandardDeviation(int columnIndex) { 128 128 double stdDev = double.NaN; 129 if (preprocessingData. IsType<double>(columnIndex)) {129 if (preprocessingData.VariableHasType<double>(columnIndex)) { 130 130 stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation(); 131 } else if (preprocessingData. 
IsType<DateTime>(columnIndex)) {131 } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) { 132 132 stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation(); 133 133 } … … 137 137 public double GetVariance(int columnIndex) { 138 138 double variance = double.NaN; 139 if (preprocessingData. IsType<double>(columnIndex)) {139 if (preprocessingData.VariableHasType<double>(columnIndex)) { 140 140 variance = GetValuesWithoutNaN<double>(columnIndex).Variance(); 141 } else if (preprocessingData. IsType<DateTime>(columnIndex)) {141 } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) { 142 142 variance = GetDateTimeAsSeconds(columnIndex).Variance(); 143 143 } … … 163 163 } 164 164 165 public bool IsType<T>(int columnIndex) {166 return preprocessingData. IsType<T>(columnIndex);165 public bool VariableHasType<T>(int columnIndex) { 166 return preprocessingData.VariableHasType<T>(columnIndex); 167 167 } 168 168 169 169 public string GetColumnTypeAsString(int columnIndex) { 170 if (preprocessingData. IsType<double>(columnIndex)) {170 if (preprocessingData.VariableHasType<double>(columnIndex)) { 171 171 return "double"; 172 } else if (preprocessingData. IsType<string>(columnIndex)) {172 } else if (preprocessingData.VariableHasType<string>(columnIndex)) { 173 173 return "string"; 174 } else if (preprocessingData. IsType<DateTime>(columnIndex)) {174 } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) { 175 175 return "DateTime"; 176 176 } -
stable/HeuristicLab.DataPreprocessing/3.4/Implementations/TransactionalPreprocessingData.cs
r11114 r11157 115 115 } 116 116 117 public override bool IsType<T>(int columnIndex) {117 public override bool VariableHasType<T>(int columnIndex) { 118 118 return variableValues[columnIndex] is List<T>; 119 119 } … … 138 138 public override void SetValues<T>(int columnIndex, IList<T> values) { 139 139 SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1); 140 if ( IsType<T>(columnIndex)) {140 if (VariableHasType<T>(columnIndex)) { 141 141 variableValues[columnIndex] = (IList)values; 142 142 } else { … … 149 149 public override bool SetValue(string value, int columnIndex, int rowIndex) { 150 150 bool valid = false; 151 if ( IsType<double>(columnIndex)) {151 if (VariableHasType<double>(columnIndex)) { 152 152 double val; 153 153 valid = double.TryParse(value, out val); 154 154 SetValueIfValid(columnIndex, rowIndex, valid, val); 155 } else if ( IsType<string>(columnIndex)) {155 } else if (VariableHasType<string>(columnIndex)) { 156 156 valid = value != null; 157 157 SetValueIfValid(columnIndex, rowIndex, valid, value); 158 } else if ( IsType<DateTime>(columnIndex)) {158 } else if (VariableHasType<DateTime>(columnIndex)) { 159 159 DateTime date; 160 160 valid = DateTime.TryParse(value, out date); … … 177 177 bool valid = false; 178 178 errorMessage = string.Empty; 179 if ( IsType<double>(columnIndex)) {179 if (VariableHasType<double>(columnIndex)) { 180 180 double val; 181 181 valid = double.TryParse(value, out val); … … 183 183 errorMessage = "Invalid Value (Valid Value Format: \"" + FormatPatterns.GetDoubleFormatPattern() + "\")"; 184 184 } 185 } else if ( IsType<string>(columnIndex)) {185 } else if (VariableHasType<string>(columnIndex)) { 186 186 valid = value != null; 187 187 if (!valid) { 188 188 errorMessage = "Invalid Value (string must not be null)"; 189 189 } 190 } else if ( IsType<DateTime>(columnIndex)) {190 } else if (VariableHasType<DateTime>(columnIndex)) { 191 191 DateTime date; 192 192 valid = DateTime.TryParse(value, out date); 
… … 207 207 208 208 public override bool AreAllStringColumns(IEnumerable<int> columnIndices) { 209 return columnIndices.All(x => IsType<string>(x));209 return columnIndices.All(x => VariableHasType<string>(x)); 210 210 } 211 211 -
stable/HeuristicLab.DataPreprocessing/3.4/Interfaces/IPreprocessingData.cs
r11068 r11157 62 62 int GetColumnIndex(string variableName); 63 63 64 bool IsType<T>(int columnIndex);64 bool VariableHasType<T>(int columnIndex); 65 65 66 66 int Columns { get; } -
stable/HeuristicLab.DataPreprocessing/3.4/Interfaces/IStatisticsLogic.cs
r10811 r11157 46 46 int GetDifferentValuesCount<T>(int columnIndex); 47 47 48 bool IsType<T>(int columnIndex);48 bool VariableHasType<T>(int columnIndex); 49 49 string GetColumnTypeAsString(int columnIndex); 50 50 string GetVariableName(int columnIndex); -
stable/HeuristicLab.DataPreprocessing/3.4/PreprocessingTransformator.cs
r11068 r11157 42 42 43 43 public bool ApplyTransformations(IEnumerable<ITransformation> transformations, bool preserveColumns, out string errorMsg) { 44 bool success ;45 44 bool success = false; 45 errorMsg = string.Empty; 46 46 preprocessingData.BeginTransaction(DataPreprocessingChangedEventType.Transformation); 47 47 … … 64 64 renamedColumns.Clear(); 65 65 } 66 // only accept changes if everything was successful 67 if (!success) { 68 preprocessingData.Undo(); 69 } 70 } 71 catch (Exception e) { 72 preprocessingData.Undo(); 73 if (string.IsNullOrEmpty(errorMsg)) errorMsg = e.Message; 66 74 } 67 75 finally { … … 103 111 private IEnumerable<double> ApplyDoubleTransformation(Transformation<double> transformation, IList<double> data, out bool success, out string errorMsg) { 104 112 success = transformation.Check(data, out errorMsg); 105 return transformation.Apply(data); 113 // don't apply when the check fails 114 if (success) 115 return transformation.Apply(data); 116 else 117 return data; 106 118 } 107 119 -
stable/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs
r11098 r11157 91 91 for (int i = 0; i < context.Data.Columns; ++i) { 92 92 var variableName = context.Data.GetVariableName(i); 93 if (context.Data. IsType<double>(i)93 if (context.Data.VariableHasType<double>(i) 94 94 && variableName != targetVariable 95 95 && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {
Note: See TracChangeset for help on using the changeset viewer.