Changeset 15283 for branches/DataPreprocessing Cleanup
- Timestamp: 07/24/17 15:17:35 (7 years ago)
- Location: branches/DataPreprocessing Cleanup
- Files: 1 deleted, 9 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing.Views/3.4/DataPreprocessingView.cs
r15274 r15283 49 49 if (Content != null) { 50 50 var data = Content.Data; 51 var statisticsLogic = new StatisticsLogic(data); 52 var manipulationLogic = new ManipulationLogic(data, statisticsLogic); 51 var manipulationLogic = new ManipulationLogic(data); 53 52 54 53 var viewShortcuts = new ItemList<IViewShortcut> { 55 54 new DataGridContent(data, manipulationLogic), 56 new StatisticsContent(data , statisticsLogic),55 new StatisticsContent(data), 57 56 58 57 new LineChartContent(data), -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing.Views/3.4/ManipulationView.cs
r15274 r15283 83 83 protected override void RegisterContentEvents() { 84 84 base.RegisterContentEvents(); 85 Content.ManipulationLogic.Pre ProcessingData.FilterChanged += FilterLogic_FilterChanged;85 Content.ManipulationLogic.PreprocessingData.FilterChanged += FilterLogic_FilterChanged; 86 86 } 87 87 88 88 protected override void DeregisterContentEvents() { 89 Content.ManipulationLogic.Pre ProcessingData.FilterChanged -= FilterLogic_FilterChanged;89 Content.ManipulationLogic.PreprocessingData.FilterChanged -= FilterLogic_FilterChanged; 90 90 base.DeregisterContentEvents(); 91 91 } … … 131 131 var columnIndices = new Dictionary<int, IList<int>> { { columnIndex, missingValuesIndices } }; 132 132 133 for (int rowIndex = 0; rowIndex < Content.ManipulationLogic.Pre ProcessingData.Rows; rowIndex++)134 if (Content.ManipulationLogic.Pre ProcessingData.IsCellEmpty(columnIndex, rowIndex))133 for (int rowIndex = 0; rowIndex < Content.ManipulationLogic.PreprocessingData.Rows; rowIndex++) 134 if (Content.ManipulationLogic.PreprocessingData.IsCellEmpty(columnIndex, rowIndex)) 135 135 missingValuesIndices.Add(rowIndex); 136 136 … … 242 242 if (string.IsNullOrEmpty(replaceValue)) { 243 243 lblPreviewReplaceMissingValues.Text = "Preview not possible yet - please input the text which will be used as replacement."; 244 } else if (!Content.ManipulationLogic.Pre ProcessingData.Validate(txtReplaceValue.Text, out errorMessage, columnIndex)) {244 } else if (!Content.ManipulationLogic.PreprocessingData.Validate(txtReplaceValue.Text, out errorMessage, columnIndex)) { 245 245 lblPreviewReplaceMissingValues.Text = "Preview not possible yet - " + errorMessage; 246 246 } else { … … 253 253 if (btnApply.Enabled) { 254 254 int count = 0; 255 for (int rowIndex = 0; rowIndex < Content.ManipulationLogic.Pre ProcessingData.Rows; rowIndex++)256 if (Content.ManipulationLogic.Pre ProcessingData.IsCellEmpty(columnIndex, rowIndex)) count++;255 for (int rowIndex = 0; rowIndex < 
Content.ManipulationLogic.PreprocessingData.Rows; rowIndex++) 256 if (Content.ManipulationLogic.PreprocessingData.IsCellEmpty(columnIndex, rowIndex)) count++; 257 257 258 258 int cellCount = Content.PreprocessingData.Rows * Content.PreprocessingData.Columns; -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing.Views/3.4/StatisticsView.cs
r15274 r15283 87 87 88 88 private void UpdateData(Dictionary<string, bool> oldVisibility = null) { 89 var logic = Content.StatisticsLogic;90 89 var data = Content.PreprocessingData; 91 90 rowsTextBox.Text = data.Rows.ToString(); … … 93 92 numericColumnsTextBox.Text = GetColumnCount<double>().ToString(); 94 93 nominalColumnsTextBox5.Text = GetColumnCount<string>().ToString(); 95 missingValuesTextBox.Text = logic.GetMissingValueCount().ToString();96 totalValuesTextBox.Text = (data.Rows * data.Rows - logic.GetMissingValueCount()).ToString();94 missingValuesTextBox.Text = data.GetMissingValueCount().ToString(); 95 totalValuesTextBox.Text = (data.Rows * data.Rows - data.GetMissingValueCount()).ToString(); 97 96 98 97 var variableNames = Content.PreprocessingData.VariableNames.ToList(); … … 151 150 private List<string> GetStatistics(int varIdx) { 152 151 List<string> list; 153 var logic = Content.StatisticsLogic;154 if ( logic.VariableHasType<double>(varIdx)) {152 var data = Content.PreprocessingData; 153 if (data.VariableHasType<double>(varIdx)) { 155 154 list = GetDoubleColumns(varIdx); 156 } else if ( logic.VariableHasType<string>(varIdx)) {155 } else if (data.VariableHasType<string>(varIdx)) { 157 156 list = GetStringColumns(varIdx); 158 } else if ( logic.VariableHasType<DateTime>(varIdx)) {157 } else if (data.VariableHasType<DateTime>(varIdx)) { 159 158 list = GetDateTimeColumns(varIdx); 160 159 } else { … … 168 167 169 168 private List<string> GetDoubleColumns(int statIdx) { 170 var logic = Content.StatisticsLogic;169 var data = Content.PreprocessingData; 171 170 return new List<string> { 172 logic.GetColumnTypeAsString(statIdx),173 logic.GetMissingValueCount(statIdx).ToString(),174 logic.GetMin<double>(statIdx,double.NaN).ToString(),175 logic.GetMax<double>(statIdx,double.NaN).ToString(),176 logic.GetMedian(statIdx).ToString(),177 logic.GetAverage(statIdx).ToString(),178 logic.GetStandardDeviation(statIdx).ToString(),179 logic.GetVariance(statIdx).ToString(),180 
logic.GetOneQuarterPercentile(statIdx).ToString(),181 logic.GetThreeQuarterPercentile(statIdx).ToString(),182 logic.GetMostCommonValue<double>(statIdx,double.NaN).ToString(),183 logic.GetDifferentValuesCount<double>(statIdx).ToString()171 data.GetVariableType(statIdx).Name, 172 data.GetMissingValueCount(statIdx).ToString(), 173 data.GetMin<double>(statIdx, emptyValue: double.NaN).ToString(), 174 data.GetMax<double>(statIdx, emptyValue: double.NaN).ToString(), 175 data.GetMedian<double>(statIdx, emptyValue: double.NaN).ToString(), 176 data.GetMean<double>(statIdx, emptyValue: double.NaN).ToString(), 177 data.GetStandardDeviation<double>(statIdx, emptyValue: double.NaN).ToString(), 178 data.GetVariance<double>(statIdx, emptyValue: double.NaN).ToString(), 179 data.GetQuantile<double>(0.25, statIdx, emptyValue: double.NaN).ToString(), 180 data.GetQuantile<double>(0.75, statIdx, emptyValue: double.NaN).ToString(), 181 data.GetMode<double>(statIdx, emptyValue: double.NaN).ToString(), 182 data.GetDistinctValues<double>(statIdx).ToString() 184 183 }; 185 184 } 186 185 187 186 private List<string> GetStringColumns(int statIdx) { 188 var logic = Content.StatisticsLogic;187 var data = Content.PreprocessingData; 189 188 return new List<string> { 190 logic.GetColumnTypeAsString(statIdx),191 logic.GetMissingValueCount(statIdx).ToString(),192 "", // min193 "", // max194 "", // median189 data.GetVariableType(statIdx).Name, 190 data.GetMissingValueCount(statIdx).ToString(), 191 "", // data.GetMin<string>(statIdx, emptyValue: string.Empty), //min 192 "", // data.GetMax<string>(statIdx, emptyValue: string.Empty), //max 193 "", // data.GetMedian<string>(statIdx, emptyValue: string.Empty), //median 195 194 "", //average 196 195 "", //standard deviation 197 196 "", //variance 198 "", // quarter percentile199 "", // three quarter percentile200 logic.GetMostCommonValue<string>(statIdx,string.Empty) ?? 
"",201 logic.GetDifferentValuesCount<string>(statIdx).ToString()197 "", // data.GetQuantile<string>(0.25, statIdx, emptyValue: string.Empty), //quarter percentile 198 "", // data.GetQuantile<string>(0.75, statIdx, emptyValue: string.Empty), //three quarter percentile 199 data.GetMode<string>(statIdx, emptyValue: string.Empty), 200 data.GetDistinctValues<string>(statIdx).ToString() 202 201 }; 203 202 } 204 203 205 204 private List<string> GetDateTimeColumns(int statIdx) { 206 var logic = Content.StatisticsLogic;205 var data = Content.PreprocessingData; 207 206 return new List<string> { 208 logic.GetColumnTypeAsString(statIdx),209 logic.GetMissingValueCount(statIdx).ToString(),210 logic.GetMin<DateTime>(statIdx, DateTime.MinValue).ToString(),211 logic.GetMax<DateTime>(statIdx, DateTime.MinValue).ToString(),212 logic.GetMedianDateTime(statIdx).ToString(),213 logic.GetAverageDateTime(statIdx).ToString(),214 logic.GetStandardDeviation(statIdx).ToString(),215 logic.GetVariance(statIdx).ToString(),216 logic.GetOneQuarterPercentile(statIdx).ToString(),217 logic.GetThreeQuarterPercentile(statIdx).ToString(),218 logic.GetMostCommonValue<DateTime>(statIdx, DateTime.MinValue).ToString(),219 logic.GetDifferentValuesCount<DateTime>(statIdx).ToString()207 data.GetVariableType(statIdx).Name, 208 data.GetMissingValueCount(statIdx).ToString(), 209 data.GetMin<DateTime>(statIdx).ToString(), 210 data.GetMax<DateTime>(statIdx).ToString(), 211 data.GetMedian<DateTime>(statIdx).ToString(), 212 data.GetMean<DateTime>(statIdx).ToString(), 213 "", // should be of type TimeSpan //data.GetStandardDeviation<DateTime>(statIdx).ToString(), 214 "", // should be of type TimeSpan //data.GetVariance<DateTime>(statIdx).ToString(), 215 data.GetQuantile<DateTime>(0.25, statIdx).ToString(), 216 data.GetQuantile<DateTime>(0.75, statIdx).ToString(), 217 data.GetMode<DateTime>(statIdx).ToString(), 218 data.GetDistinctValues<DateTime>(statIdx).ToString() 220 219 }; 221 220 } -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/StatisticsContent.cs
r15274 r15283 33 33 } 34 34 35 [Storable]36 public StatisticsLogic StatisticsLogic { get; private set; }37 38 35 #region Constructor, Cloning & Persistence 39 public StatisticsContent(IFilteredPreprocessingData preprocessingData , StatisticsLogic statisticsLogic)36 public StatisticsContent(IFilteredPreprocessingData preprocessingData) 40 37 : base(preprocessingData) { 41 StatisticsLogic = statisticsLogic;42 38 } 43 39 44 40 public StatisticsContent(StatisticsContent original, Cloner cloner) 45 41 : base(original, cloner) { 46 StatisticsLogic = cloner.Clone(original.StatisticsLogic);47 42 } 48 43 public override IDeepCloneable Clone(Cloner cloner) { … … 56 51 57 52 public event DataPreprocessingChangedEventHandler Changed { 58 add { StatisticsLogic.Changed += value; }59 remove { StatisticsLogic.Changed -= value; }53 add { PreprocessingData.Changed += value; } 54 remove { PreprocessingData.Changed -= value; } 60 55 } 61 56 } -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/FilteredPreprocessingData.cs
r15274 r15283 179 179 } 180 180 181 public Type GetVariableType(int columnIndex) { 182 return ActiveData.GetVariableType(columnIndex); 183 } 184 181 185 public IList<string> InputVariables { 182 186 get { return ActiveData.InputVariables; } … … 269 273 public void EndTransaction() { 270 274 originalData.EndTransaction(); 275 } 276 #endregion 277 278 #region Statistics 279 public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 280 return ActiveData.GetMin<T>(columnIndex, considerSelection, emptyValue); 281 } 282 public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 283 return ActiveData.GetMax<T>(columnIndex, considerSelection, emptyValue); 284 } 285 public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 286 return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue); 287 } 288 public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> { 289 return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue); 290 } 291 public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> { 292 return ActiveData.GetMode<T>(columnIndex, considerSelection, emptyValue); 293 } 294 public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 295 return ActiveData.GetStandardDeviation<T>(columnIndex, considerSelection, emptyValue); 296 } 297 public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 298 return ActiveData.GetVariance<T>(columnIndex, considerSelection, emptyValue); 299 } 300 public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> { 301 return ActiveData.GetQuantile<T>(alpha, columnIndex, considerSelection, emptyValue); 302 } 303 
public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) { 304 return ActiveData.GetDistinctValues<T>(columnIndex, considerSelection); 305 } 306 307 public int GetMissingValueCount() { 308 return ActiveData.GetMissingValueCount(); 309 } 310 public int GetMissingValueCount(int columnIndex) { 311 return ActiveData.GetMissingValueCount(columnIndex); 312 } 313 public int GetRowMissingValueCount(int rowIndex) { 314 return ActiveData.GetRowMissingValueCount(rowIndex); 271 315 } 272 316 #endregion -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/IPreprocessingData.cs
r15270 r15283 66 66 67 67 bool VariableHasType<T>(int columnIndex); 68 Type GetVariableType(int columnIndex); 68 69 69 70 IList<string> InputVariables { get; } … … 105 106 void EndTransaction(); 106 107 #endregion 108 109 #region Statistics 110 T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)); 111 T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)); 112 T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)); 113 T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T>; 114 T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T>; 115 T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)); 116 T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)); 117 T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T>; 118 int GetDistinctValues<T>(int columnIndex, bool considerSelection = false); 119 120 int GetMissingValueCount(); 121 int GetMissingValueCount(int columnIndex); 122 int GetRowMissingValueCount(int rowIndex); 123 #endregion 107 124 } 108 125 } -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs
r15274 r15283 327 327 } 328 328 329 public Type GetVariableType(int columnIndex) { 330 var listType = variableValues[columnIndex].GetType(); 331 return listType.GenericTypeArguments.Single(); 332 } 333 329 334 public IList<string> InputVariables { get; private set; } 330 335 public string TargetVariable { get; private set; } // optional … … 524 529 #endregion 525 530 531 #region Statistics 532 public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 533 var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection); 534 return values.Any() ? values.Min() : emptyValue; 535 } 536 537 public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 538 var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection); 539 return values.Any() ? values.Max() : emptyValue; 540 } 541 542 public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 543 if (typeof(T) == typeof(double)) { 544 var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection); 545 return values.Any() ? Convert<T>(values.Average()) : emptyValue; 546 } 547 if (typeof(T) == typeof(string)) { 548 return Convert<T>(string.Empty); 549 } 550 if (typeof(T) == typeof(DateTime)) { 551 var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection); 552 return values.Any() ? Convert<T>(AggregateAsDouble(values, Enumerable.Average)) : emptyValue; 553 } 554 555 throw new InvalidOperationException(typeof(T) + " not supported"); 556 } 557 558 public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> { 559 if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 560 var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection); 561 return doubleValues.Any() ? 
Convert<T>(doubleValues.Median()) : emptyValue; 562 } 563 var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection); 564 return values.Any() ? values.Quantile(0.5) : emptyValue; 565 } 566 567 public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> { 568 var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection); 569 return values.Any() ? values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First() : emptyValue; 570 } 571 572 public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 573 if (typeof(T) == typeof(double)) { 574 var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection); 575 return values.Any() ? Convert<T>(values.StandardDeviation()) : emptyValue; 576 } 577 // For DateTime, std.dev / variance would have to be TimeSpan 578 //if (typeof(T) == typeof(DateTime)) { 579 // var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection); 580 // return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.StandardDeviation)) : emptyValue; 581 //} 582 return default(T); 583 } 584 585 public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) { 586 if (typeof(T) == typeof(double)) { 587 var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection); 588 return values.Any() ? Convert<T>(values.Variance()) : emptyValue; 589 } 590 // DateTime variance often overflows long, thus the corresponding DateTime is invalid 591 //if (typeof(T) == typeof(DateTime)) { 592 // var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection); 593 // return values.Any() ? 
Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.Variance)) : emptyValue; 594 //} 595 return default(T); 596 } 597 598 public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> { 599 if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 600 var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection); 601 return doubleValues.Any() ? Convert<T>(doubleValues.Quantile(alpha)) : emptyValue; 602 } 603 var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection); 604 return values.Any() ? values.Quantile(alpha) : emptyValue; 605 } 606 607 public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) { 608 var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection); 609 return values.GroupBy(x => x).Count(); 610 } 611 612 private IEnumerable<T> GetValuesWithoutMissingValues<T>(int columnIndex, bool considerSelection) { 613 return GetValues<T>(columnIndex, considerSelection).Where(x => !IsMissingValue(x)); 614 } 615 616 private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) { 617 return new DateTime((long)(func(values.Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond)) * TimeSpan.TicksPerSecond)); 618 } 619 private static T Convert<T>(object obj) { return (T)obj; } 620 621 public int GetMissingValueCount() { 622 int count = 0; 623 for (int i = 0; i < Columns; ++i) { 624 count += GetMissingValueCount(i); 625 } 626 return count; 627 } 628 public int GetMissingValueCount(int columnIndex) { 629 int sum = 0; 630 for (int i = 0; i < Rows; i++) { 631 if (IsCellEmpty(columnIndex, i)) 632 sum++; 633 } 634 return sum; 635 } 636 public int GetRowMissingValueCount(int rowIndex) { 637 int sum = 0; 638 for (int i = 0; i < Columns; i++) { 639 if (IsCellEmpty(i, rowIndex)) 640 sum++; 641 } 642 return sum; 643 } 644 #endregion 645 526 646 
#region Helpers 527 647 private static IList<IList> CopyVariableValues(IList<IList> original) { … … 534 654 #endregion 535 655 } 656 657 // Adapted from HeuristicLab.Common.EnumerableStatisticExtensions 658 internal static class EnumerableExtensions { 659 public static T Quantile<T>(this IEnumerable<T> values, double alpha) where T : IComparable<T> { 660 T[] valuesArr = values.ToArray(); 661 int n = valuesArr.Length; 662 if (n == 0) throw new InvalidOperationException("Enumeration contains no elements."); 663 664 var pos = n * alpha; 665 666 return Select((int)Math.Ceiling(pos) - 1, valuesArr); 667 668 } 669 670 private static T Select<T>(int k, T[] arr) where T : IComparable<T> { 671 int i, ir, j, l, mid, n = arr.Length; 672 T a; 673 l = 0; 674 ir = n - 1; 675 for (;;) { 676 if (ir <= l + 1) { 677 // Active partition contains 1 or 2 elements. 678 if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) { 679 // Case of 2 elements. 680 Swap(arr, l, ir); 681 } 682 return arr[k]; 683 } else { 684 mid = (l + ir) >> 1; // Choose median of left, center, and right elements 685 Swap(arr, mid, l + 1); // as partitioning element a. Also 686 687 if (arr[l].CompareTo(arr[ir]) > 0) { // rearrange so that arr[l] arr[ir] <= arr[l+1], 688 Swap(arr, l, ir); // . arr[ir] >= arr[l+1] 689 } 690 691 if (arr[l + 1].CompareTo(arr[ir]) > 0) { 692 Swap(arr, l + 1, ir); 693 } 694 if (arr[l].CompareTo(arr[l + 1]) > 0) { 695 Swap(arr, l, l + 1); 696 } 697 i = l + 1; // Initialize pointers for partitioning. 698 j = ir; 699 a = arr[l + 1]; // Partitioning element. 700 for (;;) { // Beginning of innermost loop. 701 do i++; while (arr[i].CompareTo(a) < 0); // Scan up to find element > a. 702 do j--; while (arr[j].CompareTo(a) > 0); // Scan down to find element < a. 703 if (j < i) break; // Pointers crossed. Partitioning complete. 704 Swap(arr, i, j); 705 } // End of innermost loop. 706 arr[l + 1] = arr[j]; // Insert partitioning element. 
707 arr[j] = a; 708 if (j >= k) ir = j - 1; // Keep active the partition that contains the 709 if (j <= k) l = i; // kth element. 710 } 711 } 712 } 713 714 private static void Swap<T>(T[] arr, int i, int j) { 715 T temp = arr[i]; 716 arr[i] = arr[j]; 717 arr[j] = temp; 718 } 719 } 536 720 } -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/HeuristicLab.DataPreprocessing-3.4.csproj
r15274 r15283 145 145 <Compile Include="Content\DataGridContent.cs" /> 146 146 <Compile Include="PreprocessingContext.cs" /> 147 <Compile Include="Logic\StatisticsLogic.cs" />148 147 <Compile Include="Plugin.cs" /> 149 148 <Compile Include="Properties\AssemblyInfo.cs" /> -
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Logic/ManipulationLogic.cs
r15274 r15283 37 37 private readonly IFilteredPreprocessingData preprocessingData; 38 38 39 [Storable]40 private readonly StatisticsLogic statisticsLogic;41 42 39 public IEnumerable<string> VariableNames { 43 40 get { return preprocessingData.VariableNames; } 44 41 } 45 42 46 public IFilteredPreprocessingData Pre ProcessingData {43 public IFilteredPreprocessingData PreprocessingData { 47 44 get { return preprocessingData; } 48 45 } 49 46 50 47 #region Constructor, Cloning & Persistence 51 public ManipulationLogic(IFilteredPreprocessingData preprocessingData , StatisticsLogic theStatisticsLogic) {48 public ManipulationLogic(IFilteredPreprocessingData preprocessingData) { 52 49 this.preprocessingData = preprocessingData; 53 statisticsLogic = theStatisticsLogic;54 50 } 55 51 public ManipulationLogic(ManipulationLogic original, Cloner cloner) 56 52 : base(original, cloner) { 57 53 preprocessingData = cloner.Clone(original.preprocessingData); 58 statisticsLogic = cloner.Clone(original.statisticsLogic);59 54 } 60 55 public override IDeepCloneable Clone(Cloner cloner) { … … 77 72 foreach (var column in cells) { 78 73 if (preprocessingData.VariableHasType<double>(column.Key)) { 79 double average = statisticsLogic.GetAverage(column.Key, considerSelection);74 double average = PreprocessingData.GetMean<double>(column.Key, considerSelection); 80 75 ReplaceIndicesByValue<double>(column.Key, column.Value, average); 81 76 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 82 DateTime average = statisticsLogic.GetAverageDateTime(column.Key, considerSelection);77 DateTime average = PreprocessingData.GetMean<DateTime>(column.Key, considerSelection); 83 78 ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average); 84 79 } … … 91 86 foreach (var column in cells) { 92 87 if (preprocessingData.VariableHasType<double>(column.Key)) { 93 double median = statisticsLogic.GetMedian(column.Key, considerSelection);88 double median = 
PreprocessingData.GetMedian<double>(column.Key, considerSelection); 94 89 ReplaceIndicesByValue<double>(column.Key, column.Value, median); 95 90 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 96 DateTime median = statisticsLogic.GetMedianDateTime(column.Key, considerSelection);91 DateTime median = PreprocessingData.GetMedian<DateTime>(column.Key, considerSelection); 97 92 ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median); 98 93 } … … 107 102 foreach (var column in cells) { 108 103 if (preprocessingData.VariableHasType<double>(column.Key)) { 109 double max = statisticsLogic.GetMax<double>(column.Key, double.NaN, considerSelection);110 double min = statisticsLogic.GetMin<double>(column.Key, double.NaN, considerSelection);104 double max = PreprocessingData.GetMax<double>(column.Key, considerSelection); 105 double min = PreprocessingData.GetMin<double>(column.Key, considerSelection); 111 106 double randMultiplier = (max - min); 112 107 foreach (int index in column.Value) { … … 115 110 } 116 111 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 117 DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, DateTime.MinValue, considerSelection);118 DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, DateTime.MinValue, considerSelection);112 DateTime min = PreprocessingData.GetMin<DateTime>(column.Key, considerSelection); 113 DateTime max = PreprocessingData.GetMax<DateTime>(column.Key, considerSelection); 119 114 double randMultiplier = (max - min).TotalSeconds; 120 115 foreach (int index in column.Value) { … … 228 223 foreach (var column in cells) { 229 224 if (preprocessingData.VariableHasType<double>(column.Key)) { 230 ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, double.NaN, considerSelection));225 ReplaceIndicesByValue<double>(column.Key, column.Value, PreprocessingData.GetMode<double>(column.Key, considerSelection)); 231 226 } else if 
(preprocessingData.VariableHasType<string>(column.Key)) { 232 ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, string.Empty, considerSelection));227 ReplaceIndicesByValue<string>(column.Key, column.Value, PreprocessingData.GetMode<string>(column.Key, considerSelection)); 233 228 } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) { 234 ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, DateTime.MinValue, considerSelection));229 ReplaceIndicesByValue<DateTime>(column.Key, column.Value, PreprocessingData.GetMode<DateTime>(column.Key, considerSelection)); 235 230 } else { 236 231 throw new ArgumentException("column with index: " + column.Key + " contains a non supported type."); … … 308 303 309 304 for (int i = 0; i < preprocessingData.Rows; ++i) { 310 int missingCount = statisticsLogic.GetRowMissingValueCount(i);305 int missingCount = preprocessingData.GetRowMissingValueCount(i); 311 306 if (100f / preprocessingData.Columns * missingCount > percent) { 312 307 rows.Add(i); … … 320 315 List<int> columns = new List<int>(); 321 316 for (int i = 0; i < preprocessingData.Columns; ++i) { 322 int missingCount = statisticsLogic.GetMissingValueCount(i);317 int missingCount = preprocessingData.GetMissingValueCount(i); 323 318 if (100f / preprocessingData.Rows * missingCount > percent) { 324 319 columns.Add(i); … … 332 327 List<int> columns = new List<int>(); 333 328 for (int i = 0; i < preprocessingData.Columns; ++i) { 334 if (preprocessingData.VariableHasType<double>(i) || preprocessingData.VariableHasType<DateTime>(i)) { 335 double columnVariance = statisticsLogic.GetVariance(i); 329 if (preprocessingData.VariableHasType<double>(i)) { 330 double columnVariance = preprocessingData.GetVariance<double>(i); 331 if (columnVariance < variance) { 332 columns.Add(i); 333 } 334 } else if (preprocessingData.VariableHasType<DateTime>(i)) { 335 
double columnVariance = (double)preprocessingData.GetVariance<DateTime>(i).Ticks / TimeSpan.TicksPerSecond; 336 336 if (columnVariance < variance) { 337 337 columns.Add(i);
Note: See TracChangeset for help on using the changeset viewer.