Free cookie consent management tool by TermsFeed Policy Generator

Changeset 15283 for branches


Ignore:
Timestamp:
07/24/17 15:17:35 (7 years ago)
Author:
pfleck
Message:

#2809: Removed StatisticsLogic.

Location:
branches/DataPreprocessing Cleanup
Files:
1 deleted
9 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing.Views/3.4/DataPreprocessingView.cs

    r15274 r15283  
    4949      if (Content != null) {
    5050        var data = Content.Data;
    51         var statisticsLogic = new StatisticsLogic(data);
    52         var manipulationLogic = new ManipulationLogic(data, statisticsLogic);
     51        var manipulationLogic = new ManipulationLogic(data);
    5352
    5453        var viewShortcuts = new ItemList<IViewShortcut> {
    5554          new DataGridContent(data, manipulationLogic),
    56           new StatisticsContent(data, statisticsLogic),
     55          new StatisticsContent(data),
    5756
    5857          new LineChartContent(data),
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing.Views/3.4/ManipulationView.cs

    r15274 r15283  
    8383    protected override void RegisterContentEvents() {
    8484      base.RegisterContentEvents();
    85       Content.ManipulationLogic.PreProcessingData.FilterChanged += FilterLogic_FilterChanged;
     85      Content.ManipulationLogic.PreprocessingData.FilterChanged += FilterLogic_FilterChanged;
    8686    }
    8787
    8888    protected override void DeregisterContentEvents() {
    89       Content.ManipulationLogic.PreProcessingData.FilterChanged -= FilterLogic_FilterChanged;
     89      Content.ManipulationLogic.PreprocessingData.FilterChanged -= FilterLogic_FilterChanged;
    9090      base.DeregisterContentEvents();
    9191    }
     
    131131      var columnIndices = new Dictionary<int, IList<int>> { { columnIndex, missingValuesIndices } };
    132132
    133       for (int rowIndex = 0; rowIndex < Content.ManipulationLogic.PreProcessingData.Rows; rowIndex++)
    134         if (Content.ManipulationLogic.PreProcessingData.IsCellEmpty(columnIndex, rowIndex))
     133      for (int rowIndex = 0; rowIndex < Content.ManipulationLogic.PreprocessingData.Rows; rowIndex++)
     134        if (Content.ManipulationLogic.PreprocessingData.IsCellEmpty(columnIndex, rowIndex))
    135135          missingValuesIndices.Add(rowIndex);
    136136
     
    242242        if (string.IsNullOrEmpty(replaceValue)) {
    243243          lblPreviewReplaceMissingValues.Text = "Preview not possible yet - please input the text which will be used as replacement.";
    244         } else if (!Content.ManipulationLogic.PreProcessingData.Validate(txtReplaceValue.Text, out errorMessage, columnIndex)) {
     244        } else if (!Content.ManipulationLogic.PreprocessingData.Validate(txtReplaceValue.Text, out errorMessage, columnIndex)) {
    245245          lblPreviewReplaceMissingValues.Text = "Preview not possible yet - " + errorMessage;
    246246        } else {
     
    253253      if (btnApply.Enabled) {
    254254        int count = 0;
    255         for (int rowIndex = 0; rowIndex < Content.ManipulationLogic.PreProcessingData.Rows; rowIndex++)
    256           if (Content.ManipulationLogic.PreProcessingData.IsCellEmpty(columnIndex, rowIndex)) count++;
     255        for (int rowIndex = 0; rowIndex < Content.ManipulationLogic.PreprocessingData.Rows; rowIndex++)
     256          if (Content.ManipulationLogic.PreprocessingData.IsCellEmpty(columnIndex, rowIndex)) count++;
    257257
    258258        int cellCount = Content.PreprocessingData.Rows * Content.PreprocessingData.Columns;
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing.Views/3.4/StatisticsView.cs

    r15274 r15283  
    8787
    8888    private void UpdateData(Dictionary<string, bool> oldVisibility = null) {
    89       var logic = Content.StatisticsLogic;
    9089      var data = Content.PreprocessingData;
    9190      rowsTextBox.Text = data.Rows.ToString();
     
    9392      numericColumnsTextBox.Text = GetColumnCount<double>().ToString();
    9493      nominalColumnsTextBox5.Text = GetColumnCount<string>().ToString();
    95       missingValuesTextBox.Text = logic.GetMissingValueCount().ToString();
    96       totalValuesTextBox.Text = (data.Rows * data.Rows - logic.GetMissingValueCount()).ToString();
     94      missingValuesTextBox.Text = data.GetMissingValueCount().ToString();
     95      totalValuesTextBox.Text = (data.Rows * data.Rows - data.GetMissingValueCount()).ToString();
    9796
    9897      var variableNames = Content.PreprocessingData.VariableNames.ToList();
     
    151150    private List<string> GetStatistics(int varIdx) {
    152151      List<string> list;
    153       var logic = Content.StatisticsLogic;
    154       if (logic.VariableHasType<double>(varIdx)) {
     152      var data = Content.PreprocessingData;
     153      if (data.VariableHasType<double>(varIdx)) {
    155154        list = GetDoubleColumns(varIdx);
    156       } else if (logic.VariableHasType<string>(varIdx)) {
     155      } else if (data.VariableHasType<string>(varIdx)) {
    157156        list = GetStringColumns(varIdx);
    158       } else if (logic.VariableHasType<DateTime>(varIdx)) {
     157      } else if (data.VariableHasType<DateTime>(varIdx)) {
    159158        list = GetDateTimeColumns(varIdx);
    160159      } else {
     
    168167
    169168    private List<string> GetDoubleColumns(int statIdx) {
    170       var logic = Content.StatisticsLogic;
     169      var data = Content.PreprocessingData;
    171170      return new List<string> {
    172         logic.GetColumnTypeAsString(statIdx),
    173         logic.GetMissingValueCount(statIdx).ToString(),
    174         logic.GetMin<double>(statIdx, double.NaN).ToString(),
    175         logic.GetMax<double>(statIdx, double.NaN).ToString(),
    176         logic.GetMedian(statIdx).ToString(),
    177         logic.GetAverage(statIdx).ToString(),
    178         logic.GetStandardDeviation(statIdx).ToString(),
    179         logic.GetVariance(statIdx).ToString(),
    180         logic.GetOneQuarterPercentile(statIdx).ToString(),
    181         logic.GetThreeQuarterPercentile(statIdx).ToString(),
    182         logic.GetMostCommonValue<double>(statIdx, double.NaN).ToString(),
    183         logic.GetDifferentValuesCount<double>(statIdx).ToString()
     171        data.GetVariableType(statIdx).Name,
     172        data.GetMissingValueCount(statIdx).ToString(),
     173        data.GetMin<double>(statIdx, emptyValue: double.NaN).ToString(),
     174        data.GetMax<double>(statIdx, emptyValue: double.NaN).ToString(),
     175        data.GetMedian<double>(statIdx, emptyValue: double.NaN).ToString(),
     176        data.GetMean<double>(statIdx, emptyValue: double.NaN).ToString(),
     177        data.GetStandardDeviation<double>(statIdx, emptyValue: double.NaN).ToString(),
     178        data.GetVariance<double>(statIdx, emptyValue: double.NaN).ToString(),
     179        data.GetQuantile<double>(0.25, statIdx, emptyValue: double.NaN).ToString(),
     180        data.GetQuantile<double>(0.75, statIdx, emptyValue: double.NaN).ToString(),
     181        data.GetMode<double>(statIdx, emptyValue: double.NaN).ToString(),
     182        data.GetDistinctValues<double>(statIdx).ToString()
    184183      };
    185184    }
    186185
    187186    private List<string> GetStringColumns(int statIdx) {
    188       var logic = Content.StatisticsLogic;
     187      var data = Content.PreprocessingData;
    189188      return new List<string> {
    190         logic.GetColumnTypeAsString(statIdx),
    191         logic.GetMissingValueCount(statIdx).ToString(),
    192         "", //min
    193         "", //max
    194         "", //median
     189        data.GetVariableType(statIdx).Name,
     190        data.GetMissingValueCount(statIdx).ToString(),
     191        "", // data.GetMin<string>(statIdx, emptyValue: string.Empty), //min
     192        "", // data.GetMax<string>(statIdx, emptyValue: string.Empty), //max
     193        "", // data.GetMedian<string>(statIdx, emptyValue: string.Empty), //median
    195194        "", //average
    196195        "", //standard deviation
    197196        "", //variance
    198         "", //quarter percentile
    199         "", //three quarter percentile
    200         logic.GetMostCommonValue<string>(statIdx,string.Empty) ?? "",
    201         logic.GetDifferentValuesCount<string>(statIdx).ToString()
     197        "", // data.GetQuantile<string>(0.25, statIdx, emptyValue: string.Empty), //quarter percentile
     198        "", // data.GetQuantile<string>(0.75, statIdx, emptyValue: string.Empty), //three quarter percentile
     199        data.GetMode<string>(statIdx, emptyValue: string.Empty),
     200        data.GetDistinctValues<string>(statIdx).ToString()
    202201      };
    203202    }
    204203
    205204    private List<string> GetDateTimeColumns(int statIdx) {
    206       var logic = Content.StatisticsLogic;
     205      var data = Content.PreprocessingData;
    207206      return new List<string> {
    208         logic.GetColumnTypeAsString(statIdx),
    209         logic.GetMissingValueCount(statIdx).ToString(),
    210         logic.GetMin<DateTime>(statIdx, DateTime.MinValue).ToString(),
    211         logic.GetMax<DateTime>(statIdx, DateTime.MinValue).ToString(),
    212         logic.GetMedianDateTime(statIdx).ToString(),
    213         logic.GetAverageDateTime(statIdx).ToString(),
    214         logic.GetStandardDeviation(statIdx).ToString(),
    215         logic.GetVariance(statIdx).ToString(),
    216         logic.GetOneQuarterPercentile(statIdx).ToString(),
    217         logic.GetThreeQuarterPercentile(statIdx).ToString(),
    218         logic.GetMostCommonValue<DateTime>(statIdx, DateTime.MinValue).ToString(),
    219         logic.GetDifferentValuesCount<DateTime>(statIdx).ToString()
     207        data.GetVariableType(statIdx).Name,
     208        data.GetMissingValueCount(statIdx).ToString(),
     209        data.GetMin<DateTime>(statIdx).ToString(),
     210        data.GetMax<DateTime>(statIdx).ToString(),
     211        data.GetMedian<DateTime>(statIdx).ToString(),
     212        data.GetMean<DateTime>(statIdx).ToString(),
     213        "", // should be of type TimeSpan //data.GetStandardDeviation<DateTime>(statIdx).ToString(),
     214        "", // should be of type TimeSpan //data.GetVariance<DateTime>(statIdx).ToString(),
     215        data.GetQuantile<DateTime>(0.25, statIdx).ToString(),
     216        data.GetQuantile<DateTime>(0.75, statIdx).ToString(),
     217        data.GetMode<DateTime>(statIdx).ToString(),
     218        data.GetDistinctValues<DateTime>(statIdx).ToString()
    220219      };
    221220    }
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/StatisticsContent.cs

    r15274 r15283  
    3333    }
    3434
    35     [Storable]
    36     public StatisticsLogic StatisticsLogic { get; private set; }
    37 
    3835    #region Constructor, Cloning & Persistence
    39     public StatisticsContent(IFilteredPreprocessingData preprocessingData, StatisticsLogic statisticsLogic)
     36    public StatisticsContent(IFilteredPreprocessingData preprocessingData)
    4037      : base(preprocessingData) {
    41       StatisticsLogic = statisticsLogic;
    4238    }
    4339
    4440    public StatisticsContent(StatisticsContent original, Cloner cloner)
    4541      : base(original, cloner) {
    46       StatisticsLogic = cloner.Clone(original.StatisticsLogic);
    4742    }
    4843    public override IDeepCloneable Clone(Cloner cloner) {
     
    5651
    5752    public event DataPreprocessingChangedEventHandler Changed {
    58       add { StatisticsLogic.Changed += value; }
    59       remove { StatisticsLogic.Changed -= value; }
     53      add { PreprocessingData.Changed += value; }
     54      remove { PreprocessingData.Changed -= value; }
    6055    }
    6156  }
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/FilteredPreprocessingData.cs

    r15274 r15283  
    179179    }
    180180
     181    public Type GetVariableType(int columnIndex) {
     182      return ActiveData.GetVariableType(columnIndex);
     183    }
     184
    181185    public IList<string> InputVariables {
    182186      get { return ActiveData.InputVariables; }
     
    269273    public void EndTransaction() {
    270274      originalData.EndTransaction();
     275    }
     276    #endregion
     277
     278    #region Statistics
     279    public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     280      return ActiveData.GetMin<T>(columnIndex, considerSelection, emptyValue);
     281    }
     282    public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     283      return ActiveData.GetMax<T>(columnIndex, considerSelection, emptyValue);
     284    }
     285    public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     286      return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue);
     287    }
     288    public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     289      return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue);
     290    }
     291    public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
     292      return ActiveData.GetMode<T>(columnIndex, considerSelection, emptyValue);
     293    }
     294    public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     295      return ActiveData.GetStandardDeviation<T>(columnIndex, considerSelection, emptyValue);
     296    }
     297    public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     298      return ActiveData.GetVariance<T>(columnIndex, considerSelection, emptyValue);
     299    }
     300    public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     301      return ActiveData.GetQuantile<T>(alpha, columnIndex, considerSelection, emptyValue);
     302    }
     303    public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
     304      return ActiveData.GetDistinctValues<T>(columnIndex, considerSelection);
     305    }
     306
     307    public int GetMissingValueCount() {
     308      return ActiveData.GetMissingValueCount();
     309    }
     310    public int GetMissingValueCount(int columnIndex) {
     311      return ActiveData.GetMissingValueCount(columnIndex);
     312    }
     313    public int GetRowMissingValueCount(int rowIndex) {
     314      return ActiveData.GetRowMissingValueCount(rowIndex);
    271315    }
    272316    #endregion
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/IPreprocessingData.cs

    r15270 r15283  
    6666
    6767    bool VariableHasType<T>(int columnIndex);
     68    Type GetVariableType(int columnIndex);
    6869
    6970    IList<string> InputVariables { get; }
     
    105106    void EndTransaction();
    106107    #endregion
     108
     109    #region Statistics
     110    T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     111    T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     112    T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     113    T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T>;
     114    T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T>;
     115    T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     116    T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T));
     117    T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T>;
     118    int GetDistinctValues<T>(int columnIndex, bool considerSelection = false);
     119
     120    int GetMissingValueCount();
     121    int GetMissingValueCount(int columnIndex);
     122    int GetRowMissingValueCount(int rowIndex);
     123    #endregion
    107124  }
    108125}
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs

    r15274 r15283  
    327327    }
    328328
     329    public Type GetVariableType(int columnIndex) {
     330      var listType = variableValues[columnIndex].GetType();
     331      return listType.GenericTypeArguments.Single();
     332    }
     333
    329334    public IList<string> InputVariables { get; private set; }
    330335    public string TargetVariable { get; private set; } // optional
     
    524529    #endregion
    525530
     531    #region Statistics
     532    public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     533      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     534      return values.Any() ? values.Min() : emptyValue;
     535    }
     536
     537    public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     538      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     539      return values.Any() ? values.Max() : emptyValue;
     540    }
     541
     542    public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     543      if (typeof(T) == typeof(double)) {
     544        var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     545        return values.Any() ? Convert<T>(values.Average()) : emptyValue;
     546      }
     547      if (typeof(T) == typeof(string)) {
     548        return Convert<T>(string.Empty);
     549      }
     550      if (typeof(T) == typeof(DateTime)) {
     551        var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     552        return values.Any() ? Convert<T>(AggregateAsDouble(values, Enumerable.Average)) : emptyValue;
     553      }
     554
     555      throw new InvalidOperationException(typeof(T) + " not supported");
     556    }
     557
     558    public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     559      if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
     560        var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     561        return doubleValues.Any() ? Convert<T>(doubleValues.Median()) : emptyValue;
     562      }
     563      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     564      return values.Any() ? values.Quantile(0.5) : emptyValue;
     565    }
     566
     567    public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
     568      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     569      return values.Any() ? values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First() : emptyValue;
     570    }
     571
     572    public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     573      if (typeof(T) == typeof(double)) {
     574        var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     575        return values.Any() ? Convert<T>(values.StandardDeviation()) : emptyValue;
     576      }
     577      // For DateTime, std.dev / variance would have to be TimeSpan
     578      //if (typeof(T) == typeof(DateTime)) {
     579      //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     580      //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.StandardDeviation)) : emptyValue;
     581      //}
     582      return default(T);
     583    }
     584
     585    public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
     586      if (typeof(T) == typeof(double)) {
     587        var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     588        return values.Any() ? Convert<T>(values.Variance()) : emptyValue;
     589      }
     590      // DateTime variance often overflows long, thus the corresponding DateTime is invalid
     591      //if (typeof(T) == typeof(DateTime)) {
     592      //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
     593      //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.Variance)) : emptyValue;
     594      //}
     595      return default(T);
     596    }
     597
     598    public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
     599      if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
     600        var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
     601        return doubleValues.Any() ? Convert<T>(doubleValues.Quantile(alpha)) : emptyValue;
     602      }
     603      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     604      return values.Any() ? values.Quantile(alpha) : emptyValue;
     605    }
     606
     607    public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
     608      var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
     609      return values.GroupBy(x => x).Count();
     610    }
     611
     612    private IEnumerable<T> GetValuesWithoutMissingValues<T>(int columnIndex, bool considerSelection) {
     613      return GetValues<T>(columnIndex, considerSelection).Where(x => !IsMissingValue(x));
     614    }
     615
     616    private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) {
     617      return new DateTime((long)(func(values.Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond)) * TimeSpan.TicksPerSecond));
     618    }
     619    private static T Convert<T>(object obj) { return (T)obj; }
     620
     621    public int GetMissingValueCount() {
     622      int count = 0;
     623      for (int i = 0; i < Columns; ++i) {
     624        count += GetMissingValueCount(i);
     625      }
     626      return count;
     627    }
     628    public int GetMissingValueCount(int columnIndex) {
     629      int sum = 0;
     630      for (int i = 0; i < Rows; i++) {
     631        if (IsCellEmpty(columnIndex, i))
     632          sum++;
     633      }
     634      return sum;
     635    }
     636    public int GetRowMissingValueCount(int rowIndex) {
     637      int sum = 0;
     638      for (int i = 0; i < Columns; i++) {
     639        if (IsCellEmpty(i, rowIndex))
     640          sum++;
     641      }
     642      return sum;
     643    }
     644    #endregion
     645
    526646    #region Helpers
    527647    private static IList<IList> CopyVariableValues(IList<IList> original) {
     
    534654    #endregion
    535655  }
     656
     657  // Adapted from HeuristicLab.Common.EnumerableStatisticExtensions
     658  internal static class EnumerableExtensions {
     659    public static T Quantile<T>(this IEnumerable<T> values, double alpha) where T : IComparable<T> {
     660      T[] valuesArr = values.ToArray();
     661      int n = valuesArr.Length;
     662      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
     663
     664      var pos = n * alpha;
     665
     666      return Select((int)Math.Ceiling(pos) - 1, valuesArr);
     667
     668    }
     669
     670    private static T Select<T>(int k, T[] arr) where T : IComparable<T> {
     671      int i, ir, j, l, mid, n = arr.Length;
     672      T a;
     673      l = 0;
     674      ir = n - 1;
     675      for (;;) {
     676        if (ir <= l + 1) {
     677          // Active partition contains 1 or 2 elements.
     678          if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) {
     679            // Case of 2 elements.
     680            Swap(arr, l, ir);
     681          }
     682          return arr[k];
     683        } else {
     684          mid = (l + ir) >> 1; // Choose median of left, center, and right elements
     685          Swap(arr, mid, l + 1); // as partitioning element a. Also
     686
     687          if (arr[l].CompareTo(arr[ir]) > 0) {  // rearrange so that arr[l] arr[ir] <= arr[l+1],
     688            Swap(arr, l, ir); // . arr[ir] >= arr[l+1]
     689          }
     690
     691          if (arr[l + 1].CompareTo(arr[ir]) > 0) {
     692            Swap(arr, l + 1, ir);
     693          }
     694          if (arr[l].CompareTo(arr[l + 1]) > 0) {
     695            Swap(arr, l, l + 1);
     696          }
     697          i = l + 1; // Initialize pointers for partitioning.
     698          j = ir;
     699          a = arr[l + 1]; // Partitioning element.
     700          for (;;) { // Beginning of innermost loop.
     701            do i++; while (arr[i].CompareTo(a) < 0); // Scan up to find element > a.
     702            do j--; while (arr[j].CompareTo(a) > 0); // Scan down to find element < a.
     703            if (j < i) break; // Pointers crossed. Partitioning complete.
     704            Swap(arr, i, j);
     705          } // End of innermost loop.
     706          arr[l + 1] = arr[j]; // Insert partitioning element.
     707          arr[j] = a;
     708          if (j >= k) ir = j - 1; // Keep active the partition that contains the
     709          if (j <= k) l = i; // kth element.
     710        }
     711      }
     712    }
     713
     714    private static void Swap<T>(T[] arr, int i, int j) {
     715      T temp = arr[i];
     716      arr[i] = arr[j];
     717      arr[j] = temp;
     718    }
     719  }
    536720}
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/HeuristicLab.DataPreprocessing-3.4.csproj

    r15274 r15283  
    145145    <Compile Include="Content\DataGridContent.cs" />
    146146    <Compile Include="PreprocessingContext.cs" />
    147     <Compile Include="Logic\StatisticsLogic.cs" />
    148147    <Compile Include="Plugin.cs" />
    149148    <Compile Include="Properties\AssemblyInfo.cs" />
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Logic/ManipulationLogic.cs

    r15274 r15283  
    3737    private readonly IFilteredPreprocessingData preprocessingData;
    3838
    39     [Storable]
    40     private readonly StatisticsLogic statisticsLogic;
    41 
    4239    public IEnumerable<string> VariableNames {
    4340      get { return preprocessingData.VariableNames; }
    4441    }
    4542
    46     public IFilteredPreprocessingData PreProcessingData {
     43    public IFilteredPreprocessingData PreprocessingData {
    4744      get { return preprocessingData; }
    4845    }
    4946
    5047    #region Constructor, Cloning & Persistence
    51     public ManipulationLogic(IFilteredPreprocessingData preprocessingData, StatisticsLogic theStatisticsLogic) {
     48    public ManipulationLogic(IFilteredPreprocessingData preprocessingData) {
    5249      this.preprocessingData = preprocessingData;
    53       statisticsLogic = theStatisticsLogic;
    5450    }
    5551    public ManipulationLogic(ManipulationLogic original, Cloner cloner)
    5652      : base(original, cloner) {
    5753      preprocessingData = cloner.Clone(original.preprocessingData);
    58       statisticsLogic = cloner.Clone(original.statisticsLogic);
    5954    }
    6055    public override IDeepCloneable Clone(Cloner cloner) {
     
    7772        foreach (var column in cells) {
    7873          if (preprocessingData.VariableHasType<double>(column.Key)) {
    79             double average = statisticsLogic.GetAverage(column.Key, considerSelection);
     74            double average = PreprocessingData.GetMean<double>(column.Key, considerSelection);
    8075            ReplaceIndicesByValue<double>(column.Key, column.Value, average);
    8176          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    82             DateTime average = statisticsLogic.GetAverageDateTime(column.Key, considerSelection);
     77            DateTime average = PreprocessingData.GetMean<DateTime>(column.Key, considerSelection);
    8378            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average);
    8479          }
     
    9186        foreach (var column in cells) {
    9287          if (preprocessingData.VariableHasType<double>(column.Key)) {
    93             double median = statisticsLogic.GetMedian(column.Key, considerSelection);
     88            double median = PreprocessingData.GetMedian<double>(column.Key, considerSelection);
    9489            ReplaceIndicesByValue<double>(column.Key, column.Value, median);
    9590          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    96             DateTime median = statisticsLogic.GetMedianDateTime(column.Key, considerSelection);
     91            DateTime median = PreprocessingData.GetMedian<DateTime>(column.Key, considerSelection);
    9792            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median);
    9893          }
     
    107102        foreach (var column in cells) {
    108103          if (preprocessingData.VariableHasType<double>(column.Key)) {
    109             double max = statisticsLogic.GetMax<double>(column.Key, double.NaN, considerSelection);
    110             double min = statisticsLogic.GetMin<double>(column.Key, double.NaN, considerSelection);
     104            double max = PreprocessingData.GetMax<double>(column.Key, considerSelection);
     105            double min = PreprocessingData.GetMin<double>(column.Key, considerSelection);
    111106            double randMultiplier = (max - min);
    112107            foreach (int index in column.Value) {
     
    115110            }
    116111          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    117             DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, DateTime.MinValue, considerSelection);
    118             DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, DateTime.MinValue, considerSelection);
     112            DateTime min = PreprocessingData.GetMin<DateTime>(column.Key, considerSelection);
     113            DateTime max = PreprocessingData.GetMax<DateTime>(column.Key, considerSelection);
    119114            double randMultiplier = (max - min).TotalSeconds;
    120115            foreach (int index in column.Value) {
     
    228223        foreach (var column in cells) {
    229224          if (preprocessingData.VariableHasType<double>(column.Key)) {
    230             ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, double.NaN, considerSelection));
     225            ReplaceIndicesByValue<double>(column.Key, column.Value, PreprocessingData.GetMode<double>(column.Key, considerSelection));
    231226          } else if (preprocessingData.VariableHasType<string>(column.Key)) {
    232             ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, string.Empty, considerSelection));
     227            ReplaceIndicesByValue<string>(column.Key, column.Value, PreprocessingData.GetMode<string>(column.Key, considerSelection));
    233228          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    234             ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, DateTime.MinValue, considerSelection));
     229            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, PreprocessingData.GetMode<DateTime>(column.Key, considerSelection));
    235230          } else {
    236231            throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
     
    308303
    309304      for (int i = 0; i < preprocessingData.Rows; ++i) {
    310         int missingCount = statisticsLogic.GetRowMissingValueCount(i);
     305        int missingCount = preprocessingData.GetRowMissingValueCount(i);
    311306        if (100f / preprocessingData.Columns * missingCount > percent) {
    312307          rows.Add(i);
     
    320315      List<int> columns = new List<int>();
    321316      for (int i = 0; i < preprocessingData.Columns; ++i) {
    322         int missingCount = statisticsLogic.GetMissingValueCount(i);
     317        int missingCount = preprocessingData.GetMissingValueCount(i);
    323318        if (100f / preprocessingData.Rows * missingCount > percent) {
    324319          columns.Add(i);
     
    332327      List<int> columns = new List<int>();
    333328      for (int i = 0; i < preprocessingData.Columns; ++i) {
    334         if (preprocessingData.VariableHasType<double>(i) || preprocessingData.VariableHasType<DateTime>(i)) {
    335           double columnVariance = statisticsLogic.GetVariance(i);
     329        if (preprocessingData.VariableHasType<double>(i)) {
     330          double columnVariance = preprocessingData.GetVariance<double>(i);
     331          if (columnVariance < variance) {
     332            columns.Add(i);
     333          }
     334        } else if (preprocessingData.VariableHasType<DateTime>(i)) {
     335          double columnVariance = (double)preprocessingData.GetVariance<DateTime>(i).Ticks / TimeSpan.TicksPerSecond;
    336336          if (columnVariance < variance) {
    337337            columns.Add(i);
Note: See TracChangeset for help on using the changeset viewer.