Changeset 13935


Ignore:
Timestamp:
06/24/16 14:00:28 (14 months ago)
Author:
mkommend
Message:

#2616: Added a default-value parameter to the min, max, and most-common-value operations used in data preprocessing.

Location:
trunk/sources
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.DataPreprocessing.Views/3.4/StatisticsView.cs

    r12889 r13935  
    146146        logic.GetColumnTypeAsString(columnIndex),
    147147        logic.GetMissingValueCount(columnIndex).ToString(),
    148         logic.GetMin<double>(columnIndex).ToString(),
    149         logic.GetMax<double>(columnIndex).ToString(),
     148        logic.GetMin<double>(columnIndex,double.NaN).ToString(),
     149        logic.GetMax<double>(columnIndex,double.NaN).ToString(),
    150150        logic.GetMedian(columnIndex).ToString(),
    151151        logic.GetAverage(columnIndex).ToString(),
     
    154154        logic.GetOneQuarterPercentile(columnIndex).ToString(),
    155155        logic.GetThreeQuarterPercentile(columnIndex).ToString(),
    156         logic.GetMostCommonValue<double>(columnIndex).ToString(),
     156        logic.GetMostCommonValue<double>(columnIndex,double.NaN).ToString(),
    157157        logic.GetDifferentValuesCount<double>(columnIndex).ToString()
    158158      };
     
    172172        "", //quarter percentile
    173173        "", //three quarter percentile
    174         logic.GetMostCommonValue<string>(columnIndex) ?? "",
     174        logic.GetMostCommonValue<string>(columnIndex,string.Empty) ?? "",
    175175        logic.GetDifferentValuesCount<string>(columnIndex).ToString()
    176176      };
     
    182182        logic.GetColumnTypeAsString(columnIndex),
    183183        logic.GetMissingValueCount(columnIndex).ToString(),
    184         logic.GetMin<DateTime>(columnIndex).ToString(),
    185         logic.GetMax<DateTime>(columnIndex).ToString(),
     184        logic.GetMin<DateTime>(columnIndex,DateTime.MinValue).ToString(),
     185        logic.GetMax<DateTime>(columnIndex,DateTime.MinValue).ToString(),
    186186        logic.GetMedianDateTime(columnIndex).ToString(),
    187187        logic.GetAverageDateTime(columnIndex).ToString(),
     
    190190        logic.GetOneQuarterPercentile(columnIndex).ToString(),
    191191        logic.GetThreeQuarterPercentile(columnIndex).ToString(),
    192         logic.GetMostCommonValue<DateTime>(columnIndex).ToString(),
     192        logic.GetMostCommonValue<DateTime>(columnIndex,DateTime.MinValue).ToString(),
    193193        logic.GetDifferentValuesCount<DateTime>(columnIndex).ToString()
    194194      };
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Logic/ManipulationLogic.cs

    r13508 r13935  
    8585        foreach (var column in cells) {
    8686          if (preprocessingData.VariableHasType<double>(column.Key)) {
    87             double max = statisticsLogic.GetMax<double>(column.Key, considerSelection);
    88             double min = statisticsLogic.GetMin<double>(column.Key, considerSelection);
     87            double max = statisticsLogic.GetMax<double>(column.Key, double.NaN, considerSelection);
     88            double min = statisticsLogic.GetMin<double>(column.Key, double.NaN, considerSelection);
    8989            double randMultiplier = (max - min);
    9090            foreach (int index in column.Value) {
     
    9393            }
    9494          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    95             DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, considerSelection);
    96             DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, considerSelection);
     95            DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, DateTime.MinValue, considerSelection);
     96            DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, DateTime.MinValue, considerSelection);
    9797            double randMultiplier = (max - min).TotalSeconds;
    9898            foreach (int index in column.Value) {
     
    213213        foreach (var column in cells) {
    214214          if (preprocessingData.VariableHasType<double>(column.Key)) {
    215             ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, considerSelection));
     215            ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, double.NaN, considerSelection));
    216216          } else if (preprocessingData.VariableHasType<string>(column.Key)) {
    217             ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, considerSelection));
     217            ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, string.Empty, considerSelection));
    218218          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    219             ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, considerSelection));
     219            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, DateTime.MinValue, considerSelection));
    220220          } else {
    221221            throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Logic/StatisticsLogic.cs

    r13934 r13935  
    7171    }
    7272
    73     public T GetMin<T>(int columnIndex, bool considerSelection = false) where T : IComparable<T> {
    74       var min = default(T);
     73    public T GetMin<T>(int columnIndex, T defaultValue, bool considerSelection = false) where T : IComparable<T> {
     74      var min = defaultValue;
    7575      if (preprocessingData.VariableHasType<T>(columnIndex)) {
    7676        var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
     
    8282    }
    8383
    84     public T GetMax<T>(int columnIndex, bool considerSelection = false) where T : IComparable<T> {
    85       var max = default(T);
     84    public T GetMax<T>(int columnIndex, T defaultValue, bool considerSelection = false) where T : IComparable<T> {
     85      var max = defaultValue;
    8686      if (preprocessingData.VariableHasType<T>(columnIndex)) {
    8787        var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
     
    131131    }
    132132
    133     public T GetMostCommonValue<T>(int columnIndex, bool considerSelection = false) {
     133    public T GetMostCommonValue<T>(int columnIndex, T defaultValue, bool considerSelection = false) {
    134134      var values = GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    135135      if (!values.Any())
    136         return default(T);
     136        return defaultValue;
    137137      return values.GroupBy(x => x)
    138138                              .OrderByDescending(g => g.Count())
Note: See TracChangeset for help on using the changeset viewer.