Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/07/14 12:47:54 (10 years ago)
Author:
sbreuer
Message:
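  • implemented average and related statistics (median, min/max, most common value) computed over the current selection only, via a new considerSelection parameter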
  • fixed a null reference error in SelectionChanged
Location:
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ChartLogic.cs

    r10803 r10809  
    3737
    3838    public DataRow CreateDataRow(string variableName, DataRowVisualProperties.DataRowChartType chartType) {
    39       IList<double> values = preprocessingData.GetValues<double>(variableName);
     39      IList<double> values = preprocessingData.GetValues<double>(variableName, false);
    4040      DataRow row = new DataRow(variableName, "", values);
    4141      row.VisualProperties.ChartType = chartType;
     
    4444
    4545    public DataRow CreateDataRowRange(string variableName,int start, int end, DataRowVisualProperties.DataRowChartType chartType) {
    46       IList<double> values = preprocessingData.GetValues<double>(variableName);
     46      IList<double> values = preprocessingData.GetValues<double>(variableName, false);
    4747      IList<double> valuesRange = new List<double>();
    4848      for (int i = 0; i < values.Count; i++) {
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/DataGridContent.cs

    r10807 r10809  
    4545    }
    4646
    47     public IManipulationLogic PreprocessingDataManipulation {
     47    public IManipulationLogic ManipulationLogic {
    4848      get { return manipulationLogic; }
    4949    }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/FilteredPreprocessingData.cs

    r10804 r10809  
    4141    }
    4242
    43     public IList<T> GetValues<T>(string variableName) {
    44       return ActiveData.GetValues<T>(variableName);
     43    public IList<T> GetValues<T>(string variableName, bool considerSelection) {
     44      return ActiveData.GetValues<T>(variableName, considerSelection);
    4545    }
    4646
    47     public IList<T> GetValues<T>(int columnIndex) {
    48       return ActiveData.GetValues<T>(columnIndex);
     47    public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
     48      return ActiveData.GetValues<T>(columnIndex, considerSelection);
    4949    }
    5050
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ManipulationLogic.cs

    r10737 r10809  
    4545    }
    4646
    47     public void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells) {
    48       preprocessingData.InTransaction(() => {
    49         foreach (var column in cells) {
    50           if (preprocessingData.IsType<double>(column.Key)) {
    51             double average = statisticsLogic.GetAverage(column.Key);
     47    public void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
     48      preprocessingData.InTransaction(() => {
     49        foreach (var column in cells) {
     50          if (preprocessingData.IsType<double>(column.Key)) {
     51            double average = statisticsLogic.GetAverage(column.Key, considerSelection);
    5252            ReplaceIndicesByValue<double>(column.Key, column.Value, average);
    5353          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
    54             DateTime average = statisticsLogic.GetAverageDateTime(column.Key);
     54            DateTime average = statisticsLogic.GetAverageDateTime(column.Key, considerSelection);
    5555            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average);
    5656          }
     
    5959    }
    6060
    61     public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells) {
    62       preprocessingData.InTransaction(() => {
    63         foreach (var column in cells) {
    64           if (preprocessingData.IsType<double>(column.Key)) {
    65             double median = statisticsLogic.GetMedian(column.Key);
     61    public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
     62      preprocessingData.InTransaction(() => {
     63        foreach (var column in cells) {
     64          if (preprocessingData.IsType<double>(column.Key)) {
     65            double median = statisticsLogic.GetMedian(column.Key, considerSelection);
    6666            ReplaceIndicesByValue<double>(column.Key, column.Value, median);
    6767          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
    68             DateTime median = statisticsLogic.GetMedianDateTime(column.Key);
     68            DateTime median = statisticsLogic.GetMedianDateTime(column.Key, considerSelection);
    6969            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median);
    7070          }
     
    7373    }
    7474
    75     public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells) {
     75    public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
    7676      preprocessingData.InTransaction(() => {
    7777        Random r = new Random();
     
    7979        foreach (var column in cells) {
    8080          if (preprocessingData.IsType<double>(column.Key)) {
    81             double max = statisticsLogic.GetMax<double>(column.Key);
    82             double min = statisticsLogic.GetMin<double>(column.Key);
     81            double max = statisticsLogic.GetMax<double>(column.Key, considerSelection);
     82            double min = statisticsLogic.GetMin<double>(column.Key, considerSelection);
    8383            double randMultiplier = (max - min);
    8484            foreach (int index in column.Value) {
     
    8787            }
    8888          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
    89             DateTime min = statisticsLogic.GetMin<DateTime>(column.Key);
    90             DateTime max = statisticsLogic.GetMax<DateTime>(column.Key);
     89            DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, considerSelection);
     90            DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, considerSelection);
    9191            double randMultiplier = (max - min).TotalSeconds;
    9292            foreach (int index in column.Value) {
     
    104104          int countValues = 0;
    105105          if (preprocessingData.IsType<double>(column.Key)) {
    106             countValues = preprocessingData.GetValues<double>(column.Key).Count();
    107           } else if (preprocessingData.IsType<DateTime>(column.Key)) {
    108             countValues = preprocessingData.GetValues<DateTime>(column.Key).Count();
     106            countValues = preprocessingData.GetValues<double>(column.Key, false).Count();
     107          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     108            countValues = preprocessingData.GetValues<DateTime>(column.Key, false).Count();
    109109          }
    110110
     
    165165    }
    166166
    167     public void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells) {
    168       preprocessingData.InTransaction(() => {
    169         foreach (var column in cells) {
    170           if (preprocessingData.IsType<double>(column.Key)) {
    171             ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key));
     167    public void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells, bool considerSelection) {
     168      preprocessingData.InTransaction(() => {
     169        foreach (var column in cells) {
     170          if (preprocessingData.IsType<double>(column.Key)) {
     171            ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, considerSelection));
    172172          } else if (preprocessingData.IsType<string>(column.Key)) {
    173             ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key));
    174           } else if (preprocessingData.IsType<DateTime>(column.Key)) {
    175             ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key));
     173            ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, considerSelection));
     174          } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     175            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, considerSelection));
    176176          } else {
    177177            throw new ArgumentException("column with index: " + column.Key + " contains a non supported type.");
     
    256256    private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    257257
    258       List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
     258      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex, false));
    259259
    260260      // process all columns equally
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs

    r10804 r10809  
    122122
    123123    [Obsolete("use the index based variant, is faster")]
    124     public IList<T> GetValues<T>(string variableName) {
    125       return GetValues<T>(GetColumnIndex(variableName));
    126     }
    127 
    128     public IList<T> GetValues<T>(int columnIndex) {
    129       return (IList<T>)variableValues[columnIndex];
     124    public IList<T> GetValues<T>(string variableName, bool considerSelection) {
     125      return GetValues<T>(GetColumnIndex(variableName), considerSelection);
     126    }
     127
     128    public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
     129      if (considerSelection) {
     130        var list = new List<T>();
     131        foreach (var rowIdx in currentSelection[columnIndex]) {
     132          list.Add((T)variableValues[columnIndex][rowIdx]);
     133        }
     134        return list;
     135      } else {
     136        return (IList<T>)variableValues[columnIndex];
     137      }
    130138    }
    131139
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/SearchLogic.cs

    r10776 r10809  
    3030
    3131    private Dictionary<int, IList<int>> MissingValueIndicies { get; set; }
    32     private Dictionary<int, IEnumerable> ValuesWithoutNaN { get; set; }
     32    private Dictionary<int, IList> ValuesWithoutNaN { get; set; }
    3333
    3434    public SearchLogic(ITransactionalPreprocessingData thePreprocessingData) {
     
    3636
    3737      MissingValueIndicies = new Dictionary<int, IList<int>>();
    38       ValuesWithoutNaN = new Dictionary<int, IEnumerable>();
     38      ValuesWithoutNaN = new Dictionary<int, IList>();
    3939
    4040      preprocessingData.Changed += preprocessingData_Changed;
     
    5858        case DataPreprocessingChangedEventType.Transformation:
    5959          MissingValueIndicies = new Dictionary<int, IList<int>>();
    60           ValuesWithoutNaN = new Dictionary<int, IEnumerable>();
     60          ValuesWithoutNaN = new Dictionary<int, IList>();
    6161          break;
    6262      }
     
    110110    }
    111111
    112     public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex)
     112    public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection)
    113113    {
    114       if (!ValuesWithoutNaN.ContainsKey(columnIndex))
    115       {
     114      if (considerSelection) {     
     115        var selectedRows =  preprocessingData.GetSelection()[columnIndex];
     116       
    116117        List<T> values = new List<T>();
    117 
    118         for (int row = 0; row < preprocessingData.Rows; ++row)
    119         {
    120           if (!IsMissingValue(columnIndex, row))
    121           {
    122             values.Add(preprocessingData.GetCell<T>(columnIndex, row));
     118        foreach (var rowIdx in selectedRows) {
     119          if (!IsMissingValue(columnIndex, rowIdx)) {
     120            values.Add(preprocessingData.GetCell<T>(columnIndex, rowIdx));
    123121          }
    124122        }
     123        return values;
     124      } else {
     125        if (!ValuesWithoutNaN.ContainsKey(columnIndex)) {
     126          List<T> values = new List<T>();
    125127
    126         ValuesWithoutNaN[columnIndex] = values;
     128          for (int row = 0; row < preprocessingData.Rows; ++row) {
     129            if (!IsMissingValue(columnIndex, row)) {
     130              values.Add(preprocessingData.GetCell<T>(columnIndex, row));
     131            }
     132          }
     133
     134          ValuesWithoutNaN[columnIndex] = values;
     135        }
     136        return (IEnumerable<T>)ValuesWithoutNaN[columnIndex];
    127137      }
    128 
    129       return (IEnumerable<T>)ValuesWithoutNaN[columnIndex];
    130138    }
    131139  }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs

    r10663 r10809  
    7373    }
    7474
    75     public T GetMin<T>(int columnIndex) where T : IComparable<T> {
    76       return preprocessingData.GetValues<T>(columnIndex).Min();
     75    public T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
     76      return preprocessingData.GetValues<T>(columnIndex, considerSelection).Min();
    7777    }
    7878
    79     public T GetMax<T>(int columnIndex) where T : IComparable<T> {
    80       return preprocessingData.GetValues<T>(columnIndex).Max();
     79    public T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T> {
     80      return preprocessingData.GetValues<T>(columnIndex, considerSelection).Max();
    8181    }
    8282
    83     public double GetMedian(int columnIndex) {
     83    public double GetMedian(int columnIndex, bool considerSelection) {
    8484      double median = double.NaN;
    8585      if (preprocessingData.IsType<double>(columnIndex)) {
    86         median = GetValuesWithoutNaN<double>(columnIndex).Median();
     86        median = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Median();
    8787      }
    8888      return median;
    8989    }
    9090
    91     public double GetAverage(int columnIndex) {
     91    public double GetAverage(int columnIndex, bool considerSelection) {
    9292      double avg = double.NaN;
    9393      if (preprocessingData.IsType<double>(columnIndex)) {
    94         avg = GetValuesWithoutNaN<double>(columnIndex).Average();
     94        avg = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Average();
    9595      }
    9696      return avg;
    9797    }
    9898
    99     public DateTime GetMedianDateTime(int columnIndex) {
     99    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) {
    100100      DateTime median = new DateTime();
    101101      if (preprocessingData.IsType<DateTime>(columnIndex)) {
    102         median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
     102        median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Median());
    103103      }
    104104      return median;
    105105    }
    106106
    107     public DateTime GetAverageDateTime(int columnIndex) {
     107    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) {
    108108      DateTime avg = new DateTime();
    109109      if (preprocessingData.IsType<DateTime>(columnIndex)) {
    110         avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
     110        avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Average());
    111111      }
    112112      return avg;
    113113    }
    114114
    115     public T GetMostCommonValue<T>(int columnIndex) {
    116       var t = preprocessingData.GetValues<T>(columnIndex);
     115    public T GetMostCommonValue<T>(int columnIndex, bool considerSelection) {
     116      var t = preprocessingData.GetValues<T>(columnIndex, considerSelection);
    117117      var t2 = t.GroupBy(x => x);
    118118      var t3 = t2.Select(g => g.Key);
    119119
    120       return preprocessingData.GetValues<T>(columnIndex)
     120      return preprocessingData.GetValues<T>(columnIndex, considerSelection)
    121121                              .GroupBy(x => x)
    122122                              .OrderByDescending(g => g.Count())
     
    129129      double stdDev = double.NaN;
    130130      if (preprocessingData.IsType<double>(columnIndex)) {
    131         stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
     131        stdDev = GetValuesWithoutNaN<double>(columnIndex, false).StandardDeviation();
    132132      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
    133         stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
     133        stdDev = GetDateTimeAsSeconds(columnIndex, false).StandardDeviation();
    134134      }
    135135      return stdDev;
     
    139139      double variance = double.NaN;
    140140      if (preprocessingData.IsType<double>(columnIndex)) {
    141         variance = preprocessingData.GetValues<double>(columnIndex).Variance();
     141        variance = preprocessingData.GetValues<double>(columnIndex, false).Variance();
    142142      } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
    143         variance = GetDateTimeAsSeconds(columnIndex).Variance();
     143        variance = GetDateTimeAsSeconds(columnIndex, false).Variance();
    144144      }
    145145      return variance;
     
    147147
    148148    public int GetDifferentValuesCount<T>(int columnIndex) {
    149       return preprocessingData.GetValues<T>(columnIndex).GroupBy(x => x).Count();
     149      return preprocessingData.GetValues<T>(columnIndex, false).GroupBy(x => x).Count();
    150150    }
    151151
     
    179179    }
    180180
    181     private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
    182       return GetValuesWithoutNaN<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
     181    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex, bool considerSelection) {
     182      return GetValuesWithoutNaN<DateTime>(columnIndex, considerSelection).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    183183    }
    184184
    185     private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex) {
    186       return searchLogic.GetValuesWithoutNaN<T>(columnIndex);
     185    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection) {
     186      return searchLogic.GetValuesWithoutNaN<T>(columnIndex, considerSelection);
    187187    }
    188188
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IDataGridContent.cs

    r10636 r10809  
    2525  public interface IDataGridContent : IStringConvertibleMatrix {
    2626    IDataGridLogic DataGridLogic { get; }
    27     IManipulationLogic PreprocessingDataManipulation { get; }
     27    IManipulationLogic ManipulationLogic { get; }
    2828    IFilterLogic FilterLogic { get; }
    2929
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IManipulationLogic.cs

    r10737 r10809  
    2828    void ReOrderToIndices(IList<Tuple<int, int>> indices);
    2929    void ShuffleToIndices(IList<System.Tuple<int, int>> indices);
    30     void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells);
     30    void ReplaceIndicesByAverageValue(IDictionary<int, IList<int>> cells, bool considerSelection);
    3131    void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells);
    32     void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells);
    33     void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells);
    34     void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells);
     32    void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection);
     33    void ReplaceIndicesByMostCommonValue(IDictionary<int, IList<int>> cells, bool considerSelection);
     34    void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection);
    3535    void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, string value);
    3636    void ReplaceIndicesByValue<T>(int columnIndex, IEnumerable<int> rowIndices, T value);
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IPreprocessingData.cs

    r10804 r10809  
    3737
    3838    [Obsolete("use the index based variant, is faster")]
    39     IList<T> GetValues<T>(string variableName);
    40     IList<T> GetValues<T>(int columnIndex);
     39    IList<T> GetValues<T>(string variableName, bool considerSelection);
     40    IList<T> GetValues<T>(int columnIndex, bool considerSelection);
    4141
    4242    void SetValues<T>(int columnIndex, IList<T> values);
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/ISearchLogic.cs

    r10776 r10809  
    3939    bool IsMissingValue(int columnIndex, int rowIndex);
    4040
    41     IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex);
     41    IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection);
    4242  }
    4343}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IStatisticsLogic.cs

    r10551 r10809  
    3333    int GetRowMissingValueCount(int rowIndex);
    3434
    35     T GetMin<T>(int columnIndex) where T : IComparable<T>;
    36     T GetMax<T>(int columnIndex) where T : IComparable<T>;
     35    T GetMin<T>(int columnIndex, bool considerSelection) where T : IComparable<T>;
     36    T GetMax<T>(int columnIndex, bool considerSelection) where T : IComparable<T>;
    3737
    38     double GetMedian(int columnIndex);
    39     double GetAverage(int columnIndex);
    40     DateTime GetMedianDateTime(int columnIndex);
    41     DateTime GetAverageDateTime(int columnIndex);
     38    double GetMedian(int columnIndex, bool considerSelection);
     39    double GetAverage(int columnIndex, bool considerSelection);
     40    DateTime GetMedianDateTime(int columnIndex, bool considerSelection);
     41    DateTime GetAverageDateTime(int columnIndex, bool considerSelection);
    4242
    4343    double GetStandardDeviation(int columnIndex);
    4444    double GetVariance(int columnIndex);
    45     T GetMostCommonValue<T>(int columnIndex);
     45    T GetMostCommonValue<T>(int columnIndex, bool considerSelection);
    4646    int GetDifferentValuesCount<T>(int columnIndex);
    4747
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/PreprocessingTransformator.cs

    r10786 r10809  
    5454        int colIndex = preprocessingData.GetColumnIndex(transformation.Column);
    5555
    56         var originalData = preprocessingData.GetValues<double>(colIndex);
     56        var originalData = preprocessingData.GetValues<double>(colIndex, false);
    5757        var transformedData = ApplyDoubleTransformation(transformation, originalData, out success);
    5858        if (!success) return;
Note: See TracChangeset for help on using the changeset viewer.