Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HiveStatistics/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/SearchLogic.cs @ 13231

Last change on this file since 13231 was 12689, checked in by dglaser, 9 years ago

#2388: Merged trunk into HiveStatistics branch

File size: 6.2 KB
RevLine 
[10539]1#region License Information
2/* HeuristicLab
[12012]3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[10539]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[10661]23using System.Collections;
[10236]24using System.Collections.Generic;
25
26namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Finds missing values in the columns of an <see cref="ITransactionalPreprocessingData"/>
/// instance and caches the per-column results until the data or the filter changes.
/// </summary>
/// <remarks>
/// A cell counts as missing when it is <c>double.NaN</c> (double columns), null or empty
/// (string columns) or equal to <c>DateTime.MinValue</c> (DateTime columns).
/// NOTE(review): the event handlers registered in the constructor are never unsubscribed
/// in this class, so the data and filter objects keep this instance alive.
/// </remarks>
public class SearchLogic : ISearchLogic {
  private readonly ITransactionalPreprocessingData preprocessingData;
  private readonly IFilterLogic filterLogic;

  // Caches keyed by column index. They are filled lazily on first request and
  // invalidated by the Changed / FilterChanged handlers below.
  private Dictionary<int, IList<int>> MissingValueIndicies { get; set; }
  private Dictionary<int, IList> ValuesWithoutNaN { get; set; }

  /// <summary>Variable names as reported by the underlying preprocessing data.</summary>
  public IEnumerable<string> VariableNames {
    get { return preprocessingData.VariableNames; }
  }

  /// <summary>Number of columns as reported by the underlying preprocessing data.</summary>
  public int Columns {
    get { return preprocessingData.Columns; }
  }

  /// <summary>Number of rows as reported by the underlying preprocessing data.</summary>
  public int Rows {
    get { return preprocessingData.Rows; }
  }

  /// <summary>
  /// Creates the search logic and subscribes to change notifications of both arguments
  /// so that the caches can be invalidated.
  /// </summary>
  /// <param name="thePreprocessingData">Data that is searched for missing values.</param>
  /// <param name="theFilterLogic">Filter logic whose changes invalidate the caches.</param>
  /// <exception cref="ArgumentNullException">If one of the arguments is null.</exception>
  public SearchLogic(ITransactionalPreprocessingData thePreprocessingData, IFilterLogic theFilterLogic) {
    if (thePreprocessingData == null) throw new ArgumentNullException("thePreprocessingData");
    if (theFilterLogic == null) throw new ArgumentNullException("theFilterLogic");

    preprocessingData = thePreprocessingData;
    filterLogic = theFilterLogic;

    MissingValueIndicies = new Dictionary<int, IList<int>>();
    ValuesWithoutNaN = new Dictionary<int, IList>();

    preprocessingData.Changed += PreprocessingData_Changed;
    filterLogic.FilterChanged += FilterLogic_FilterChanged;
  }

  // Drops every cached entry; lists already handed out to callers are not touched.
  private void ClearCaches() {
    MissingValueIndicies.Clear();
    ValuesWithoutNaN.Clear();
  }

  void FilterLogic_FilterChanged(object sender, EventArgs e) {
    // A filter change can affect every column, so everything is recalculated on demand.
    ClearCaches();
  }

  void PreprocessingData_Changed(object sender, DataPreprocessingChangedEventArgs e) {
    switch (e.Type) {
      case DataPreprocessingChangedEventType.ChangeColumn:
        // Only the content of one column changed; all other entries stay valid.
        MissingValueIndicies.Remove(e.Column);
        ValuesWithoutNaN.Remove(e.Column);
        break;
      case DataPreprocessingChangedEventType.DeleteColumn:
      // Deleting a column shifts the index of every following column, so entries
      // cached under higher indices would describe the wrong column.
      case DataPreprocessingChangedEventType.AddColumn:
      // Presumably a column can also be inserted in front of existing ones (not
      // visible from here - TODO confirm); clearing is correct in either case.
      case DataPreprocessingChangedEventType.DeleteRow:
      case DataPreprocessingChangedEventType.AddRow:
      case DataPreprocessingChangedEventType.ChangeItem:
      case DataPreprocessingChangedEventType.Any:
      case DataPreprocessingChangedEventType.Transformation:
      default:
        ClearCaches();
        break;
    }
  }

  /// <summary>
  /// Returns the missing value row indices of all columns.
  /// </summary>
  /// <returns>A new dictionary that maps each column index to its missing row indices.</returns>
  public IDictionary<int, IList<int>> GetMissingValueIndices() {
    var indicesByColumn = new Dictionary<int, IList<int>>();
    for (int column = 0; column < preprocessingData.Columns; ++column) {
      indicesByColumn.Add(column, GetMissingValueIndices(column));
    }
    return indicesByColumn;
  }

  /// <summary>
  /// Checks whether a single cell holds a missing value.
  /// </summary>
  /// <param name="columnIndex">Index of the column.</param>
  /// <param name="rowIndex">Index of the row.</param>
  /// <returns>True if the cell is NaN, null/empty or <c>DateTime.MinValue</c>, depending on the column type.</returns>
  /// <exception cref="ArgumentException">If the column is neither double, string nor DateTime.</exception>
  public bool IsMissingValue(int columnIndex, int rowIndex) {
    if (preprocessingData.VariableHasType<double>(columnIndex)) {
      return double.IsNaN(preprocessingData.GetCell<double>(columnIndex, rowIndex));
    } else if (preprocessingData.VariableHasType<string>(columnIndex)) {
      return string.IsNullOrEmpty(preprocessingData.GetCell<string>(columnIndex, rowIndex));
    } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
      return preprocessingData.GetCell<DateTime>(columnIndex, rowIndex).Equals(DateTime.MinValue);
    } else {
      throw new ArgumentException("cell in column " + columnIndex + " and row index " + rowIndex + " contains a non supported type.");
    }
  }

  /// <summary>
  /// Returns the row indices of all missing values of one column. The result is cached;
  /// the returned list is the cached instance itself.
  /// </summary>
  /// <param name="columnIndex">Index of the column.</param>
  /// <returns>Row indices in ascending order.</returns>
  /// <exception cref="ArgumentException">If the column is neither double, string nor DateTime.</exception>
  public IList<int> GetMissingValueIndices(int columnIndex) {
    IList<int> cached;
    if (MissingValueIndicies.TryGetValue(columnIndex, out cached)) {
      return cached;
    }

    // Checked up front so that an unsupported column is reported even when it has no rows.
    bool supported = preprocessingData.VariableHasType<double>(columnIndex)
                  || preprocessingData.VariableHasType<string>(columnIndex)
                  || preprocessingData.VariableHasType<DateTime>(columnIndex);
    if (!supported) {
      throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
    }

    cached = FindMissingValueIndices(columnIndex);
    MissingValueIndicies[columnIndex] = cached;
    return cached;
  }

  // Scans all rows of the column and collects those for which IsMissingValue is true.
  private IList<int> FindMissingValueIndices(int columnIndex) {
    List<int> missingIndices = new List<int>();

    for (int row = 0; row < preprocessingData.Rows; ++row) {
      if (IsMissingValue(columnIndex, row)) {
        missingIndices.Add(row);
      }
    }

    return missingIndices;
  }

  /// <summary>
  /// Returns the values of a column with all missing values left out.
  /// </summary>
  /// <typeparam name="T">Element type used to read the cells of the column.</typeparam>
  /// <param name="columnIndex">Index of the column.</param>
  /// <param name="considerSelection">
  /// If true, only the rows in <c>preprocessingData.Selection[columnIndex]</c> are read and the
  /// result is not cached; if false, all rows are read and the result is cached per column.
  /// </param>
  /// <returns>The non-missing values in row order (selection order when a selection is considered).</returns>
  public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection) {
    if (considerSelection) {
      var selectedRows = preprocessingData.Selection[columnIndex];

      List<T> selectedValues = new List<T>();
      foreach (var rowIdx in selectedRows) {
        if (!IsMissingValue(columnIndex, rowIdx)) {
          selectedValues.Add(preprocessingData.GetCell<T>(columnIndex, rowIdx));
        }
      }
      return selectedValues;
    }

    IList cached;
    if (!ValuesWithoutNaN.TryGetValue(columnIndex, out cached)) {
      List<T> values = new List<T>();

      for (int row = 0; row < preprocessingData.Rows; ++row) {
        if (!IsMissingValue(columnIndex, row)) {
          values.Add(preprocessingData.GetCell<T>(columnIndex, row));
        }
      }

      cached = values;
      ValuesWithoutNaN[columnIndex] = cached;
    }

    // The cache stores the list created by the first caller; the cast throws an
    // InvalidCastException if the entry is not convertible to IEnumerable<T>.
    return (IEnumerable<T>)cached;
  }
}
161}
Note: See TracBrowser for help on using the repository browser.