Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/SearchLogic.cs @ 10671

Last change on this file since 10671 was 10661, checked in by mleitner, 11 years ago

Cache missing value detection

File size: 4.5 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Locates missing values in the columns of an <see cref="ITransactionalPreprocessingData"/>
  /// instance and caches the per-column results until the data reports a change
  /// for that column.
  /// </summary>
  /// <remarks>
  /// A value counts as missing when it is <see cref="double.NaN"/> (double columns),
  /// null or empty (string columns) or <see cref="DateTime.MinValue"/> (DateTime columns).
  /// The instance subscribes to the data's Changed event in its constructor and
  /// never unsubscribes.
  /// </remarks>
  public class SearchLogic : ISearchLogic {
    private readonly ITransactionalPreprocessingData preprocessingData;

    // Per-column caches keyed by column index. An entry is removed again when
    // the Changed event is raised for its column.
    private readonly Dictionary<int, IEnumerable<int>> missingValueIndicesCache;
    private readonly Dictionary<int, IEnumerable> valuesWithoutNaNCache;

    /// <summary>
    /// Creates the search logic for the given data and registers for its change notifications.
    /// </summary>
    /// <param name="thePreprocessingData">The data to search; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="thePreprocessingData"/> is null.</exception>
    public SearchLogic(ITransactionalPreprocessingData thePreprocessingData) {
      if (thePreprocessingData == null) {
        throw new ArgumentNullException("thePreprocessingData");
      }
      preprocessingData = thePreprocessingData;

      missingValueIndicesCache = new Dictionary<int, IEnumerable<int>>();
      valuesWithoutNaNCache = new Dictionary<int, IEnumerable>();

      preprocessingData.Changed += preprocessingData_Changed;
    }

    /// <summary>
    /// Drops the cached results of the column named by the event arguments.
    /// </summary>
    // NOTE(review): only e.Column is invalidated. If a single change can affect
    // other columns as well (e.g. inserting or deleting rows), their cached
    // entries would stay in place - confirm against the event's semantics.
    private void preprocessingData_Changed(object sender, DataPreprocessingChangedEventArgs e) {
      missingValueIndicesCache.Remove(e.Column);
      valuesWithoutNaNCache.Remove(e.Column);
    }

    /// <summary>
    /// Collects the missing-value row indices of every variable.
    /// </summary>
    /// <returns>A dictionary mapping each variable name to the row indices of its missing values.</returns>
    public IDictionary<string, IEnumerable<int>> GetMissingValueIndices() {
      var indicesByVariable = new Dictionary<string, IEnumerable<int>>();
      foreach (string variableName in preprocessingData.VariableNames) {
        int columnIndex = preprocessingData.GetColumnIndex(variableName);
        indicesByVariable.Add(variableName, GetMissingValueIndices(columnIndex));
      }
      return indicesByVariable;
    }

    /// <summary>
    /// Tells whether the cell at the given position holds a missing value.
    /// </summary>
    /// <param name="columnIndex">Index of the column.</param>
    /// <param name="rowIndex">Index of the row.</param>
    /// <returns>
    /// True for NaN in a double column, null or empty in a string column and
    /// <see cref="DateTime.MinValue"/> in a DateTime column; false otherwise.
    /// </returns>
    /// <exception cref="ArgumentException">The column is neither double, string nor DateTime.</exception>
    public bool IsMissingValue(int columnIndex, int rowIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return double.IsNaN(preprocessingData.GetCell<double>(columnIndex, rowIndex));
      }
      if (preprocessingData.IsType<string>(columnIndex)) {
        return string.IsNullOrEmpty(preprocessingData.GetCell<string>(columnIndex, rowIndex));
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return preprocessingData.GetCell<DateTime>(columnIndex, rowIndex).Equals(DateTime.MinValue);
      }
      throw new ArgumentException("cell in column " + columnIndex + " and row index " + rowIndex + " contains a non supported type.");
    }

    /// <summary>
    /// Returns the row indices of all missing values in a column, computing and
    /// caching them on first request.
    /// </summary>
    /// <param name="columnIndex">Index of the column.</param>
    /// <returns>The ascending row indices at which the column has a missing value.</returns>
    /// <exception cref="ArgumentException">The column is neither double, string nor DateTime.</exception>
    public IEnumerable<int> GetMissingValueIndices(int columnIndex) {
      IEnumerable<int> cached;
      if (missingValueIndicesCache.TryGetValue(columnIndex, out cached)) {
        return cached;
      }

      // Checked up front so that an unsupported column is rejected even when
      // the data has no rows to scan.
      if (!IsSupportedColumnType(columnIndex)) {
        throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
      }

      IEnumerable<int> computed = FindMissingValueIndices(columnIndex);
      missingValueIndicesCache[columnIndex] = computed;
      return computed;
    }

    // True when the column holds one of the types IsMissingValue can evaluate.
    private bool IsSupportedColumnType(int columnIndex) {
      return preprocessingData.IsType<double>(columnIndex)
          || preprocessingData.IsType<string>(columnIndex)
          || preprocessingData.IsType<DateTime>(columnIndex);
    }

    // Scans all rows of the column and gathers those holding a missing value.
    private IEnumerable<int> FindMissingValueIndices(int columnIndex) {
      var missingIndices = new List<int>();
      for (int row = 0; row < preprocessingData.Rows; ++row) {
        if (IsMissingValue(columnIndex, row)) {
          missingIndices.Add(row);
        }
      }
      return missingIndices;
    }

    /// <summary>
    /// Returns the values of a column with all missing values left out, computing
    /// and caching them on first request.
    /// </summary>
    /// <typeparam name="T">Element type used to read the cells.</typeparam>
    /// <param name="columnIndex">Index of the column.</param>
    /// <returns>The non-missing values of the column in row order.</returns>
    /// <remarks>
    /// The cache is keyed by column index only. Requesting a cached column with a
    /// <typeparamref name="T"/> the cached list cannot be cast to fails with an
    /// <see cref="InvalidCastException"/>.
    /// </remarks>
    public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex) {
      IEnumerable cached;
      if (!valuesWithoutNaNCache.TryGetValue(columnIndex, out cached)) {
        var values = new List<T>();
        for (int row = 0; row < preprocessingData.Rows; ++row) {
          if (!IsMissingValue(columnIndex, row)) {
            values.Add(preprocessingData.GetCell<T>(columnIndex, row));
          }
        }

        // Stored only after the scan has finished, so a failure part-way through
        // leaves no partial entry behind.
        valuesWithoutNaNCache[columnIndex] = values;
        cached = values;
      }

      return (IEnumerable<T>)cached;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.