Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/SearchLogic.cs @ 11156

Last change on this file since 11156 was 11156, checked in by gkronber, 10 years ago

#2206: made several changes / improvements to the data-preprocessing code while reviewing the code

File size: 5.9 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
[10661]23using System.Collections;
[10236]24using System.Collections.Generic;
25using System.Linq;
26
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Finds missing values in the wrapped preprocessing data and returns the
  /// non-missing values of a column. Per-column results are cached and the
  /// caches are invalidated from the data's <c>Changed</c> event.
  /// </summary>
  /// <remarks>
  /// A cell counts as missing when it is <c>double.NaN</c>, a null or empty
  /// string, or <c>DateTime.MinValue</c>, depending on the column type.
  /// </remarks>
  public class SearchLogic : ISearchLogic {
    private readonly ITransactionalPreprocessingData preprocessingData;

    // Both caches are keyed by column index and are filled lazily on first request.
    private readonly Dictionary<int, IList<int>> missingValueIndicesCache = new Dictionary<int, IList<int>>();
    private readonly Dictionary<int, IList> valuesWithoutNaNCache = new Dictionary<int, IList>();

    /// <summary>Variable names as reported by the wrapped preprocessing data.</summary>
    public IEnumerable<string> VariableNames {
      get { return preprocessingData.VariableNames; }
    }

    /// <summary>Number of columns as reported by the wrapped preprocessing data.</summary>
    public int Columns {
      get { return preprocessingData.Columns; }
    }

    /// <summary>Number of rows as reported by the wrapped preprocessing data.</summary>
    public int Rows {
      get { return preprocessingData.Rows; }
    }

    /// <summary>
    /// Creates the search logic for the given data and subscribes to its
    /// <c>Changed</c> event so that cached results can be invalidated.
    /// </summary>
    /// <param name="thePreprocessingData">The data to search in.</param>
    public SearchLogic(ITransactionalPreprocessingData thePreprocessingData) {
      preprocessingData = thePreprocessingData;
      preprocessingData.Changed += preprocessingData_Changed;
    }

    /// <summary>
    /// Invalidates cached results according to the kind of change reported.
    /// </summary>
    private void preprocessingData_Changed(object sender, DataPreprocessingChangedEventArgs e) {
      switch (e.Type) {
        case DataPreprocessingChangedEventType.ChangeColumn:
          // Only the content of one column changed; all other entries stay valid.
          missingValueIndicesCache.Remove(e.Column);
          valuesWithoutNaNCache.Remove(e.Column);
          break;
        case DataPreprocessingChangedEventType.DeleteColumn:
        case DataPreprocessingChangedEventType.AddColumn:
          // The caches are keyed by column index. NOTE(review): presumably deleting
          // (or inserting) a column shifts the indices of all following columns, so
          // every entry at or after the affected index is dropped to avoid serving
          // values of a neighbouring column. When the last column is affected this
          // is equivalent to removing (or not touching) a single entry.
          InvalidateColumnsFrom(e.Column);
          break;
        default:
          // DeleteRow, AddRow, ChangeItem, Any, Transformation and any unknown
          // event type invalidate everything.
          missingValueIndicesCache.Clear();
          valuesWithoutNaNCache.Clear();
          break;
      }
    }

    /// <summary>
    /// Removes all cache entries whose column index is greater than or equal to
    /// <paramref name="firstColumn"/>.
    /// </summary>
    private void InvalidateColumnsFrom(int firstColumn) {
      // Keys are copied to a list first; removing while enumerating Keys would throw.
      foreach (int column in missingValueIndicesCache.Keys.Where(c => c >= firstColumn).ToList()) {
        missingValueIndicesCache.Remove(column);
      }
      foreach (int column in valuesWithoutNaNCache.Keys.Where(c => c >= firstColumn).ToList()) {
        valuesWithoutNaNCache.Remove(column);
      }
    }

    /// <summary>
    /// Returns the row indices of missing values for every column.
    /// </summary>
    /// <returns>A new dictionary mapping each column index to its missing row indices.</returns>
    /// <exception cref="ArgumentException">A column has an unsupported type.</exception>
    public IDictionary<int, IList<int>> GetMissingValueIndices() {
      var result = new Dictionary<int, IList<int>>();
      for (int column = 0; column < preprocessingData.Columns; ++column) {
        result.Add(column, GetMissingValueIndices(column));
      }
      return result;
    }

    /// <summary>
    /// Tells whether the given cell holds a missing value: NaN for double columns,
    /// null or empty for string columns, <c>DateTime.MinValue</c> for DateTime columns.
    /// </summary>
    /// <param name="columnIndex">Index of the column.</param>
    /// <param name="rowIndex">Index of the row.</param>
    /// <exception cref="ArgumentException">The column is neither double, string nor DateTime.</exception>
    public bool IsMissingValue(int columnIndex, int rowIndex) {
      if (preprocessingData.VariableHasType<double>(columnIndex)) {
        return double.IsNaN(preprocessingData.GetCell<double>(columnIndex, rowIndex));
      }
      if (preprocessingData.VariableHasType<string>(columnIndex)) {
        return string.IsNullOrEmpty(preprocessingData.GetCell<string>(columnIndex, rowIndex));
      }
      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
        return preprocessingData.GetCell<DateTime>(columnIndex, rowIndex).Equals(DateTime.MinValue);
      }
      throw new ArgumentException("cell in column " + columnIndex + " and row index " + rowIndex + " contains a non supported type.");
    }

    /// <summary>
    /// Returns the row indices of missing values in one column. The result is
    /// computed on the first request and served from the cache afterwards.
    /// </summary>
    /// <param name="columnIndex">Index of the column.</param>
    /// <returns>The cached list instance; callers share it with the cache.</returns>
    /// <exception cref="ArgumentException">The column is neither double, string nor DateTime.</exception>
    public IList<int> GetMissingValueIndices(int columnIndex) {
      IList<int> indices;
      if (!missingValueIndicesCache.TryGetValue(columnIndex, out indices)) {
        // Checked up front so that an unsupported column is rejected even when
        // the data has no rows (the per-cell check would never run then).
        if (!IsSupportedColumnType(columnIndex)) {
          throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
        }
        indices = FindMissingValueIndices(columnIndex);
        missingValueIndicesCache[columnIndex] = indices;
      }
      return indices;
    }

    /// <summary>Tells whether the column is of type double, string or DateTime.</summary>
    private bool IsSupportedColumnType(int columnIndex) {
      return preprocessingData.VariableHasType<double>(columnIndex)
          || preprocessingData.VariableHasType<string>(columnIndex)
          || preprocessingData.VariableHasType<DateTime>(columnIndex);
    }

    /// <summary>Scans all rows of the column and collects the indices of missing cells.</summary>
    private IList<int> FindMissingValueIndices(int columnIndex) {
      var missingIndices = new List<int>();
      for (int row = 0; row < preprocessingData.Rows; ++row) {
        if (IsMissingValue(columnIndex, row)) {
          missingIndices.Add(row);
        }
      }
      return missingIndices;
    }

    /// <summary>
    /// Returns the values of a column that are not missing.
    /// </summary>
    /// <typeparam name="T">Element type used to read the cells.</typeparam>
    /// <param name="columnIndex">Index of the column.</param>
    /// <param name="considerSelection">
    /// When true, only the rows in <c>preprocessingData.Selection[columnIndex]</c>
    /// are considered and the result is not cached. When false, all rows are
    /// considered and the result is cached per column.
    /// </param>
    /// <exception cref="ArgumentException">The column is neither double, string nor DateTime.</exception>
    public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex, bool considerSelection) {
      if (considerSelection) {
        var selectedRows = preprocessingData.Selection[columnIndex];

        var selectedValues = new List<T>();
        foreach (var rowIdx in selectedRows) {
          if (!IsMissingValue(columnIndex, rowIdx)) {
            selectedValues.Add(preprocessingData.GetCell<T>(columnIndex, rowIdx));
          }
        }
        return selectedValues;
      }

      IList cached;
      if (!valuesWithoutNaNCache.TryGetValue(columnIndex, out cached)) {
        var values = new List<T>();
        for (int row = 0; row < preprocessingData.Rows; ++row) {
          if (!IsMissingValue(columnIndex, row)) {
            values.Add(preprocessingData.GetCell<T>(columnIndex, row));
          }
        }
        cached = values;
        valuesWithoutNaNCache[columnIndex] = cached;
      }

      // The cache holds the list built by the first request for this column, so
      // this cast throws InvalidCastException if a later request uses an
      // incompatible T for the same column.
      return (IEnumerable<T>)cached;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.