Changeset 10661


Ignore:
Timestamp:
03/26/14 13:34:18 (6 years ago)
Author:
mleitner
Message:

Cache Missing value detection

Location:
branches/DataPreprocessing
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab 3.3.sln

    r10542 r10661  
    19101910    HideSolutionNode = FALSE
    19111911  EndGlobalSection
     1912  GlobalSection(Performance) = preSolution
     1913    HasPerformanceSessions = true
     1914  EndGlobalSection
    19121915EndGlobal
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/SearchLogic.cs

    r10586 r10661  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Linq;
     
    2829    private readonly ITransactionalPreprocessingData preprocessingData;
    2930
     31    private Dictionary<int, IEnumerable<int>> MissingValueIndicies { get; set; }
     32    private Dictionary<int, IEnumerable> ValuesWithoutNaN { get; set; }
     33
    3034    public SearchLogic(ITransactionalPreprocessingData thePreprocessingData) {
    3135      preprocessingData = thePreprocessingData;
     36
     37      MissingValueIndicies = new Dictionary<int, IEnumerable<int>>();
     38      ValuesWithoutNaN = new Dictionary<int, IEnumerable>();
     39
     40      preprocessingData.Changed += preprocessingData_Changed;
     41    }
     42
     43    void preprocessingData_Changed(object sender, DataPreprocessingChangedEventArgs e)
     44    {
     45      MissingValueIndicies.Remove(e.Column);
     46      ValuesWithoutNaN.Remove(e.Column);
    3247    }
    3348
     
    5368
    5469    public IEnumerable<int> GetMissingValueIndices(int columnIndex) {
    55       if (preprocessingData.IsType<double>(columnIndex)) {
    56         return preprocessingData.GetValues<double>(columnIndex).Select((s, i) => new { i, s }).Where(t => double.IsNaN(t.s)).Select(t => t.i);
    57       } else if (preprocessingData.IsType<string>(columnIndex)) {
    58         return preprocessingData.GetValues<string>(columnIndex).Select((s, i) => new { i, s }).Where(t => string.IsNullOrEmpty(t.s)).Select(t => t.i);
    59       } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
    60         return preprocessingData.GetValues<DateTime>(columnIndex).Select((s, i) => new { i, s }).Where(t => t.s.Equals(DateTime.MinValue)).Select(t => t.i);
    61       } else {
    62         throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
     70      if (!MissingValueIndicies.ContainsKey(columnIndex)){       
     71          if (preprocessingData.IsType<double>(columnIndex)) {
     72            MissingValueIndicies[columnIndex] = GetMissingValueIndices<double>(columnIndex);
     73          } else if (preprocessingData.IsType<string>(columnIndex)) {
     74            MissingValueIndicies[columnIndex] = GetMissingValueIndices<string>(columnIndex);
     75          } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     76            MissingValueIndicies[columnIndex] = GetMissingValueIndices<DateTime>(columnIndex);
     77          } else {
     78            throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
     79          }
     80      }
     81
     82      return MissingValueIndicies[columnIndex];
     83   }
     84    private IEnumerable<int> GetMissingValueIndices<T>(int columnIndex) {
     85      List<int> missingIndices = new List<int>();
     86     
     87      for(int row = 0; row < preprocessingData.Rows; ++row) {
     88        if (IsMissingValue(columnIndex, row)) {
     89          missingIndices.Add(row);
     90        }
    6391      }
     92
     93      return missingIndices;
    6494    }
    6595
     96    public IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex)
     97    {
     98      if (!ValuesWithoutNaN.ContainsKey(columnIndex))
     99      {
     100        List<T> values = new List<T>();
     101
     102        for (int row = 0; row < preprocessingData.Rows; ++row)
     103        {
     104          if (!IsMissingValue(columnIndex, row))
     105          {
     106            values.Add(preprocessingData.GetCell<T>(columnIndex, row));
     107          }
     108        }
     109
     110        ValuesWithoutNaN[columnIndex] = values;
     111      }
     112
     113      return (IEnumerable<T>)ValuesWithoutNaN[columnIndex];
     114    }
    66115  }
    67116}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs

    r10635 r10661  
    2424using System.Linq;
    2525using HeuristicLab.Common;
     26using System.Collections;
    2627
    2728namespace HeuristicLab.DataPreprocessing {
     
    178179    }
    179180
    180     private List<T> GetValuesWithoutNaN<T>(int columnIndex) {
    181       IEnumerable<int> missing = searchLogic.GetMissingValueIndices(columnIndex);
    182       return preprocessingData.GetValues<T>(columnIndex)
    183         .Select((v, i) => new { i, v })
    184         .Where(x => !missing.Contains(x.i))
    185         .Select(x => x.v).ToList<T>();
    186     }
    187181    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
    188182      return GetValuesWithoutNaN<DateTime>(columnIndex).Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
     183    }
     184
     185    private IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex) {
     186      return searchLogic.GetValuesWithoutNaN<T>(columnIndex);
    189187    }
    190188
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/ISearchLogic.cs

    r10539 r10661  
    3838
    3939    bool IsMissingValue(int columnIndex, int rowIndex);
     40
     41    IEnumerable<T> GetValuesWithoutNaN<T>(int columnIndex);
    4042  }
    4143}
Note: See TracChangeset for help on using the changeset viewer.