Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs @ 10204

Last change on this file since 10204 was 10194, checked in by pfleck, 11 years ago

Implemented InsertRow, DeleteRow, InsertColumn, DeleteColumn in PreprocessingData.

File size: 7.2 KB
Rev Line
[10163]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[10168]23using System.Collections;
[10163]24using System.Collections.Generic;
[10185]25using System.Linq;
26using HeuristicLab.Common;
[10163]27using HeuristicLab.Core;
28using HeuristicLab.Problems.DataAnalysis;
29
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Mutable, column-oriented copy of the dataset of a data analysis problem.
  /// Each variable is stored as one typed list (double, string or DateTime when
  /// created from problem data); rows and columns can be inserted and deleted.
  /// </summary>
  [Item("PreprocessingData", "Represents data used for preprocessing.")]
  public class PreprocessingData : NamedItem, IPreprocessingData {

    // Column values keyed by variable name. Every column created by this class is a List<T>.
    private IDictionary<string, IList> variableValues;

    // Variable names in column order.
    private IList<string> variableNames;

    // Variable name -> position of that name in variableNames.
    private IDictionary<string, int> variableNameIndices;

    // Training partition size divided by test partition size of the source problem data.
    private double trainingToTestRatio;

    /// <summary>
    /// Cloning constructor. Copies all state from <paramref name="original"/>; every
    /// column list is duplicated so that the clone can be edited independently.
    /// </summary>
    /// <param name="original">The instance to copy from.</param>
    /// <param name="cloner">The cloner passed on to the base class.</param>
    private PreprocessingData(PreprocessingData original, Cloner cloner)
      : base(original, cloner) {
      variableValues = new Dictionary<string, IList>();
      foreach (var entry in original.variableValues) {
        variableValues.Add(entry.Key, CopyColumn(entry.Value));
      }
      variableNames = new List<string>(original.variableNames);
      variableNameIndices = new Dictionary<string, int>(original.variableNameIndices);
      trainingToTestRatio = original.trainingToTestRatio;
    }

    /// <summary>
    /// Creates preprocessing data from the dataset of <paramref name="problemData"/>.
    /// </summary>
    /// <param name="problemData">The problem data whose dataset is copied.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// If a variable is neither of type double, string nor DateTime, or if a variable name occurs twice.
    /// </exception>
    public PreprocessingData(IDataAnalysisProblemData problemData)
      : base() {
      if (problemData == null) {
        throw new ArgumentNullException("problemData");
      }

      Name = "-";

      variableNames = new List<string>(problemData.Dataset.VariableNames);

      // create dictionary from variable name to index
      variableNameIndices = new Dictionary<string, int>();
      RebuildVariableNameIndices();

      // copy values
      variableValues = new Dictionary<string, IList>();
      foreach (var variableName in variableNames) {
        if (problemData.Dataset.IsType<double>(variableName)) {
          variableValues[variableName] = problemData.Dataset.GetDoubleValues(variableName).ToList();
        } else if (problemData.Dataset.IsType<string>(variableName)) {
          variableValues[variableName] = CreateColumn<string>(problemData.Dataset, variableNameIndices[variableName], x => x);
        } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
          variableValues[variableName] = CreateColumn<DateTime>(problemData.Dataset, variableNameIndices[variableName], x => DateTime.Parse(x));
        } else {
          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
        }
      }

      trainingToTestRatio = (double)problemData.TrainingPartition.Size / problemData.TestPartition.Size;
    }

    /// <summary>
    /// Reads one dataset column cell by cell and converts each cell with <paramref name="selector"/>.
    /// </summary>
    /// <param name="ds">The dataset to read from.</param>
    /// <param name="column">The index of the column in <paramref name="ds"/>.</param>
    /// <param name="selector">Converts the string returned by the dataset into the column type.</param>
    /// <returns>A <see cref="List{T}"/> with one entry per dataset row.</returns>
    private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
      // The constructor argument only reserves capacity; the list starts empty,
      // so the values have to be appended rather than assigned by index.
      var list = new List<T>(ds.Rows);
      for (int row = 0; row < ds.Rows; row++) {
        list.Add(selector(ds.GetValue(row, column)));
      }
      return list;
    }

    /// <summary>
    /// Creates a new list of the same runtime type as <paramref name="column"/> holding the same elements.
    /// </summary>
    /// <param name="column">The column to duplicate; expected to be a <see cref="List{T}"/>.</param>
    /// <returns>An independent copy of the column.</returns>
    private static IList CopyColumn(IList column) {
      // List<T> offers a constructor taking IEnumerable<T>, which is resolved here via reflection
      // because the element type is not known statically.
      return (IList)Activator.CreateInstance(column.GetType(), new object[] { column });
    }

    /// <summary>
    /// Recomputes the name-to-index map from the current order of <see cref="variableNames"/>.
    /// </summary>
    /// <exception cref="ArgumentException">If a variable name occurs more than once.</exception>
    private void RebuildVariableNameIndices() {
      variableNameIndices.Clear();
      for (int i = 0; i < variableNames.Count; i++) {
        variableNameIndices.Add(variableNames[i], i);
      }
    }

    #region NamedItem abstract Member Implementations

    /// <summary>
    /// Creates a copy of this instance via the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new PreprocessingData(this, cloner);
    }

    #endregion

    #region IPreprocessingData Members

    /// <summary>
    /// Returns the value stored in the given column and row, cast to <typeparamref name="T"/>.
    /// </summary>
    /// <exception cref="InvalidCastException">If the stored value is not of type <typeparamref name="T"/>.</exception>
    public T GetCell<T>(string variableName, int row) {
      return (T)variableValues[variableName][row];
    }

    /// <summary>
    /// Overwrites the value stored in the given column and row.
    /// </summary>
    public void SetCell<T>(string variableName, int row, T value) {
      variableValues[variableName][row] = value;
    }

    /// <summary>
    /// Returns the values of a column. The returned sequence is the stored list itself, not a copy.
    /// </summary>
    /// <exception cref="InvalidCastException">
    /// If the column does not hold elements of type <typeparamref name="T"/>;
    /// use <see cref="IsType{T}"/> to check beforehand.
    /// </exception>
    public IEnumerable<T> GetValues<T>(string variableName) {
      return (IEnumerable<T>)variableValues[variableName];
    }

    /// <summary>
    /// Replaces the values of a column with a new list built from <paramref name="values"/>.
    /// </summary>
    public void SetValues<T>(string variableName, IEnumerable<T> values) {
      variableValues[variableName] = values.ToList();
    }

    /// <summary>
    /// Inserts a new row at <paramref name="rowIndex"/> into every column. The new cell holds the
    /// default value of the column's element type (null for reference types).
    /// </summary>
    public void InsertRow(int rowIndex) {
      foreach (IList column in variableValues.Values) {
        Type type = column.GetType().GetGenericArguments()[0];

        column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
      }
    }

    /// <summary>
    /// Removes the row at <paramref name="rowIndex"/> from every column.
    /// </summary>
    public void DeleteRow(int rowIndex) {
      foreach (IList column in variableValues.Values) {
        column.RemoveAt(rowIndex);
      }
    }

    /// <summary>
    /// Inserts a new column of element type <typeparamref name="T"/> at position
    /// <paramref name="columnIndex"/>. The column gets one default-valued cell per existing row.
    /// </summary>
    /// <exception cref="ArgumentException">If a column named <paramref name="variableName"/> already exists.</exception>
    /// <exception cref="ArgumentOutOfRangeException">If <paramref name="columnIndex"/> is not in [0, Columns].</exception>
    public void InsertColumn<T>(string variableName, int columnIndex) {
      // Validate before mutating so that a failure cannot leave the three collections out of sync.
      if (variableValues.ContainsKey(variableName)) {
        throw new ArgumentException("A column with variableName: " + variableName + " already exists.");
      }
      if (columnIndex < 0 || columnIndex > variableNames.Count) {
        throw new ArgumentOutOfRangeException("columnIndex");
      }

      // Row count must be read before the new column is registered.
      int rows = Rows;
      variableValues.Add(variableName, Enumerable.Repeat(default(T), rows).ToList());
      variableNames.Insert(columnIndex, variableName);

      // All columns at or behind the insertion point moved by one position.
      RebuildVariableNameIndices();
    }

    /// <summary>
    /// Removes the column named <paramref name="variableName"/>.
    /// </summary>
    /// <exception cref="KeyNotFoundException">If no such column exists.</exception>
    public void DeleteColumn(string variableName) {
      // Look the index up first: an unknown name throws before anything is modified.
      int columnIndex = variableNameIndices[variableName];

      variableValues.Remove(variableName);
      variableNames.RemoveAt(columnIndex);

      // All columns behind the removed one moved by one position.
      RebuildVariableNameIndices();
    }

    /// <summary>
    /// The variable names in column order.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
    }

    /// <summary>
    /// Checks whether the column is stored as a <see cref="List{T}"/> of <typeparamref name="T"/>.
    /// </summary>
    public bool IsType<T>(string variableName) {
      return variableValues[variableName] is List<T>;
    }

    /// <summary>
    /// The number of columns (variables).
    /// </summary>
    public int Columns {
      get { return variableNames.Count; }
    }

    /// <summary>
    /// The number of rows, taken from the first column; 0 if there are no columns.
    /// </summary>
    public int Rows {
      get { return variableNames.Count > 0 ? variableValues[variableNames[0]].Count : 0; }
    }

    /// <summary>
    /// Returns, for every variable, the row indices that hold a missing value.
    /// </summary>
    public IDictionary<string, IEnumerable<int>> GetMissingValueIndices() {
      var dic = new Dictionary<string, IEnumerable<int>>();
      foreach (string variableName in VariableNames) {
        dic.Add(variableName, GetMissingValueIndices(variableName));
      }
      return dic;
    }

    /// <summary>
    /// Checks whether a cell holds a missing value: NaN for double columns, null or empty for
    /// string columns and <see cref="DateTime.MinValue"/> for DateTime columns.
    /// </summary>
    /// <exception cref="ArgumentException">If the column has any other element type.</exception>
    public bool IsMissingValue(string variableName, int rowIndex) {
      if (IsType<double>(variableName)) {
        return double.IsNaN(GetCell<double>(variableName, rowIndex));
      } else if (IsType<string>(variableName)) {
        return string.IsNullOrEmpty(GetCell<string>(variableName, rowIndex));
      } else if (IsType<DateTime>(variableName)) {
        return GetCell<DateTime>(variableName, rowIndex).Equals(DateTime.MinValue);
      } else {
        throw new ArgumentException("cell in column with variableName: " + variableName + " and row index " + rowIndex + " contains a non supported type.");
      }
    }

    /// <summary>
    /// Returns the row indices of a column that hold a missing value (same criteria as
    /// <see cref="IsMissingValue"/>). The result is evaluated lazily over the stored column.
    /// </summary>
    /// <exception cref="ArgumentException">If the column has an unsupported element type.</exception>
    public IEnumerable<int> GetMissingValueIndices(string variableName) {
      if (IsType<double>(variableName)) {
        return GetValues<double>(variableName).Select((s, i) => new { i, s }).Where(t => double.IsNaN(t.s)).Select(t => t.i);
      } else if (IsType<string>(variableName)) {
        return GetValues<string>(variableName).Select((s, i) => new { i, s }).Where(t => string.IsNullOrEmpty(t.s)).Select(t => t.i);
      } else if (IsType<DateTime>(variableName)) {
        return GetValues<DateTime>(variableName).Select((s, i) => new { i, s }).Where(t => t.s.Equals(DateTime.MinValue)).Select(t => t.i);
      } else {
        throw new ArgumentException("column with variableName: " + variableName + " contains a non supported type.");
      }
    }

    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.