Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs @ 10188

Last change on this file since 10188 was 10188, checked in by pfleck, 10 years ago

Removed obsolete methods.
Implemented few members...

File size: 5.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Problems.DataAnalysis;
29
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Mutable, column-oriented copy of the dataset of a data analysis problem, used for preprocessing.
  /// </summary>
  /// <remarks>
  /// Each variable is held as one typed list keyed by the variable name. The constructor creates
  /// <c>List&lt;double&gt;</c>, <c>List&lt;string&gt;</c> or <c>List&lt;DateTime&gt;</c> columns.
  /// </remarks>
  [Item("PreprocessingData", "Represents data used for preprocessing.")]
  public class PreprocessingData : NamedItem, IPreprocessingData {

    // Column storage: variable name -> typed list holding one entry per row.
    private IDictionary<string, IList> variableValues;

    // Variable names in the order reported by the source dataset.
    private IList<string> variableNames;

    // Lookup from variable name to its position in variableNames.
    private IDictionary<string, int> variableNameIndices;

    // Training partition size divided by test partition size, captured at construction time.
    // The division is done in floating point, so an empty test partition yields infinity/NaN
    // instead of throwing.
    private double trainingToTestRatio;

    /// <summary>
    /// Cloning constructor; copies the complete state of <paramref name="original"/>.
    /// </summary>
    /// <param name="original">The instance to copy from.</param>
    /// <param name="cloner">The cloner that is forwarded to the base class.</param>
    private PreprocessingData(PreprocessingData original, Cloner cloner)
      : base(original, cloner) {
      // Always read from 'original': the fields of the instance under construction are still null here.
      variableNames = new List<string>(original.variableNames);
      variableNameIndices = new Dictionary<string, int>(original.variableNameIndices);
      variableValues = CopyVariableValues(original.variableValues);
      trainingToTestRatio = original.trainingToTestRatio;
      Columns = original.Columns;
      Rows = original.Rows;
    }

    /// <summary>
    /// Creates preprocessing data by copying all variables of the dataset of <paramref name="problemData"/>.
    /// </summary>
    /// <param name="problemData">The problem data whose dataset and partitions are read.</param>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="problemData"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Thrown if a variable is neither of type double, string nor DateTime.
    /// </exception>
    public PreprocessingData(IDataAnalysisProblemData problemData)
      : base() {
      if (problemData == null) {
        throw new ArgumentNullException("problemData");
      }

      Name = "-";

      var dataset = problemData.Dataset;

      // Materialize the names once; the same list drives the index map and the column copy below.
      variableNames = new List<string>(dataset.VariableNames);

      // create dictionary from variable name to index
      variableNameIndices = new Dictionary<string, int>(variableNames.Count);
      for (int i = 0; i < variableNames.Count; i++) {
        variableNameIndices.Add(variableNames[i], i);
      }

      // copy values
      variableValues = new Dictionary<string, IList>(variableNames.Count);
      foreach (var variableName in variableNames) {
        if (dataset.IsType<double>(variableName)) {
          variableValues[variableName] = dataset.GetDoubleValues(variableName).ToList();
        } else if (dataset.IsType<string>(variableName)) {
          variableValues[variableName] = CreateColumn<string>(dataset, variableNameIndices[variableName], x => x);
        } else if (dataset.IsType<DateTime>(variableName)) {
          // NOTE(review): DateTime.Parse uses the current culture; this assumes Dataset.GetValue
          // formats dates with that same culture - confirm against Dataset.
          variableValues[variableName] = CreateColumn<DateTime>(dataset, variableNameIndices[variableName], x => DateTime.Parse(x));
        } else {
          throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
        }
      }

      trainingToTestRatio = (double)problemData.TrainingPartition.Size / problemData.TestPartition.Size;
      Columns = dataset.Columns;
      Rows = dataset.Rows;
    }

    /// <summary>
    /// Builds a typed column by converting the string value of every row of one dataset column.
    /// </summary>
    /// <typeparam name="T">Element type of the resulting column.</typeparam>
    /// <param name="ds">The dataset to read from.</param>
    /// <param name="column">Index of the column to read.</param>
    /// <param name="selector">Conversion applied to the value obtained from <c>ds.GetValue</c>.</param>
    /// <returns>A <c>List&lt;T&gt;</c> with one entry per dataset row.</returns>
    private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
      int rowCount = ds.Rows;
      // The constructor argument only reserves capacity; the list starts empty,
      // so the values have to be appended rather than assigned by index.
      var list = new List<T>(rowCount);
      for (int row = 0; row < rowCount; row++) {
        list.Add(selector(ds.GetValue(row, column)));
      }
      return list;
    }

    /// <summary>
    /// Creates a new dictionary in which every column list is an independent copy,
    /// so that modifying cells of one instance does not affect the other.
    /// </summary>
    /// <param name="source">The column storage to copy.</param>
    /// <returns>A dictionary with the same keys and copied column lists.</returns>
    private static IDictionary<string, IList> CopyVariableValues(IDictionary<string, IList> source) {
      var copy = new Dictionary<string, IList>(source.Count);
      foreach (var entry in source) {
        // Instantiate the list through its own runtime type (List<T> offers a constructor taking
        // IEnumerable<T>), which keeps the element type and therefore the result of IsType<T>.
        copy.Add(entry.Key, (IList)Activator.CreateInstance(entry.Value.GetType(), entry.Value));
      }
      return copy;
    }

    #region NamedItem abstract Member Implementations

    /// <summary>
    /// Creates a copy of this instance via the cloning constructor.
    /// </summary>
    /// <param name="cloner">The cloner passed on to the cloning constructor.</param>
    /// <returns>The newly created copy.</returns>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new PreprocessingData(this, cloner);
    }

    #endregion

    #region IPreprocessingData Members

    /// <summary>
    /// Returns the value stored for <paramref name="variableName"/> at <paramref name="row"/>, cast to <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">The type the stored value is cast to.</typeparam>
    /// <param name="variableName">Name of the variable (column).</param>
    /// <param name="row">Zero-based row index.</param>
    /// <returns>The cell value.</returns>
    public T GetCell<T>(string variableName, int row) {
      return (T)variableValues[variableName][row];
    }

    /// <summary>
    /// Overwrites the value stored for <paramref name="variableName"/> at <paramref name="row"/>.
    /// </summary>
    /// <typeparam name="T">Type of the new value.</typeparam>
    /// <param name="variableName">Name of the variable (column).</param>
    /// <param name="row">Zero-based row index.</param>
    /// <param name="value">The value to store.</param>
    public void SetCell<T>(string variableName, int row, T value) {
      variableValues[variableName][row] = value;
    }

    /// <summary>
    /// Returns the stored column of <paramref name="variableName"/> cast to <c>IEnumerable&lt;T&gt;</c>.
    /// </summary>
    /// <remarks>The internal list itself is returned, not a copy.</remarks>
    /// <typeparam name="T">Element type of the column.</typeparam>
    /// <param name="variableName">Name of the variable (column).</param>
    /// <returns>The values of the column.</returns>
    public IEnumerable<T> GetValues<T>(string variableName) {
      return (IEnumerable<T>)variableValues[variableName];
    }

    /// <summary>
    /// Replaces the column of <paramref name="variableName"/> with a new list built from <paramref name="values"/>.
    /// </summary>
    /// <typeparam name="T">Element type of the new column.</typeparam>
    /// <param name="variableName">Name of the variable (column).</param>
    /// <param name="values">The new values; they are copied into a new list.</param>
    public void SetValues<T>(string variableName, IEnumerable<T> values) {
      variableValues[variableName] = values.ToList();
    }

    /// <summary>Not implemented yet.</summary>
    /// <param name="rowIndex">Index of the row to insert.</param>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void InsertRow(int rowIndex) {
      throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <param name="rowIndex">Index of the row to delete.</param>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void DeleteRow(int rowIndex) {
      throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <param name="variableName">Name of the column to insert.</param>
    /// <param name="columnIndex">Index at which the column should be inserted.</param>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void InsertColumn(string variableName, int columnIndex) {
      throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <param name="variableName">Name of the column to delete.</param>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void DeleteColumn(string variableName) {
      throw new NotImplementedException();
    }

    /// <summary>
    /// Gets the variable names in the order taken over from the source dataset.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
    }

    /// <summary>
    /// Checks whether the column of <paramref name="variableName"/> is stored as a <c>List&lt;T&gt;</c>.
    /// </summary>
    /// <typeparam name="T">The element type to test for.</typeparam>
    /// <param name="variableName">Name of the variable (column).</param>
    /// <returns>True if the stored list is a <c>List&lt;T&gt;</c>; otherwise false.</returns>
    public bool IsType<T>(string variableName) {
      return variableValues[variableName] is List<T>;
    }

    /// <summary>
    /// Gets the number of columns, taken over from the source dataset.
    /// </summary>
    public int Columns {
      get;
      private set;
    }

    /// <summary>
    /// Gets the number of rows, taken over from the source dataset.
    /// </summary>
    public int Rows {
      get;
      private set;
    }

    /// <summary>Not implemented yet.</summary>
    /// <param name="problemData">The problem data that should receive the preprocessed values.</param>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void ExportTo(IDataAnalysisProblemData problemData) {
      throw new NotImplementedException();
    }

    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.