Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs @ 12009

Last change on this file since 12009 was 11589, checked in by bburlacu, 10 years ago

#2276: Unsealed Dataset class, refactored GetDoubleValues, GetStringValues, GetDateTimeValues methods to internally use the same generic method, added ModifiableDataset class which adds the functionality of replacing, adding or removing rows in the dataset.

File size: 10.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Collections.ObjectModel;
26using System.Linq;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
32namespace HeuristicLab.Problems.DataAnalysis {
33  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
34  [StorableClass]
35  public class Dataset : NamedItem, IDataset {
36    [StorableConstructor]
37    protected Dataset(bool deserializing) : base(deserializing) { }
38    protected Dataset(Dataset original, Cloner cloner)
39      : base(original, cloner) {
40      variableValues = new Dictionary<string, IList>(original.variableValues);
41      variableNames = new List<string>(original.variableNames);
42      rows = original.rows;
43    }
44    public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); }
45
46    public Dataset()
47      : base() {
48      Name = "-";
49      VariableNames = Enumerable.Empty<string>();
50      variableValues = new Dictionary<string, IList>();
51      rows = 0;
52    }
53
54    public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues)
55      : base() {
56      Name = "-";
57      if (!variableNames.Any()) {
58        this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList();
59      } else if (variableNames.Count() != variableValues.Count()) {
60        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
61      } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
62        throw new ArgumentException("The number of values must be equal for every variable");
63      } else if (variableNames.Distinct().Count() != variableNames.Count()) {
64        var duplicateVariableNames =
65          variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
66        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
67        foreach (var duplicateVariableName in duplicateVariableNames)
68          message += duplicateVariableName + Environment.NewLine;
69        throw new ArgumentException(message);
70      }
71
72      rows = variableValues.First().Count;
73      this.variableNames = new List<string>(variableNames);
74      this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
75      for (int i = 0; i < this.variableNames.Count; i++) {
76        var values = variableValues.ElementAt(i);
77        IList clonedValues = null;
78        if (values is IList<double>)
79          clonedValues = new List<double>(values.Cast<double>());
80        else if (values is IList<string>)
81          clonedValues = new List<string>(values.Cast<string>());
82        else if (values is IList<DateTime>)
83          clonedValues = new List<DateTime>(values.Cast<DateTime>());
84        else {
85          this.variableNames = new List<string>();
86          this.variableValues = new Dictionary<string, IList>();
87          throw new ArgumentException("The variable values must be of type IList<double>, IList<string> or IList<DateTime>");
88        }
89        this.variableValues.Add(this.variableNames[i], clonedValues);
90      }
91    }
92
93    public Dataset(IEnumerable<string> variableNames, double[,] variableValues) {
94      Name = "-";
95      if (variableNames.Count() != variableValues.GetLength(1)) {
96        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
97      }
98      if (variableNames.Distinct().Count() != variableNames.Count()) {
99        var duplicateVariableNames = variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
100        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
101        foreach (var duplicateVariableName in duplicateVariableNames)
102          message += duplicateVariableName + Environment.NewLine;
103        throw new ArgumentException(message);
104      }
105
106      rows = variableValues.GetLength(0);
107      this.variableNames = new List<string>(variableNames);
108
109      this.variableValues = new Dictionary<string, IList>(variableValues.GetLength(1));
110      for (int col = 0; col < variableValues.GetLength(1); col++) {
111        string columName = this.variableNames[col];
112        var values = new List<double>(variableValues.GetLength(0));
113        for (int row = 0; row < variableValues.GetLength(0); row++) {
114          values.Add(variableValues[row, col]);
115        }
116        this.variableValues.Add(columName, values);
117      }
118    }
119
120    #region Backwards compatible code, remove with 3.5
121    private double[,] storableData;
122    //name alias used to suppport backwards compatibility
123    [Storable(Name = "data", AllowOneWay = true)]
124    private double[,] StorableData { set { storableData = value; } }
125
126    [StorableHook(HookType.AfterDeserialization)]
127    private void AfterDeserialization() {
128      if (variableValues == null) {
129        rows = storableData.GetLength(0);
130        variableValues = new Dictionary<string, IList>();
131        for (int col = 0; col < storableData.GetLength(1); col++) {
132          string columName = variableNames[col];
133          var values = new List<double>(rows);
134          for (int row = 0; row < rows; row++) {
135            values.Add(storableData[row, col]);
136          }
137          variableValues.Add(columName, values);
138        }
139        storableData = null;
140      }
141    }
142    #endregion
143
144    [Storable(Name = "VariableValues")]
145    protected Dictionary<string, IList> variableValues;
146
147    protected List<string> variableNames;
148    [Storable]
149    public IEnumerable<string> VariableNames {
150      get { return variableNames; }
151      protected set {
152        if (variableNames != null) throw new InvalidOperationException();
153        variableNames = new List<string>(value);
154      }
155    }
156    public IEnumerable<string> DoubleVariables {
157      get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); }
158    }
159    public IEnumerable<double> GetDoubleValues(string variableName) {
160      return GetValues<double>(variableName);
161    }
162    public IEnumerable<string> GetStringValues(string variableName) {
163      return GetValues<string>(variableName);
164    }
165    public IEnumerable<DateTime> GetDateTimeValues(string variableName) {
166      return GetValues<DateTime>(variableName);
167    }
168    public ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName) {
169      var values = GetValues<double>(variableName).ToList();
170      return values.AsReadOnly();
171    }
172    public double GetDoubleValue(string variableName, int row) {
173      var values = GetValues<double>(variableName) as List<double>;
174      return values[row];
175    }
176    public IEnumerable<double> GetDoubleValues(string variableName, IEnumerable<int> rows) {
177      return GetValues<double>(variableName, rows);
178    }
179    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
180      var values = GetValues<T>(variableName) as List<T>;
181      return rows.Select(x => values[x]);
182    }
183    private IEnumerable<T> GetValues<T>(string variableName) {
184      IList list;
185      if (!variableValues.TryGetValue(variableName, out list))
186        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
187      List<T> values = list as List<T>;
188      if (values == null) throw new ArgumentException("The variable " + variableName + " is not a " + typeof(T) + " variable.");
189      return values;
190    }
191    public bool VariableHasType<T>(string variableName) {
192      return variableValues[variableName] is IList<T>;
193    }
194
195    #region IStringConvertibleMatrix Members
196    [Storable]
197    protected int rows;
198    public int Rows {
199      get { return rows; }
200      set { throw new NotSupportedException(); }
201    }
202    public int Columns {
203      get { return variableNames.Count; }
204      set { throw new NotSupportedException(); }
205    }
206    public bool SortableView {
207      get { return false; }
208      set { throw new NotSupportedException(); }
209    }
210    public bool ReadOnly {
211      get { return true; }
212    }
213    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
214      get { return this.VariableNames; }
215      set { throw new NotSupportedException(); }
216    }
217    IEnumerable<string> IStringConvertibleMatrix.RowNames {
218      get { return Enumerable.Empty<string>(); }
219      set { throw new NotSupportedException(); }
220    }
221    public string GetValue(int rowIndex, int columnIndex) {
222      return variableValues[variableNames[columnIndex]][rowIndex].ToString();
223    }
224    public bool SetValue(string value, int rowIndex, int columnIndex) {
225      throw new NotSupportedException();
226    }
227    public bool Validate(string value, out string errorMessage) {
228      throw new NotSupportedException();
229    }
230
231    public event EventHandler ColumnsChanged { add { } remove { } }
232    public event EventHandler RowsChanged { add { } remove { } }
233    public event EventHandler ColumnNamesChanged { add { } remove { } }
234    public event EventHandler RowNamesChanged { add { } remove { } }
235    public event EventHandler SortableViewChanged { add { } remove { } }
236    public event EventHandler<EventArgs<int, int>> ItemChanged { add { } remove { } }
237    public event EventHandler Reset { add { } remove { } }
238    #endregion
239  }
240}
Note: See TracBrowser for help on using the repository browser.