Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2898_GeneralizedAdditiveModels/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs @ 17074

Last change on this file since 17074 was 15769, checked in by bburlacu, 7 years ago

#2897: Add type checks in Dataset constructor, remove cloning of values in the ModifiableDataset.AddVariable method. Provide small fix in GradientBoostingRegressionAlgorithm for AddVariable.

File size: 8.4 KB
Line 
1#region License Information
2
3/* HeuristicLab
4 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
5 *
6 * This file is part of HeuristicLab.
7 *
8 * HeuristicLab is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * HeuristicLab is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#endregion
23
24using System;
25using System.Collections;
26using System.Collections.Generic;
27using System.Linq;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Data;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A dataset whose rows, variables (columns) and individual values can be changed after construction.
  /// </summary>
  /// <remarks>
  /// The column lists (<c>variableValues</c>), the column order (<c>variableNames</c>) and the row counter
  /// (<c>rows</c>) are not declared in this class; they are used here as mutable members that this class
  /// keeps in sync. Most mutators raise <see cref="Reset"/> (plus the more specific change event);
  /// <see cref="ReplaceVariable"/> raises no event and <see cref="SetVariableValue"/> raises only
  /// <see cref="ItemChanged"/>.
  /// </remarks>
  [Item("ModifiableDataset", "Represents a dataset containing data that should be analyzed, which can be modified by adding or replacing variables and values.")]
  [StorableClass]
  public sealed class ModifiableDataset : Dataset, IStringConvertibleMatrix {
    [StorableConstructor]
    private ModifiableDataset(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor. After the base class has cloned its state, every column list is replaced
    /// by a fresh list holding the same values.
    /// </summary>
    /// <exception cref="ArgumentException">A column has a type other than DateTime, double or string.</exception>
    private ModifiableDataset(ModifiableDataset original, Cloner cloner) : base(original, cloner) {
      // Snapshot the keys first: the dictionary entries are reassigned inside the loop.
      // NOTE(review): presumably the fresh lists keep later edits of the clone from touching the
      // original's lists - confirm against the base-class cloning behavior.
      var variables = variableValues.Keys.ToList();
      foreach (var v in variables) {
        var type = GetVariableType(v);
        if (type == typeof(DateTime)) {
          variableValues[v] = GetDateTimeValues(v).ToList();
        } else if (type == typeof(double)) {
          variableValues[v] = GetDoubleValues(v).ToList();
        } else if (type == typeof(string)) {
          variableValues[v] = GetStringValues(v).ToList();
        } else {
          throw new ArgumentException("Unsupported type " + type + " for variable " + v);
        }
      }
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new ModifiableDataset(this, cloner); }
    public ModifiableDataset() : base() { }

    public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) : base(variableNames, variableValues) { }

    /// <summary>
    /// Overwrites all values of one row and raises <see cref="Reset"/>.
    /// </summary>
    /// <param name="row">Zero-based index of the row to overwrite.</param>
    /// <param name="values">One value per variable, in the order of the variable names.</param>
    /// <exception cref="ArgumentException">Wrong number of values, or a value is null or of the wrong type.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="row"/> is not an existing row.</exception>
    public void ReplaceRow(int row, IEnumerable<object> values) {
      var list = values.ToList();
      // Validate everything up front so that a failure never leaves a half-written row.
      EnsureRowValuesMatchVariables(list);
      EnsureRowIndexInRange(row);

      for (int i = 0; i < list.Count; ++i) {
        variableValues[variableNames[i]][row] = list[i];
      }
      OnReset();
    }

    /// <summary>
    /// Replaces the value list of an existing variable. The provided list is stored as is (not copied)
    /// and no change event is raised.
    /// </summary>
    /// <param name="variableName">Name of a variable that is present in the dataset.</param>
    /// <param name="values">The new values; must contain as many entries as the current list.</param>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The variable does not exist, the number of values differs, or the first value is null or of the wrong type.
    /// </exception>
    public void ReplaceVariable(string variableName, IList values) {
      IList current;
      if (!variableValues.TryGetValue(variableName, out current))
        throw new ArgumentException(string.Format("Variable {0} is not present in the dataset.", variableName));
      if (values == null)
        throw new ArgumentNullException("values");
      if (values.Count != current.Count)
        throw new ArgumentException("The number of values must coincide with the number of dataset rows.");
      // Only the first element is inspected (as before); an empty list has no element to inspect,
      // so the type check is skipped instead of failing on values[0].
      if (values.Count > 0) {
        var first = values[0];
        if (first == null || GetVariableType(variableName) != first.GetType())
          throw new ArgumentException("The type of the provided value does not match the variable type.");
      }
      variableValues[variableName] = values;
    }

    /// <summary>
    /// Appends a row, increments the row counter and raises <see cref="RowsChanged"/> and <see cref="Reset"/>.
    /// </summary>
    /// <param name="values">One value per variable, in the order of the variable names.</param>
    /// <exception cref="ArgumentException">Wrong number of values, or a value is null or of the wrong type.</exception>
    public void AddRow(IEnumerable<object> values) {
      var list = values.ToList();
      EnsureRowValuesMatchVariables(list);

      for (int i = 0; i < list.Count; ++i) {
        variableValues[variableNames[i]].Add(list[i]);
      }
      rows++;
      OnRowsChanged();
      OnReset();
    }

    /// <summary>
    /// Adds a new variable (column) to the dataset. The provided list is stored as is (not copied).
    /// Raises <see cref="ColumnsChanged"/>, <see cref="ColumnNamesChanged"/> and <see cref="Reset"/>;
    /// additionally raises <see cref="RowsChanged"/> when the first column defines a new row count.
    /// </summary>
    /// <param name="variableName">Name of the new variable; must not exist yet.</param>
    /// <param name="values">Non-empty list of values of a type accepted by <c>IsAllowedType</c>.</param>
    /// <exception cref="ArgumentException">
    /// The variable already exists, <paramref name="values"/> is null or empty or of an unsupported type,
    /// or its length differs from the number of rows of the existing variables.
    /// </exception>
    public void AddVariable(string variableName, IList values) {
      if (variableValues.ContainsKey(variableName))
        throw new ArgumentException(string.Format("Variable {0} is already present in the dataset.", variableName));

      if (values == null || values.Count == 0)
        throw new ArgumentException("Cannot add variable with no values.");

      if (!IsAllowedType(values))
        throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName));

      // When other columns exist the new column must have the same length; otherwise the
      // columns would disagree about the number of rows.
      bool isFirstVariable = variableNames.Count == 0;
      if (!isFirstVariable && values.Count != rows)
        throw new ArgumentException(string.Format("{0} values were provided, but the dataset has {1} rows.", values.Count, rows));

      // The first column of a dataset without columns defines the row count.
      bool rowCountChanged = isFirstVariable && rows != values.Count;
      if (rowCountChanged)
        rows = values.Count;

      variableValues[variableName] = values;
      variableNames.Add(variableName);

      if (rowCountChanged)
        OnRowsChanged();
      OnColumnsChanged();
      OnColumnNamesChanged();
      OnReset();
    }

    /// <summary>
    /// Removes a variable (column) and raises <see cref="ColumnsChanged"/>,
    /// <see cref="ColumnNamesChanged"/> and <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The variable does not exist.</exception>
    public void RemoveVariable(string variableName) {
      // Remove returns false when the key is absent, which saves a separate ContainsKey lookup.
      if (!variableValues.Remove(variableName))
        throw new ArgumentException(string.Format("The variable {0} does not exist in the dataset.", variableName));
      variableNames.Remove(variableName);
      OnColumnsChanged();
      OnColumnNamesChanged();
      OnReset();
    }

    /// <summary>
    /// Removes one row from every column, decrements the row counter and raises
    /// <see cref="RowsChanged"/> and <see cref="Reset"/>. Slow, avoid using this.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="row"/> is not an existing row.</exception>
    public void RemoveRow(int row) {
      // Without this check a dataset without columns would skip the loop below and still
      // decrement the row counter, possibly below zero.
      EnsureRowIndexInRange(row);
      foreach (var list in variableValues.Values)
        list.RemoveAt(row);
      rows--;
      OnRowsChanged();
      OnReset();
    }

    /// <summary>
    /// Sets a single value and raises <see cref="ItemChanged"/> for the affected cell.
    /// </summary>
    /// <param name="value">The new value; its runtime type must equal the variable type.</param>
    /// <param name="variableName">Name of an existing variable.</param>
    /// <param name="row">Zero-based row index.</param>
    /// <exception cref="ArgumentException">The variable does not exist, or the value is null or of the wrong type.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="row"/> is outside the variable's value list.</exception>
    public void SetVariableValue(object value, string variableName, int row) {
      IList list;
      if (!variableValues.TryGetValue(variableName, out list) || list == null)
        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
      // Valid indices are 0 .. Count-1; row == Count must be rejected as well.
      if (row < 0 || row >= list.Count)
        throw new ArgumentOutOfRangeException("row", "Invalid row value");
      if (value == null || GetVariableType(variableName) != value.GetType())
        throw new ArgumentException("The type of the provided value does not match the variable type.");

      list[row] = value;
      OnItemChanged(row, variableNames.IndexOf(variableName));
    }

    /// <summary>
    /// Parses <paramref name="value"/> according to the type of the addressed column and stores it.
    /// </summary>
    /// <returns>
    /// true if the text could be converted to the column type and was stored; false if it could not
    /// be converted or the column type is not double, DateTime or string.
    /// </returns>
    bool IStringConvertibleMatrix.SetValue(string value, int rowIndex, int columnIndex) {
      var variableName = variableNames[columnIndex];
      var type = GetVariableType(variableName);

      // Dispatch on the column type instead of guessing from the text: "123" is a perfectly
      // valid entry for a string column and must not be converted to a double first.
      object parsed;
      if (type == typeof(double)) {
        double dv;
        if (!double.TryParse(value, out dv))
          return false;
        parsed = dv;
      } else if (type == typeof(DateTime)) {
        DateTime dt;
        if (!DateTime.TryParse(value, out dt))
          return false;
        parsed = dt;
      } else if (type == typeof(string)) {
        if (value == null)
          return false;
        parsed = value;
      } else {
        return false;
      }

      SetVariableValue(parsed, variableName, rowIndex);
      return true;
    }

    /// <summary>
    /// Accepts every text; the conversion itself is checked in <c>SetValue</c>.
    /// </summary>
    bool IStringConvertibleMatrix.Validate(string value, out string errorMessage) {
      errorMessage = string.Empty;
      return true;
    }

    #region validation helpers
    // Checks that a row has exactly one non-null value of the matching type for every variable.
    private void EnsureRowValuesMatchVariables(IList<object> rowValues) {
      if (rowValues.Count != variableNames.Count)
        throw new ArgumentException("The number of values must be equal to the number of variable names.");
      for (int i = 0; i < rowValues.Count; ++i) {
        var value = rowValues[i];
        if (value == null || value.GetType() != GetVariableType(variableNames[i])) {
          throw new ArgumentException("The type of the provided value does not match the variable type.");
        }
      }
    }

    // Checks that the given index addresses an existing row according to the row counter.
    private void EnsureRowIndexInRange(int row) {
      if (row < 0 || row >= rows)
        throw new ArgumentOutOfRangeException("row", "Invalid row value");
    }
    #endregion

    #region event handlers
    // Each raiser copies the delegate to a local before the null check so that the
    // check and the invocation see the same delegate instance.
    public override event EventHandler RowsChanged;
    private void OnRowsChanged() {
      var handler = RowsChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }

    public override event EventHandler ColumnsChanged;
    private void OnColumnsChanged() {
      var handler = ColumnsChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }

    public override event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      var handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }

    public override event EventHandler Reset;
    private void OnReset() {
      var handler = Reset;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }

    public override event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      var handler = ItemChanged;
      if (handler != null) {
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.