Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3818

Last change on this file since 3818 was 3493, checked in by gkronber, 15 years ago

fixed bugs in cloning. #938 (Data types and operators for regression problems)

File size: 12.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
27using System.Linq;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Data;
32
33namespace HeuristicLab.Problems.DataAnalysis {
34  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
35  [StorableClass]
36  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
37    public Dataset()
38      : base() {
39      this.Name = string.Empty;
40      this.data = new double[0, 0];
41      this.variableNames = new string[0];
42      this.SortableView = false;
43    }
44
45    public Dataset(IEnumerable<string> variableNames, double[,] data)
46      : this() {
47      Name = "-";
48      if (variableNames.Count() != data.GetLength(1)) {
49        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
50      }
51      this.data = data;
52      this.variableNames = variableNames.ToArray();
53    }
54
55    [Storable]
56    private string[] variableNames;
57    public IEnumerable<string> VariableNames {
58      get { return variableNames; }
59      private set {
60        if (variableNames != value) {
61          variableNames = value.ToArray();
62          OnColumnNamesChanged();
63        }
64      }
65    }
66
67    [Storable]
68    private double[,] data;
69    private double[,] Data {
70      get { return data; }
71      set {
72        if (data != value) {
73          if (value == null) throw new ArgumentNullException();
74          this.data = value;
75          OnReset(EventArgs.Empty);
76        }
77      }
78    }
79
80    // elementwise access
81    public double this[int rowIndex, int columnIndex] {
82      get { return data[rowIndex, columnIndex]; }
83      set {
84        if (!value.Equals(data[rowIndex, columnIndex])) {
85          data[rowIndex, columnIndex] = value;
86          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
87          OnItemChanged(rowIndex, columnIndex);
88        }
89      }
90    }
91    // access to full columns
92    public double[] this[string variableName] {
93      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
94    }
95
96    public double[] GetVariableValues(int variableIndex, int start, int end) {
97      if (start < 0 || !(start <= end))
98        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
99      if (end > Rows || end < start)
100        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");
101
102      double[] values = new double[end - start];
103      for (int i = 0; i < end - start; i++)
104        values[i] = data[i + start, variableIndex];
105      return values;
106    }
107
108    public double[] GetVariableValues(string variableName, int start, int end) {
109      return GetVariableValues(GetVariableIndex(variableName), start, end);
110    }
111
112    #region Variable name methods
113    public string GetVariableName(int variableIndex) {
114      return variableNames[variableIndex];
115    }
116
117    public int GetVariableIndex(string variableName) {
118      for (int i = 0; i < variableNames.Length; i++) {
119        if (variableNames[i].Equals(variableName)) return i;
120      }
121      throw new ArgumentException("The variable name " + variableName + " was not found.");
122    }
123
124    public void SetVariableName(int variableIndex, string name) {
125      if (name == null) throw new ArgumentNullException("Cannot set variable name to null for variable at index " + variableIndex + " variableIndex");
126      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
127      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
128      variableNames[variableIndex] = name;
129      OnColumnNamesChanged();
130    }
131
132    #endregion
133
134    #region variable statistics
135    public double GetMean(string variableName) {
136      return GetMean(GetVariableIndex(variableName));
137    }
138
139    public double GetMean(string variableName, int start, int end) {
140      return GetMean(GetVariableIndex(variableName), start, end);
141    }
142
143    public double GetMean(int variableIndex) {
144      return GetMean(variableIndex, 0, Rows);
145    }
146
147    public double GetMean(int variableIndex, int start, int end) {
148      return GetVariableValues(variableIndex, start, end).Average();
149    }
150
151    public double GetRange(string variableName) {
152      return GetRange(GetVariableIndex(variableName));
153    }
154
155    public double GetRange(int variableIndex) {
156      return GetRange(variableIndex, 0, Rows);
157    }
158
159    public double GetRange(string variableName, int start, int end) {
160      return GetRange(GetVariableIndex(variableName), start, end);
161    }
162
163    public double GetRange(int variableIndex, int start, int end) {
164      var values = GetVariableValues(variableIndex, start, end);
165      return values.Max() - values.Min();
166    }
167
168    public double GetMax(string variableName) {
169      return GetMax(GetVariableIndex(variableName));
170    }
171
172    public double GetMax(int variableIndex) {
173      return GetMax(variableIndex, 0, Rows);
174    }
175
176    public double GetMax(string variableName, int start, int end) {
177      return GetMax(GetVariableIndex(variableName), start, end);
178    }
179
180    public double GetMax(int variableIndex, int start, int end) {
181      return GetVariableValues(variableIndex, start, end).Max();
182    }
183
184    public double GetMin(string variableName) {
185      return GetMin(GetVariableIndex(variableName));
186    }
187
188    public double GetMin(int variableIndex) {
189      return GetMin(variableIndex, 0, Rows);
190    }
191
192    public double GetMin(string variableName, int start, int end) {
193      return GetMin(GetVariableIndex(variableName), start, end);
194    }
195
196    public double GetMin(int variableIndex, int start, int end) {
197      return GetVariableValues(variableIndex, start, end).Min();
198    }
199
200    public int GetMissingValues(string variableName) {
201      return GetMissingValues(GetVariableIndex(variableName));
202    }
203    public int GetMissingValues(int variableIndex) {
204      return GetMissingValues(variableIndex, 0, Rows);
205    }
206
207    public int GetMissingValues(string variableName, int start, int end) {
208      return GetMissingValues(GetVariableIndex(variableName), start, end);
209    }
210
211    public int GetMissingValues(int variableIndex, int start, int end) {
212      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
213    }
214
215    #endregion
216
217    public override IDeepCloneable Clone(Cloner cloner) {
218      Dataset clone = (Dataset)base.Clone(cloner);
219      clone.data = data;
220      clone.variableNames = (string[])variableNames.Clone();
221      return clone;
222    }
223
224    #region events
225    public event EventHandler<EventArgs<int, int>> DataChanged;
226    private void OnDataChanged(EventArgs<int, int> e) {
227      var listeners = DataChanged;
228      if (listeners != null) listeners(this, e);
229    }
230    public event EventHandler Reset;
231    private void OnReset(EventArgs e) {
232      var listeners = Reset;
233      if (listeners != null) listeners(this, e);
234    }
235    #endregion
236
237    #region IStringConvertibleMatrix Members
238
239    public int Rows {
240      get {
241        return data.GetLength(0);
242      }
243      set {
244        if (value == 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
245        if (value != Rows) {
246          var newValues = new double[value, Columns];
247          for (int row = 0; row < Math.Min(Rows, value); row++) {
248            for (int column = 0; column < Columns; column++) {
249              newValues[row, column] = data[row, column];
250            }
251          }
252          Data = newValues;
253        }
254      }
255    }
256
257    public int Columns {
258      get {
259        return data.GetLength(1);
260      }
261      set {
262        if (value != Columns) {
263          var newValues = new double[Rows, value];
264          var newVariableNames = new string[value];
265          for (int row = 0; row < Rows; row++) {
266            for (int column = 0; column < Math.Min(value, Columns); column++) {
267              newValues[row, column] = data[row, column];
268            }
269          }
270          string formatString = new StringBuilder().Append('0', (int)Math.Log10(value) + 1).ToString(); // >= 100 variables => ###
271          for (int column = 0; column < value; column++) {
272            if (column < Columns)
273              newVariableNames[column] = variableNames[column];
274            else
275              newVariableNames[column] = "Var" + column.ToString(formatString);
276          }
277          VariableNames = newVariableNames;
278          Data = newValues;
279        }
280      }
281    }
282
283    [Storable]
284    private bool sortableView;
285    public bool SortableView {
286      get { return sortableView; }
287      set {
288        if (value != sortableView) {
289          sortableView = value;
290          OnSortableViewChanged();
291        }
292      }
293    }
294
295    public bool ReadOnly {
296      get { return false; }
297    }
298
299    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
300      get { return this.VariableNames; }
301      set {
302        int i = 0;
303        foreach (string variableName in value) {
304          SetVariableName(i, variableName);
305          i++;
306        }
307        OnColumnNamesChanged();
308      }
309    }
310
311    IEnumerable<string> IStringConvertibleMatrix.RowNames {
312      get { return new List<string>(); }
313      set { throw new NotImplementedException(); }
314    }
315
316    public bool Validate(string value, out string errorMessage) {
317      double val;
318      bool valid = double.TryParse(value, out val);
319      errorMessage = string.Empty;
320      if (!valid) {
321        StringBuilder sb = new StringBuilder();
322        sb.Append("Invalid Value (Valid Value Format: \"");
323        sb.Append(FormatPatterns.GetDoubleFormatPattern());
324        sb.Append("\")");
325        errorMessage = sb.ToString();
326      }
327      return valid;
328    }
329
330    public string GetValue(int rowIndex, int columnIndex) {
331      return data[rowIndex, columnIndex].ToString();
332    }
333
334    public bool SetValue(string value, int rowIndex, int columnIndex) {
335      double v;
336      if (double.TryParse(value, out v)) {
337        data[rowIndex, columnIndex] = v;
338        OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
339        OnItemChanged(rowIndex, columnIndex);
340        return true;
341      } else return false;
342    }
343
344    public event EventHandler ColumnNamesChanged;
345    private void OnColumnNamesChanged() {
346      EventHandler handler = ColumnNamesChanged;
347      if (handler != null)
348        handler(this, EventArgs.Empty);
349    }
350    public event EventHandler RowNamesChanged;
351    private void OnRowNamesChanged() {
352      EventHandler handler = RowNamesChanged;
353      if (handler != null)
354        handler(this, EventArgs.Empty);
355    }
356    public event EventHandler SortableViewChanged;
357    private void OnSortableViewChanged() {
358      EventHandler handler = SortableViewChanged;
359      if (handler != null)
360        handler(this, EventArgs.Empty);
361    }
362    public event EventHandler<EventArgs<int, int>> ItemChanged;
363    private void OnItemChanged(int rowIndex, int columnIndex) {
364      if (ItemChanged != null)
365        ItemChanged(this, new EventArgs<int, int>(rowIndex, columnIndex));
366      OnToStringChanged();
367    }
368    #endregion
369
370
371  }
372}
Note: See TracBrowser for help on using the repository browser.