Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3923

Last change on this file since 3923 was 3892, checked in by gkronber, 15 years ago

Improved code for analyzers for SVR and symbolic regression. #1009

File size: 12.8 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Xml;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A rectangular table of <see cref="double"/> values in which every column is a named
  /// variable and every row is one observation. Values are stored in a
  /// <c>double[rows, columns]</c> array; missing values are counted as <see cref="double.NaN"/>
  /// (see <see cref="GetMissingValues(int, int, int)"/>).
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates an empty dataset (0 rows, 0 columns) with an empty name.
    /// </summary>
    public Dataset()
      : base() {
      this.Name = string.Empty;
      this.data = new double[0, 0];
      this.variableNames = new string[0];
      this.SortableView = false;
    }

    /// <summary>
    /// Creates a dataset named "-" over the given values.
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>.</param>
    /// <param name="data">Values indexed as [row, column]. The array is stored by reference, not copied.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">The number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : this() {
      Name = "-";
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      // Materialize once: the sequence may be lazy and must not be enumerated twice.
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      this.data = data;
      this.variableNames = names;
    }

    [Storable]
    private string[] variableNames;
    /// <summary>
    /// The column names, in column order.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      private set {
        if (variableNames != value) {
          variableNames = value.ToArray();
          OnColumnNamesChanged();
        }
      }
    }

    [Storable]
    private double[,] data;
    // Replaces the whole value table and raises Reset when the array instance changes.
    private double[,] Data {
      get { return data; }
      set {
        if (data != value) {
          if (value == null) throw new ArgumentNullException();
          this.data = value;
          OnReset(EventArgs.Empty);
        }
      }
    }

    // elementwise access
    /// <summary>
    /// Gets or sets a single cell. Setting raises <see cref="DataChanged"/> and
    /// <see cref="ItemChanged"/> only when the stored value actually changes.
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        // double.Equals (unlike ==) treats NaN as equal to NaN, so rewriting a
        // missing value with NaN does not raise change events.
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          OnItemChanged(rowIndex, columnIndex);
        }
      }
    }
    /// <summary>
    /// Gets or sets a single cell addressed by variable name and row.
    /// </summary>
    /// <exception cref="ArgumentException">No variable with the given name exists.</exception>
    public double this[string variableName, int rowIndex] {
      get { return data[rowIndex, GetVariableIndex(variableName)]; }
      set { this[rowIndex, GetVariableIndex(variableName)] = value; }
    }
    // access to full columns
    /// <summary>
    /// Gets a copy of all values of the named variable.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
    }

    /// <summary>
    /// Copies the values of one column for the rows in the half-open range [start, end).
    /// </summary>
    /// <param name="variableIndex">Column index.</param>
    /// <param name="start">First row (inclusive); must satisfy 0 &lt;= start &lt;= end.</param>
    /// <param name="end">Last row (exclusive); must not exceed <see cref="Rows"/>.</param>
    /// <returns>A new array of length <c>end - start</c>.</returns>
    /// <exception cref="ArgumentException">The row range is invalid.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      if (end > Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");

      int count = end - start;
      double[] values = new double[count];
      for (int i = 0; i < count; i++)
        values[i] = data[start + i, variableIndex];
      return values;
    }

    /// <summary>
    /// Copies the values of the named column for the rows in [start, end).
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>
    /// Returns the name of the column at <paramref name="variableIndex"/>.
    /// </summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the index of the first column whose name equals <paramref name="variableName"/>.
    /// </summary>
    /// <exception cref="ArgumentException">No such variable exists.</exception>
    public int GetVariableIndex(string variableName) {
      int index = Array.IndexOf(variableNames, variableName);
      if (index < 0)
        throw new ArgumentException("The variable name " + variableName + " was not found.");
      return index;
    }

    /// <summary>
    /// Renames one column and raises <see cref="ColumnNamesChanged"/>.
    /// Renaming a column to the name it already has is a no-op.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The index is out of range, or another column already carries <paramref name="name"/>.
    /// </exception>
    public void SetVariableName(int variableIndex, string name) {
      // The (paramName, message) overload is required; the single-string overload
      // would treat the message as the parameter name.
      if (name == null) throw new ArgumentNullException("name", "Cannot set variable name to null for variable at index " + variableIndex + ".");
      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
      // Must come before the duplicate check, otherwise a column collides with itself.
      if (name.Equals(variableNames[variableIndex])) return;
      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      variableNames[variableIndex] = name;
      OnColumnNamesChanged();
    }

    #endregion

    #region variable statistics
    // All range-based statistics below operate on the rows [start, end) and go through
    // GetVariableValues, so they share its argument validation. The LINQ aggregates
    // (Average/Max/Min) throw InvalidOperationException for an empty range.

    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, Rows);
    }

    /// <summary>
    /// Arithmetic mean of the column values in rows [start, end).
    /// </summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, Rows);
    }

    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Difference between the largest and smallest column value in rows [start, end).
    /// </summary>
    public double GetRange(int variableIndex, int start, int end) {
      double[] values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, Rows);
    }

    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Largest column value in rows [start, end).
    /// </summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, Rows);
    }

    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Smallest column value in rows [start, end).
    /// </summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, Rows);
    }

    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Number of NaN entries of the column in rows [start, end).
    /// </summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Clones the item via the base implementation and gives the clone its own copies
    /// of the value table and the name array.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      clone.data = (double[,])data.Clone();
      clone.variableNames = (string[])variableNames.Clone();
      return clone;
    }

    #region events
    /// <summary>Raised after a single cell changed; the arguments carry (row, column).</summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>Raised after the whole value table was replaced (row or column count changed).</summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows. Setting it resizes the table, keeping the values of the rows
    /// that exist in both the old and the new table, and raises <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new row count is smaller than one.</exception>
    public int Rows {
      get {
        return data.GetLength(0);
      }
      set {
        // Negative counts are rejected here too; otherwise the array allocation below
        // would fail with an unrelated OverflowException.
        if (value <= 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        if (value != Rows) {
          var newValues = new double[value, Columns];
          // Both arrays are row-major with the same column count, so the rows to keep
          // form one contiguous prefix.
          Array.Copy(data, newValues, Math.Min(Rows, value) * Columns);
          Data = newValues;
        }
      }
    }

    /// <summary>
    /// Number of columns. Setting it resizes the table: existing columns keep their values
    /// and names, added columns are zero-filled and named "Var" followed by the zero-padded
    /// column index. Raises <see cref="ColumnNamesChanged"/> and then <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new column count is negative.</exception>
    public int Columns {
      get {
        return data.GetLength(1);
      }
      set {
        if (value < 0) throw new ArgumentException("Number of columns must not be negative.");
        if (value != Columns) {
          int rowCount = Rows;
          int keptColumns = Math.Min(value, Columns);

          var newValues = new double[rowCount, value];
          for (int row = 0; row < rowCount; row++) {
            for (int column = 0; column < keptColumns; column++) {
              newValues[row, column] = data[row, column];
            }
          }

          var newVariableNames = new string[value];
          Array.Copy(variableNames, newVariableNames, keptColumns);
          // Pad generated names to the digit count of the new column total
          // (>= 100 variables => 000). The digit count is taken from the decimal string
          // rather than Math.Log10, which is undefined for 0.
          string formatString = new string('0', value.ToString(CultureInfo.InvariantCulture).Length);
          for (int column = keptColumns; column < value; column++) {
            newVariableNames[column] = "Var" + column.ToString(formatString);
          }

          // Swap both fields before notifying anyone so that listeners never observe
          // a name array and a value table with different column counts.
          variableNames = newVariableNames;
          data = newValues;
          OnColumnNamesChanged();
          OnReset(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private bool sortableView;
    /// <summary>
    /// Flag exposed to views; raises <see cref="SortableViewChanged"/> when it changes.
    /// </summary>
    public bool SortableView {
      get { return sortableView; }
      set {
        if (value != sortableView) {
          sortableView = value;
          OnSortableViewChanged();
        }
      }
    }

    /// <summary>Always false: cells can be edited.</summary>
    public bool ReadOnly {
      get { return false; }
    }

    /// <summary>
    /// Gets the variable names, or renames the leading columns in one step.
    /// The whole assignment is validated first and then applied, so names may be left
    /// unchanged or swapped between columns; <see cref="ColumnNamesChanged"/> is raised once.
    /// </summary>
    /// <exception cref="ArgumentNullException">The sequence or one of its names is null.</exception>
    /// <exception cref="ArgumentException">
    /// More names than columns were supplied, or the assignment would produce a duplicate name.
    /// </exception>
    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        string[] newNames = value.ToArray();
        if (newNames.Length > variableNames.Length)
          throw new ArgumentException(" Cannot set name of not existent variable at index " + variableNames.Length + ".");

        // The new names must be unique among themselves ...
        var assigned = new HashSet<string>();
        for (int i = 0; i < newNames.Length; i++) {
          if (newNames[i] == null)
            throw new ArgumentNullException("value", "Cannot set variable name to null for variable at index " + i + ".");
          if (!assigned.Add(newNames[i]))
            throw new ArgumentException("The data set already contains a variable with name " + newNames[i] + ".");
        }
        // ... and must not collide with the columns that keep their current name.
        for (int i = newNames.Length; i < variableNames.Length; i++) {
          if (assigned.Contains(variableNames[i]))
            throw new ArgumentException("The data set already contains a variable with name " + variableNames[i] + ".");
        }

        Array.Copy(newNames, variableNames, newNames.Length);
        OnColumnNamesChanged();
      }
    }

    /// <summary>
    /// Row names are not provided: the getter returns an empty list and the setter throws.
    /// </summary>
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return new List<string>(); }
      set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Checks whether <paramref name="value"/> parses as a double.
    /// </summary>
    /// <param name="value">Text to check.</param>
    /// <param name="errorMessage">Empty when valid; otherwise a hint containing the expected format.</param>
    /// <returns>True when the text can be parsed.</returns>
    public bool Validate(string value, out string errorMessage) {
      double parsed;
      if (double.TryParse(value, out parsed)) {
        errorMessage = string.Empty;
        return true;
      }
      StringBuilder sb = new StringBuilder();
      sb.Append("Invalid Value (Valid Value Format: \"");
      sb.Append(FormatPatterns.GetDoubleFormatPattern());
      sb.Append("\")");
      errorMessage = sb.ToString();
      return false;
    }

    /// <summary>
    /// Returns the text representation of a single cell.
    /// </summary>
    public string GetValue(int rowIndex, int columnIndex) {
      return data[rowIndex, columnIndex].ToString();
    }

    /// <summary>
    /// Parses <paramref name="value"/> and stores it in the given cell, raising
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/>.
    /// </summary>
    /// <returns>False (and no change) when the text cannot be parsed as a double.</returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      double parsed;
      if (!double.TryParse(value, out parsed)) return false;
      data[rowIndex, columnIndex] = parsed;
      OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
      OnItemChanged(rowIndex, columnIndex);
      return true;
    }

    public event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      EventHandler handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler RowNamesChanged;
    private void OnRowNamesChanged() {
      EventHandler handler = RowNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler SortableViewChanged;
    private void OnSortableViewChanged() {
      EventHandler handler = SortableViewChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      // Copy the delegate first, like the other raisers in this class, so that an
      // unsubscribe between the null check and the call cannot cause a NullReferenceException.
      EventHandler<EventArgs<int, int>> handler = ItemChanged;
      if (handler != null)
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      OnToStringChanged();
    }
    #endregion


  }
}
Note: See TracBrowser for help on using the repository browser.