source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3330

Last change on this file since 3330 was 3330, checked in by mkommend, 12 years ago

removed warning from Dataset (ticket #938)

File size: 12.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
27using System.Linq;
28using HeuristicLab.Core;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Data;
31using HeuristicLab.Common;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A named item that stores a two-dimensional array of double values together with
  /// one variable name per column. Rows are addressed by index, columns by index or
  /// by variable name. The class also implements <see cref="IStringConvertibleMatrix"/>.
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates a dataset with a single variable "x" and a single row containing 0.0.
    /// </summary>
    public Dataset()
      : this(new string[] { "x" }, new double[,] { { 0.0 } }) {
    }

    /// <summary>
    /// Creates a dataset from the given variable names and values.
    /// The <paramref name="data"/> array is stored by reference (it is not copied).
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>.</param>
    /// <param name="data">Values indexed as [row, column].</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">The number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : base() {
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      Name = "-";
      // Materialize once: a lazy sequence must not be enumerated for the count and again for the copy.
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      Data = data;
      this.VariableNames = names;
      this.sortableView = false;
    }

    [Storable]
    private string[] variableNames;
    /// <summary>
    /// The variable (column) names. The getter hands out the internal array typed as a sequence;
    /// the private setter stores a copy of the assigned sequence and raises <see cref="ColumnNamesChanged"/>.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      private set {
        if (variableNames != value) {
          variableNames = value.ToArray();
          OnColumnNamesChanged();
        }
      }
    }

    [Storable]
    private double[,] data;
    /// <summary>
    /// The value array indexed as [row, column]. Assigning a different array raises <see cref="Reset"/>.
    /// </summary>
    private double[,] Data {
      get { return data; }
      set {
        // Reject null up front so the field can never be set (or silently left) null.
        if (value == null) throw new ArgumentNullException("value");
        if (data != value) {
          this.data = value;
          OnReset(EventArgs.Empty);
        }
      }
    }

    /// <summary>
    /// Element-wise access. Writing a value that differs from the stored one (compared with
    /// <see cref="double.Equals(double)"/>, so NaN equals NaN) raises <see cref="DataChanged"/>
    /// and <see cref="ItemChanged"/>.
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          OnItemChanged(rowIndex, columnIndex);
        }
      }
    }

    /// <summary>
    /// Returns a copy of all values of the variable with the given name.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
    }

    /// <summary>
    /// Copies the values of one column for the rows in [start, end) into a new array.
    /// </summary>
    /// <param name="variableIndex">Column index.</param>
    /// <param name="start">First row (inclusive).</param>
    /// <param name="end">Last row (exclusive); at most <see cref="Rows"/>.</param>
    /// <exception cref="ArgumentException">The row range is invalid.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      // end >= start is guaranteed by the check above, so only the upper bound remains.
      if (end > Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");

      int count = end - start;
      double[] values = new double[count];
      for (int i = 0; i < count; i++)
        values[i] = data[start + i, variableIndex];
      return values;
    }

    /// <summary>
    /// Copies the values of the named variable for the rows in [start, end) into a new array.
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>
    /// Returns the name of the variable at the given column index.
    /// </summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the index of the first variable whose name equals <paramref name="variableName"/>.
    /// </summary>
    /// <exception cref="ArgumentException">No variable has that name.</exception>
    public int GetVariableIndex(string variableName) {
      // Array.IndexOf tolerates null entries in the name array, unlike calling Equals on each entry.
      int index = Array.IndexOf(variableNames, variableName);
      if (index < 0)
        throw new ArgumentException("The variable name " + variableName + " was not found.");
      return index;
    }

    /// <summary>
    /// Renames the variable at <paramref name="variableIndex"/> and raises <see cref="ColumnNamesChanged"/>.
    /// Renaming a variable to the name it already has is a no-op.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    /// <exception cref="ArgumentException">The index is out of range or another variable already uses the name.</exception>
    public void SetVariableName(int variableIndex, string name) {
      // Two-argument overload: the single-string overload would treat the message as the parameter name.
      if (name == null) throw new ArgumentNullException("name", "Cannot set variable name to null for variable at index " + variableIndex + ".");
      // Validate the index first so an invalid index is never reported as a duplicate name.
      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
      if (name.Equals(variableNames[variableIndex])) return;
      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      variableNames[variableIndex] = name;
      OnColumnNamesChanged();
    }

    #endregion

    #region variable statistics
    /// <summary>Mean of all values of the named variable.</summary>
    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    /// <summary>Mean of the named variable over the rows in [start, end).</summary>
    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Mean of all values of the variable at the given index.</summary>
    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, Rows);
    }

    /// <summary>
    /// Mean of the variable over the rows in [start, end), computed with LINQ Average
    /// (which throws for an empty range).
    /// </summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    /// <summary>Difference between maximum and minimum of the named variable.</summary>
    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    /// <summary>Difference between maximum and minimum of the variable at the given index.</summary>
    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, Rows);
    }

    /// <summary>Difference between maximum and minimum of the named variable over the rows in [start, end).</summary>
    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Difference between maximum and minimum of the variable over the rows in [start, end).</summary>
    public double GetRange(int variableIndex, int start, int end) {
      var values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    /// <summary>Maximum of the named variable.</summary>
    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    /// <summary>Maximum of the variable at the given index.</summary>
    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, Rows);
    }

    /// <summary>Maximum of the named variable over the rows in [start, end).</summary>
    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Maximum of the variable over the rows in [start, end).</summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    /// <summary>Minimum of the named variable.</summary>
    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    /// <summary>Minimum of the variable at the given index.</summary>
    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, Rows);
    }

    /// <summary>Minimum of the named variable over the rows in [start, end).</summary>
    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Minimum of the variable over the rows in [start, end).</summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    /// <summary>Number of NaN values of the named variable.</summary>
    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }

    /// <summary>Number of NaN values of the variable at the given index.</summary>
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, Rows);
    }

    /// <summary>Number of NaN values of the named variable over the rows in [start, end).</summary>
    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Number of NaN values of the variable over the rows in [start, end).</summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Creates a clone that owns its own copies of the value array and the name array.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      clone.data = (double[,])data.Clone();
      clone.variableNames = (string[])variableNames.Clone();
      // NOTE(review): assumes base.Clone does not copy fields of this class; copying explicitly
      // keeps the SortableView setting on the clone and is harmless if the base already did.
      clone.sortableView = sortableView;
      return clone;
    }

    #region events
    /// <summary>Raised after a single value changed; the arguments carry row and column index.</summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>Raised after the whole value array was replaced.</summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows. Setting a different value allocates a new array, keeps the values of the
    /// rows that still fit (new rows are zero) and raises <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is smaller than one.</exception>
    public int Rows {
      get {
        return data.GetLength(0);
      }
      set {
        // Negative counts are rejected here as well instead of failing inside the array allocation.
        if (value <= 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        if (value != Rows) {
          var newValues = new double[value, Columns];
          // Both arrays have the same column count, so the kept rows form one contiguous
          // prefix in row-major order and can be copied in a single call.
          Array.Copy(data, newValues, Math.Min(Rows, value) * Columns);
          Data = newValues;
        }
      }
    }

    /// <summary>
    /// Number of columns. Setting a different value allocates a new array, keeps the values and
    /// names of the columns that still fit, names added columns "Var" followed by the zero-padded
    /// column index, and raises <see cref="ColumnNamesChanged"/> followed by <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is negative.</exception>
    public int Columns {
      get {
        return data.GetLength(1);
      }
      set {
        if (value < 0) throw new ArgumentException("Number of columns must not be negative.");
        int oldColumns = Columns;
        if (value != oldColumns) {
          int rows = Rows;
          int keptColumns = Math.Min(value, oldColumns);
          var newValues = new double[rows, value];
          var newVariableNames = new string[value];
          for (int row = 0; row < rows; row++) {
            for (int column = 0; column < keptColumns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          // One '0' per decimal digit of the new column count (>= 100 variables => 000).
          // Counting digits avoids Math.Log10(0), which broke the setter for a value of zero.
          string formatString = new string('0', value.ToString(CultureInfo.InvariantCulture).Length);
          // NOTE(review): a generated name may collide with an existing variable name; not checked here.
          for (int column = 0; column < value; column++) {
            if (column < oldColumns)
              newVariableNames[column] = variableNames[column];
            else
              newVariableNames[column] = "Var" + column.ToString(formatString);
          }
          // Swap names and values together before notifying, so listeners never observe
          // a name count that differs from the column count.
          variableNames = newVariableNames;
          data = newValues;
          OnColumnNamesChanged();
          OnReset(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private bool sortableView;
    /// <summary>
    /// Flag exposed for matrix views; changing it raises <see cref="SortableViewChanged"/>.
    /// </summary>
    public bool SortableView {
      get { return sortableView; }
      set {
        if (value != sortableView) {
          sortableView = value;
          OnSortableViewChanged();
        }
      }
    }

    /// <summary>
    /// Column names of the matrix, i.e. the variable names. The setter replaces the names of the
    /// leading columns with the given sequence. All names are validated before any name is
    /// changed, and <see cref="ColumnNamesChanged"/> is raised once if at least one name changed.
    /// </summary>
    /// <exception cref="ArgumentNullException">The sequence or one of its names is null.</exception>
    /// <exception cref="ArgumentException">More names than columns were given, or the resulting names are not unique.</exception>
    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        string[] newNames = value.ToArray();
        if (newNames.Length > variableNames.Length)
          throw new ArgumentException(" Cannot set name of not existent variable at index " + variableNames.Length + ".");

        bool changed = false;
        for (int i = 0; i < newNames.Length; i++) {
          if (newNames[i] == null)
            throw new ArgumentNullException("value", "Cannot set variable name to null for variable at index " + i + ".");
          if (!newNames[i].Equals(variableNames[i])) changed = true;
        }
        if (!changed) return;

        // Names of columns beyond the given sequence stay as they are; every replaced name
        // must be unique among those and among the other new names.
        var usedNames = new HashSet<string>();
        for (int i = newNames.Length; i < variableNames.Length; i++)
          usedNames.Add(variableNames[i]);
        foreach (string newName in newNames) {
          if (!usedNames.Add(newName))
            throw new ArgumentException("The data set already contains a variable with name " + newName + ".");
        }

        Array.Copy(newNames, variableNames, newNames.Length);
        OnColumnNamesChanged();
      }
    }

    /// <summary>
    /// Row names are not supported: the getter returns an empty list and the setter throws.
    /// </summary>
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return new List<string>(); }
      set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Checks whether <paramref name="value"/> can be parsed as a double (current culture).
    /// </summary>
    /// <param name="value">Text to check.</param>
    /// <param name="errorMessage">Empty if valid, otherwise a message containing the expected format pattern.</param>
    /// <returns>True if the text can be parsed.</returns>
    public bool Validate(string value, out string errorMessage) {
      double val;
      bool valid = double.TryParse(value, out val);
      errorMessage = string.Empty;
      if (!valid) {
        StringBuilder sb = new StringBuilder();
        sb.Append("Invalid Value (Valid Value Format: \"");
        sb.Append(FormatPatterns.GetDoubleFormatPattern());
        sb.Append("\")");
        errorMessage = sb.ToString();
      }
      return valid;
    }

    /// <summary>
    /// Returns the value at the given position formatted with the default double format.
    /// </summary>
    public string GetValue(int rowIndex, int columnIndex) {
      return data[rowIndex, columnIndex].ToString();
    }

    /// <summary>
    /// Parses <paramref name="value"/> (current culture) and stores it at the given position.
    /// On success <see cref="DataChanged"/> and <see cref="ItemChanged"/> are raised, even if the
    /// parsed value equals the stored one.
    /// </summary>
    /// <returns>True if the text could be parsed and was stored, otherwise false.</returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      double v;
      if (!double.TryParse(value, out v)) return false;
      data[rowIndex, columnIndex] = v;
      OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
      OnItemChanged(rowIndex, columnIndex);
      return true;
    }

    /// <summary>Raised after one or more variable names changed.</summary>
    public event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      EventHandler handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Declared for the matrix interface; nothing in this class raises it.</summary>
    public event EventHandler RowNamesChanged;
    // Not called from within this class. NOTE(review): presumably kept so the compiler does not
    // report the event as unused - confirm before removing.
    private void OnRowNamesChanged() {
      EventHandler handler = RowNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised after <see cref="SortableView"/> changed.</summary>
    public event EventHandler SortableViewChanged;
    private void OnSortableViewChanged() {
      EventHandler handler = SortableViewChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised after a single value changed; the arguments carry row and column index.</summary>
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      // Copy to a local first, like the other event raisers, so a concurrent unsubscribe
      // between the null check and the invocation cannot cause a NullReferenceException.
      EventHandler<EventArgs<int, int>> handler = ItemChanged;
      if (handler != null)
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      OnToStringChanged();
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.