Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3442

Last change on this file since 3442 was 3442, checked in by gkronber, 14 years ago

Implemented views for DataAnalysisProblems and DataAnalysisSolutions. #938 (Data types and operators for regression problems)

File size: 12.2 KB
Line 
1#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
27using System.Linq;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Data;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A table of double values with one name per column ("variable").
  /// Values are held in a <c>double[rows, columns]</c> array; the class also exposes
  /// the table as an <see cref="IStringConvertibleMatrix"/> so it can be shown and edited as text.
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates a dataset with a single variable "y" and a single row containing 0.0.
    /// </summary>
    public Dataset()
      : this(new string[1] { "y" }, new double[,] { { 0.0 } }) {
    }

    /// <summary>
    /// Creates a dataset from the given column names and values.
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>.</param>
    /// <param name="data">Values indexed as [row, column]. The array is stored by reference, not copied.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">The number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : base() {
      Name = "-";
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      // Materialize once: the sequence is needed for the length check and for storage,
      // and a lazy sequence must not be enumerated twice.
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      this.data = data;
      this.variableNames = names;
      this.SortableView = false;
    }

    [Storable]
    private string[] variableNames;
    /// <summary>
    /// The column names in column order. Assigning a different sequence stores a copy
    /// and raises <see cref="ColumnNamesChanged"/>.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      private set {
        if (variableNames != value) {
          variableNames = value.ToArray();
          OnColumnNamesChanged();
        }
      }
    }

    [Storable]
    private double[,] data;
    // Replaces the whole value table and raises Reset when the array instance changes.
    private double[,] Data {
      get { return data; }
      set {
        if (data != value) {
          if (value == null) throw new ArgumentNullException("value");
          this.data = value;
          OnReset(EventArgs.Empty);
        }
      }
    }

    /// <summary>
    /// Element-wise access. Setting a value that differs from the stored one raises
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/>.
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        // double.Equals (unlike ==) treats NaN as equal to NaN, so re-assigning NaN raises no event.
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          OnItemChanged(rowIndex, columnIndex);
        }
      }
    }

    /// <summary>
    /// Returns a copy of all values of the named column.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
    }

    /// <summary>
    /// Returns a copy of the values of one column for the rows in [start, end).
    /// </summary>
    /// <param name="variableIndex">Zero-based column index.</param>
    /// <param name="start">First row (inclusive); must satisfy 0 &lt;= start &lt;= end.</param>
    /// <param name="end">Row after the last row (exclusive); must not exceed <see cref="Rows"/>.</param>
    /// <exception cref="ArgumentException">The row range is invalid.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      if (end > Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");

      int count = end - start;
      double[] values = new double[count];
      for (int i = 0; i < count; i++)
        values[i] = data[start + i, variableIndex];
      return values;
    }

    /// <summary>
    /// Returns a copy of the values of the named column for the rows in [start, end).
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>
    /// Returns the name of the column at <paramref name="variableIndex"/>.
    /// </summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the index of the first column whose name equals <paramref name="variableName"/>.
    /// </summary>
    /// <exception cref="ArgumentException">No column has that name.</exception>
    public int GetVariableIndex(string variableName) {
      int index = Array.IndexOf(variableNames, variableName);
      if (index < 0)
        throw new ArgumentException("The variable name " + variableName + " was not found.");
      return index;
    }

    /// <summary>
    /// Renames one column and raises <see cref="ColumnNamesChanged"/>.
    /// Renaming a column to the name it already has is a no-op.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The index is out of range or another column already uses <paramref name="name"/>.
    /// </exception>
    public void SetVariableName(int variableIndex, string name) {
      // The single-string ArgumentNullException constructor takes the parameter name, not the message.
      if (name == null) throw new ArgumentNullException("name", "Cannot set variable name to null for variable at index " + variableIndex + ".");
      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
      // Keeping the current name must not be reported as a duplicate.
      if (variableNames[variableIndex] == name) return;
      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      variableNames[variableIndex] = name;
      OnColumnNamesChanged();
    }

    #endregion

    #region variable statistics
    // All statistics work on a copy of the requested column range obtained via GetVariableValues;
    // overloads without start/end cover rows [0, Rows).

    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, Rows);
    }

    /// <summary>Arithmetic mean of the column values in rows [start, end).</summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, Rows);
    }

    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Difference between the largest and smallest column value in rows [start, end).</summary>
    public double GetRange(int variableIndex, int start, int end) {
      var values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, Rows);
    }

    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Largest column value in rows [start, end).</summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, Rows);
    }

    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Smallest column value in rows [start, end).</summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, Rows);
    }

    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Number of NaN entries of the column in rows [start, end).</summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Creates a clone that owns its own copies of the value table and the name array.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      clone.data = (double[,])data.Clone();
      clone.variableNames = (string[])variableNames.Clone();
      // NOTE(review): base.Clone is not visible here; copy the flag explicitly so the clone
      // keeps it even if the base implementation does not copy fields of this class.
      clone.sortableView = sortableView;
      return clone;
    }

    #region events
    /// <summary>Raised with (row, column) after a single value was changed.</summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>Raised after the whole value table was replaced (row or column count changed).</summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows. Setting a different value resizes the table: existing rows are kept
    /// (or truncated), new rows are zero-filled, and <see cref="Reset"/> is raised.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is smaller than one.</exception>
    public int Rows {
      get {
        return data.GetLength(0);
      }
      set {
        // Reject negative values here as well; they would otherwise fail in the array allocation.
        if (value < 1) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        if (value != Rows) {
          var newValues = new double[value, Columns];
          // Both arrays have the same column count, so the first rows form one contiguous
          // run of elements that Array.Copy can transfer in a single call.
          Array.Copy(data, newValues, Math.Min(Rows, value) * Columns);
          Data = newValues;
        }
      }
    }

    /// <summary>
    /// Number of columns. Setting a different value resizes the table: existing columns keep
    /// their values and names (or are truncated), new columns are zero-filled and named
    /// "Var" followed by the zero-padded column index. Raises <see cref="ColumnNamesChanged"/>
    /// and then <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is smaller than one.</exception>
    public int Columns {
      get {
        return data.GetLength(1);
      }
      set {
        // Math.Log10 below is only meaningful for positive values.
        if (value < 1) throw new ArgumentException("Number of columns must be at least one");
        int oldColumns = Columns;
        if (value != oldColumns) {
          var newValues = new double[Rows, value];
          var newVariableNames = new string[value];
          int columnsToCopy = Math.Min(value, oldColumns);
          for (int row = 0; row < Rows; row++) {
            for (int column = 0; column < columnsToCopy; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          string formatString = new string('0', (int)Math.Log10(value) + 1); // >= 100 variables => ###
          for (int column = 0; column < value; column++) {
            if (column < oldColumns)
              newVariableNames[column] = variableNames[column];
            else
              newVariableNames[column] = "Var" + column.ToString(formatString);
          }
          // Swap in names and values together before notifying, so listeners never see
          // a name list and a value table with different column counts.
          variableNames = newVariableNames;
          data = newValues;
          OnColumnNamesChanged();
          OnReset(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private bool sortableView;
    /// <summary>
    /// Flag exposed to views; changing it raises <see cref="SortableViewChanged"/>.
    /// </summary>
    public bool SortableView {
      get { return sortableView; }
      set {
        if (value != sortableView) {
          sortableView = value;
          OnSortableViewChanged();
        }
      }
    }

    /// <summary>Always false: the dataset can be edited.</summary>
    public bool ReadOnly {
      get { return false; }
    }

    /// <summary>
    /// Column names as seen through <see cref="IStringConvertibleMatrix"/>.
    /// The setter renames the leading columns to the supplied names; columns beyond the
    /// supplied names keep theirs. All names are validated before any is applied, so a
    /// failing assignment leaves the dataset unchanged and reordering names is possible.
    /// <see cref="ColumnNamesChanged"/> is raised once.
    /// </summary>
    /// <exception cref="ArgumentNullException">The sequence or one of its names is null.</exception>
    /// <exception cref="ArgumentException">
    /// More names than columns were supplied, or a changed name collides with another column.
    /// </exception>
    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        string[] requested = value.ToArray();
        if (requested.Length > variableNames.Length)
          throw new ArgumentException(" Cannot set name of not existent variable at index " + variableNames.Length + ".");

        // Build the resulting name list first and remember which positions really change.
        string[] merged = (string[])variableNames.Clone();
        var changed = new List<int>();
        for (int i = 0; i < requested.Length; i++) {
          if (requested[i] == null)
            throw new ArgumentNullException("value", "Cannot set variable name to null for variable at index " + i + ".");
          if (merged[i] != requested[i]) {
            merged[i] = requested[i];
            changed.Add(i);
          }
        }

        // Only changed names are checked against the final list; untouched columns are not re-validated.
        var duplicated = new HashSet<string>(
          merged.GroupBy(n => n).Where(g => g.Count() > 1).Select(g => g.Key));
        foreach (int i in changed) {
          if (duplicated.Contains(merged[i]))
            throw new ArgumentException("The data set already contains a variable with name " + merged[i] + ".");
        }

        variableNames = merged;
        OnColumnNamesChanged();
      }
    }

    /// <summary>
    /// Rows have no names: the getter returns an empty list and the setter is not implemented.
    /// </summary>
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return new List<string>(); }
      set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Checks whether <paramref name="value"/> can be parsed as a double.
    /// </summary>
    /// <param name="value">Text to check.</param>
    /// <param name="errorMessage">Empty when valid; otherwise a hint containing the expected format.</param>
    /// <returns>True if the text parses as a double.</returns>
    public bool Validate(string value, out string errorMessage) {
      double val;
      bool valid = double.TryParse(value, out val);
      errorMessage = string.Empty;
      if (!valid) {
        StringBuilder sb = new StringBuilder();
        sb.Append("Invalid Value (Valid Value Format: \"");
        sb.Append(FormatPatterns.GetDoubleFormatPattern());
        sb.Append("\")");
        errorMessage = sb.ToString();
      }
      return valid;
    }

    /// <summary>Returns the text representation of the value at the given position.</summary>
    public string GetValue(int rowIndex, int columnIndex) {
      return data[rowIndex, columnIndex].ToString();
    }

    /// <summary>
    /// Parses <paramref name="value"/> and stores it at the given position.
    /// On success <see cref="DataChanged"/> and <see cref="ItemChanged"/> are raised,
    /// even if the parsed value equals the stored one.
    /// </summary>
    /// <returns>True if the text could be parsed and was stored; otherwise false.</returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      double v;
      if (!double.TryParse(value, out v)) return false;
      data[rowIndex, columnIndex] = v;
      OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
      OnItemChanged(rowIndex, columnIndex);
      return true;
    }

    public event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      EventHandler handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler RowNamesChanged;
    private void OnRowNamesChanged() {
      EventHandler handler = RowNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler SortableViewChanged;
    private void OnSortableViewChanged() {
      EventHandler handler = SortableViewChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      // Copy the delegate first (as the other raisers do) so an unsubscribe between
      // the null check and the call cannot cause a NullReferenceException.
      EventHandler<EventArgs<int, int>> handler = ItemChanged;
      if (handler != null)
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      OnToStringChanged();
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.