Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3430

Last change on this file since 3430 was 3430, checked in by swagner, 14 years ago

Added ReadOnly property to all items of HeuristicLab.Data (#969)

File size: 12.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
27using System.Linq;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Data;
32
33namespace HeuristicLab.Problems.DataAnalysis {
34  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
35  [StorableClass]
36  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
37    public Dataset()
38      : this(new string[0], new double[,] { { } }) {
39    }
40
41    public Dataset(IEnumerable<string> variableNames, double[,] data)
42      : base() {
43      Name = "-";
44      if (variableNames.Count() != data.GetLength(1)) {
45        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
46      }
47      Data = data;
48      this.VariableNames = variableNames;
49      this.SortableView = false;
50      this.ReadOnlyView = true;
51    }
52
53    [Storable]
54    private string[] variableNames;
55    public IEnumerable<string> VariableNames {
56      get { return variableNames; }
57      private set {
58        if (variableNames != value) {
59          variableNames = value.ToArray();
60          OnColumnNamesChanged();
61        }
62      }
63    }
64
65    [Storable]
66    private double[,] data;
67    private double[,] Data {
68      get { return data; }
69      set {
70        if (data != value) {
71          if (value == null) throw new ArgumentNullException();
72          this.data = value;
73          OnReset(EventArgs.Empty);
74        }
75      }
76    }
77
78    // elementwise access
79    public double this[int rowIndex, int columnIndex] {
80      get { return data[rowIndex, columnIndex]; }
81      set {
82        if (!value.Equals(data[rowIndex, columnIndex])) {
83          data[rowIndex, columnIndex] = value;
84          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
85          OnItemChanged(rowIndex, columnIndex);
86        }
87      }
88    }
89    // access to full columns
90    public double[] this[string variableName] {
91      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
92    }
93
94    public double[] GetVariableValues(int variableIndex, int start, int end) {
95      if (start < 0 || !(start <= end))
96        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
97      if (end > Rows || end < start)
98        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");
99
100      double[] values = new double[end - start];
101      for (int i = 0; i < end - start; i++)
102        values[i] = data[i + start, variableIndex];
103      return values;
104    }
105
106    public double[] GetVariableValues(string variableName, int start, int end) {
107      return GetVariableValues(GetVariableIndex(variableName), start, end);
108    }
109
110    #region Variable name methods
111    public string GetVariableName(int variableIndex) {
112      return variableNames[variableIndex];
113    }
114
115    public int GetVariableIndex(string variableName) {
116      for (int i = 0; i < variableNames.Length; i++) {
117        if (variableNames[i].Equals(variableName)) return i;
118      }
119      throw new ArgumentException("The variable name " + variableName + " was not found.");
120    }
121
122    public void SetVariableName(int variableIndex, string name) {
123      if (name == null) throw new ArgumentNullException("Cannot set variable name to null for variable at index " + variableIndex + " variableIndex");
124      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
125      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
126      variableNames[variableIndex] = name;
127      OnColumnNamesChanged();
128    }
129
130    #endregion
131
132    #region variable statistics
133    public double GetMean(string variableName) {
134      return GetMean(GetVariableIndex(variableName));
135    }
136
137    public double GetMean(string variableName, int start, int end) {
138      return GetMean(GetVariableIndex(variableName), start, end);
139    }
140
141    public double GetMean(int variableIndex) {
142      return GetMean(variableIndex, 0, Rows);
143    }
144
145    public double GetMean(int variableIndex, int start, int end) {
146      return GetVariableValues(variableIndex, start, end).Average();
147    }
148
149    public double GetRange(string variableName) {
150      return GetRange(GetVariableIndex(variableName));
151    }
152
153    public double GetRange(int variableIndex) {
154      return GetRange(variableIndex, 0, Rows);
155    }
156
157    public double GetRange(string variableName, int start, int end) {
158      return GetRange(GetVariableIndex(variableName), start, end);
159    }
160
161    public double GetRange(int variableIndex, int start, int end) {
162      var values = GetVariableValues(variableIndex, start, end);
163      return values.Max() - values.Min();
164    }
165
166    public double GetMax(string variableName) {
167      return GetMax(GetVariableIndex(variableName));
168    }
169
170    public double GetMax(int variableIndex) {
171      return GetMax(variableIndex, 0, Rows);
172    }
173
174    public double GetMax(string variableName, int start, int end) {
175      return GetMax(GetVariableIndex(variableName), start, end);
176    }
177
178    public double GetMax(int variableIndex, int start, int end) {
179      return GetVariableValues(variableIndex, start, end).Max();
180    }
181
182    public double GetMin(string variableName) {
183      return GetMin(GetVariableIndex(variableName));
184    }
185
186    public double GetMin(int variableIndex) {
187      return GetMin(variableIndex, 0, Rows);
188    }
189
190    public double GetMin(string variableName, int start, int end) {
191      return GetMin(GetVariableIndex(variableName), start, end);
192    }
193
194    public double GetMin(int variableIndex, int start, int end) {
195      return GetVariableValues(variableIndex, start, end).Min();
196    }
197
198    public int GetMissingValues(string variableName) {
199      return GetMissingValues(GetVariableIndex(variableName));
200    }
201    public int GetMissingValues(int variableIndex) {
202      return GetMissingValues(variableIndex, 0, Rows);
203    }
204
205    public int GetMissingValues(string variableName, int start, int end) {
206      return GetMissingValues(GetVariableIndex(variableName), start, end);
207    }
208
209    public int GetMissingValues(int variableIndex, int start, int end) {
210      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
211    }
212
213    #endregion
214
215    public override IDeepCloneable Clone(Cloner cloner) {
216      Dataset clone = (Dataset)base.Clone(cloner);
217      clone.data = (double[,])data.Clone();
218      clone.variableNames = (string[])variableNames.Clone();
219      return clone;
220    }
221
222    #region events
223    public event EventHandler<EventArgs<int, int>> DataChanged;
224    private void OnDataChanged(EventArgs<int, int> e) {
225      var listeners = DataChanged;
226      if (listeners != null) listeners(this, e);
227    }
228    public event EventHandler Reset;
229    private void OnReset(EventArgs e) {
230      var listeners = Reset;
231      if (listeners != null) listeners(this, e);
232    }
233    #endregion
234
235    #region IStringConvertibleMatrix Members
236
237    public int Rows {
238      get {
239        return data.GetLength(0);
240      }
241      set {
242        if (value == 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
243        if (value != Rows) {
244          var newValues = new double[value, Columns];
245          for (int row = 0; row < Math.Min(Rows, value); row++) {
246            for (int column = 0; column < Columns; column++) {
247              newValues[row, column] = data[row, column];
248            }
249          }
250          Data = newValues;
251        }
252      }
253    }
254
255    public int Columns {
256      get {
257        return data.GetLength(1);
258      }
259      set {
260        if (value != Columns) {
261          var newValues = new double[Rows, value];
262          var newVariableNames = new string[value];
263          for (int row = 0; row < Rows; row++) {
264            for (int column = 0; column < Math.Min(value, Columns); column++) {
265              newValues[row, column] = data[row, column];
266            }
267          }
268          string formatString = new StringBuilder().Append('0', (int)Math.Log10(value) + 1).ToString(); // >= 100 variables => ###
269          for (int column = 0; column < value; column++) {
270            if (column < Columns)
271              newVariableNames[column] = variableNames[column];
272            else
273              newVariableNames[column] = "Var" + column.ToString(formatString);
274          }
275          VariableNames = newVariableNames;
276          Data = newValues;
277        }
278      }
279    }
280
281    [Storable]
282    private bool sortableView;
283    public bool SortableView {
284      get { return sortableView; }
285      set {
286        if (value != sortableView) {
287          sortableView = value;
288          OnSortableViewChanged();
289        }
290      }
291    }
292
293    public bool ReadOnly {
294      get { return false; }
295    }
296
297    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
298      get { return this.VariableNames; }
299      set {
300        int i = 0;
301        foreach (string variableName in value) {
302          SetVariableName(i, variableName);
303          i++;
304        }
305        OnColumnNamesChanged();
306      }
307    }
308
309    IEnumerable<string> IStringConvertibleMatrix.RowNames {
310      get { return new List<string>(); }
311      set { throw new NotImplementedException(); }
312    }
313
314    public bool Validate(string value, out string errorMessage) {
315      double val;
316      bool valid = double.TryParse(value, out val);
317      errorMessage = string.Empty;
318      if (!valid) {
319        StringBuilder sb = new StringBuilder();
320        sb.Append("Invalid Value (Valid Value Format: \"");
321        sb.Append(FormatPatterns.GetDoubleFormatPattern());
322        sb.Append("\")");
323        errorMessage = sb.ToString();
324      }
325      return valid;
326    }
327
328    public string GetValue(int rowIndex, int columnIndex) {
329      return data[rowIndex, columnIndex].ToString();
330    }
331
332    public bool SetValue(string value, int rowIndex, int columnIndex) {
333      double v;
334      if (double.TryParse(value, out v)) {
335        data[rowIndex, columnIndex] = v;
336        OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
337        OnItemChanged(rowIndex, columnIndex);
338        return true;
339      } else return false;
340    }
341
342    public event EventHandler ColumnNamesChanged;
343    private void OnColumnNamesChanged() {
344      EventHandler handler = ColumnNamesChanged;
345      if (handler != null)
346        handler(this, EventArgs.Empty);
347    }
348    public event EventHandler RowNamesChanged;
349    private void OnRowNamesChanged() {
350      EventHandler handler = RowNamesChanged;
351      if (handler != null)
352        handler(this, EventArgs.Empty);
353    }
354    public event EventHandler SortableViewChanged;
355    private void OnSortableViewChanged() {
356      EventHandler handler = SortableViewChanged;
357      if (handler != null)
358        handler(this, EventArgs.Empty);
359    }
360    public event EventHandler<EventArgs<int, int>> ItemChanged;
361    private void OnItemChanged(int rowIndex, int columnIndex) {
362      if (ItemChanged != null)
363        ItemChanged(this, new EventArgs<int, int>(rowIndex, columnIndex));
364      OnToStringChanged();
365    }
366    #endregion
367  }
368}
Note: See TracBrowser for help on using the repository browser.