source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3311

Last change on this file since 3311 was 3311, checked in by mkommend, 12 years ago

implemented RowNames property in Dataset (ticket #938)

File size: 10.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
27using System.Linq;
28using HeuristicLab.Core;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Data;
31using HeuristicLab.Common;
32
33namespace HeuristicLab.Problems.DataAnalysis {
34  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
35  [StorableClass]
36  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
37    public Dataset()
38      : this(new string[] { "x" }, new double[,] { { 0.0 } }) {
39    }
40
41    public Dataset(IEnumerable<string> variableNames, double[,] data)
42      : base() {
43      Name = "-";
44      if (variableNames.Count() != data.GetLength(1)) {
45        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
46      }
47      Data = data;
48      this.variableNames = variableNames.ToArray();
49    }
50
51    [Storable]
52    private string[] variableNames;
53    public IEnumerable<string> VariableNames {
54      get { return variableNames; }
55    }
56
57    [Storable]
58    private double[,] data;
59    private double[,] Data {
60      get { return data; }
61      set {
62        if (data != value) {
63          if (value == null) throw new ArgumentNullException();
64          this.data = value;
65          OnReset(EventArgs.Empty);
66        }
67      }
68    }
69
70    // elementwise access
71    public double this[int rowIndex, int columnIndex] {
72      get { return data[rowIndex, columnIndex]; }
73      set {
74        if (!value.Equals(data[rowIndex, columnIndex])) {
75          data[rowIndex, columnIndex] = value;
76          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
77        }
78      }
79    }
80    // access to full columns
81    public double[] this[string variableName] {
82      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
83    }
84
85    public double[] GetVariableValues(int variableIndex, int start, int end) {
86      if (start < 0 || !(start <= end))
87        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
88      if (end > Rows || end < start)
89        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");
90
91      double[] values = new double[end - start];
92      for (int i = 0; i < end - start; i++)
93        values[i] = data[i + start, variableIndex];
94      return values;
95    }
96
97    public double[] GetVariableValues(string variableName, int start, int end) {
98      return GetVariableValues(GetVariableIndex(variableName), start, end);
99    }
100
101    #region Variable name methods
102    public string GetVariableName(int variableIndex) {
103      return variableNames[variableIndex];
104    }
105
106    public int GetVariableIndex(string variableName) {
107      for (int i = 0; i < variableNames.Length; i++) {
108        if (variableNames[i].Equals(variableName)) return i;
109      }
110      throw new ArgumentException("The variable name " + variableName + " was not found.");
111    }
112
113    public void SetVariableName(int variableIndex, string name) {
114      if (name == null) throw new ArgumentNullException("Cannot set variable name to null for variable at index " + variableIndex + " variableIndex");
115      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
116      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
117      variableNames[variableIndex] = name;
118    }
119
120    #endregion
121
122    #region variable statistics
123    public double GetMean(string variableName) {
124      return GetMean(GetVariableIndex(variableName));
125    }
126
127    public double GetMean(string variableName, int start, int end) {
128      return GetMean(GetVariableIndex(variableName), start, end);
129    }
130
131    public double GetMean(int variableIndex) {
132      return GetMean(variableIndex, 0, Rows);
133    }
134
135    public double GetMean(int variableIndex, int start, int end) {
136      return GetVariableValues(variableIndex, start, end).Average();
137    }
138
139    public double GetRange(string variableName) {
140      return GetRange(GetVariableIndex(variableName));
141    }
142
143    public double GetRange(int variableIndex) {
144      return GetRange(variableIndex, 0, Rows);
145    }
146
147    public double GetRange(string variableName, int start, int end) {
148      return GetRange(GetVariableIndex(variableName), start, end);
149    }
150
151    public double GetRange(int variableIndex, int start, int end) {
152      var values = GetVariableValues(variableIndex, start, end);
153      return values.Max() - values.Min();
154    }
155
156    public double GetMax(string variableName) {
157      return GetMax(GetVariableIndex(variableName));
158    }
159
160    public double GetMax(int variableIndex) {
161      return GetMax(variableIndex, 0, Rows);
162    }
163
164    public double GetMax(string variableName, int start, int end) {
165      return GetMax(GetVariableIndex(variableName), start, end);
166    }
167
168    public double GetMax(int variableIndex, int start, int end) {
169      return GetVariableValues(variableIndex, start, end).Max();
170    }
171
172    public double GetMin(string variableName) {
173      return GetMin(GetVariableIndex(variableName));
174    }
175
176    public double GetMin(int variableIndex) {
177      return GetMin(variableIndex, 0, Rows);
178    }
179
180    public double GetMin(string variableName, int start, int end) {
181      return GetMin(GetVariableIndex(variableName), start, end);
182    }
183
184    public double GetMin(int variableIndex, int start, int end) {
185      return GetVariableValues(variableIndex, start, end).Min();
186    }
187
188    public int GetMissingValues(string variableName) {
189      return GetMissingValues(GetVariableIndex(variableName));
190    }
191    public int GetMissingValues(int variableIndex) {
192      return GetMissingValues(variableIndex, 0, Rows);
193    }
194
195    public int GetMissingValues(string variableName, int start, int end) {
196      return GetMissingValues(GetVariableIndex(variableName), start, end);
197    }
198
199    public int GetMissingValues(int variableIndex, int start, int end) {
200      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
201    }
202
203    #endregion
204
205    public override IDeepCloneable Clone(Cloner cloner) {
206      Dataset clone = (Dataset)base.Clone(cloner);
207      clone.data = (double[,])data.Clone();
208      clone.variableNames = (string[])variableNames.Clone();
209      return clone;
210    }
211
212    #region events
213    public event EventHandler<EventArgs<int, int>> DataChanged;
214    private void OnDataChanged(EventArgs<int, int> e) {
215      var listeners = DataChanged;
216      if (listeners != null) listeners(this, e);
217    }
218    public event EventHandler Reset;
219    private void OnReset(EventArgs e) {
220      var listeners = Reset;
221      if (listeners != null) listeners(this, e);
222    }
223
224    private void data_ItemChanged(object sender, EventArgs<int, int> e) {
225      OnDataChanged(e);
226    }
227
228    private void data_Reset(object sender, EventArgs e) {
229      OnReset(e);
230    }
231    #endregion
232
233    #region IStringConvertibleMatrix Members
234
235    public int Rows {
236      get {
237        return data.GetLength(0);
238      }
239      set {
240        if (value == 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
241        if (value != Rows) {
242          var newValues = new double[value, Columns];
243          for (int row = 0; row < Math.Min(Rows, value); row++) {
244            for (int column = 0; column < Columns; column++) {
245              newValues[row, column] = data[row, column];
246            }
247          }
248          Data = newValues;
249        }
250      }
251    }
252
253    public int Columns {
254      get {
255        return data.GetLength(1);
256      }
257      set {
258        if (value != Columns) {
259          var newValues = new double[Rows, value];
260          var newVariableNames = new string[value];
261          for (int row = 0; row < Rows; row++) {
262            for (int column = 0; column < Math.Min(value, Columns); column++) {
263              newValues[row, column] = data[row, column];
264            }
265          }
266          string formatString = new StringBuilder().Append('0', (int)Math.Log10(value) + 1).ToString(); // >= 100 variables => ###
267          for (int column = 0; column < value; column++) {
268            if (column < Columns)
269              newVariableNames[column] = variableNames[column];
270            else
271              newVariableNames[column] = "Var" + column.ToString(formatString);
272          }
273          variableNames = newVariableNames;
274          Data = newValues;
275        }
276      }
277    }
278
279    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
280      get { return this.VariableNames; }
281      set {
282        int i = 0;
283        foreach (string variableName in value) {
284          SetVariableName(i, variableName);
285          i++;
286        }
287      }
288    }
289
290    IEnumerable<string> IStringConvertibleMatrix.RowNames {
291      get { return new List<string>(); }
292      set { throw new NotImplementedException(); }
293    }
294
295    public bool Validate(string value, out string errorMessage) {
296      errorMessage = string.Empty;
297      return true;
298    }
299
300    public string GetValue(int rowIndex, int columnIndex) {
301      return data[rowIndex, columnIndex].ToString();
302    }
303
304    public bool SetValue(string value, int rowIndex, int columnIndex) {
305      double v;
306      if (double.TryParse(value, out v)) {
307        data[rowIndex, columnIndex] = v;
308        return true;
309      } else return false;
310    }
311
312    public event EventHandler<EventArgs<int, int>> ItemChanged;
313    #endregion
314  }
315}
Note: See TracBrowser for help on using the repository browser.