Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3294

Last change on this file since 3294 was 3294, checked in by gkronber, 14 years ago

Added first version of architecture altering operators for ADFs. #290 (Implement ADFs)

File size: 10.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
27using System.Linq;
28using HeuristicLab.Core;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Data;
31using HeuristicLab.Common;
32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A named table of double values with one variable name per column.
  /// </summary>
  /// <remarks>
  /// The class exposes two coordinate systems:
  /// the indexers and the statistics methods address the data rows directly (row 0 is the first data row),
  /// whereas the <see cref="IStringConvertibleMatrix"/> members (<see cref="Rows"/>, <see cref="GetValue"/>,
  /// <see cref="SetValue"/>, <see cref="ItemChanged"/>) treat row 0 as the row of variable names and
  /// shift all data rows down by one.
  /// NOTE(review): the [Storable] fields are restored without passing through the Data setter, so the matrix
  /// event handlers are presumably not re-attached after deserialization - confirm against the persistence framework.
  /// </remarks>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates a dataset with a single variable "x" and a single data row containing 0.0.
    /// </summary>
    public Dataset()
      : this(new string[] { "x" }, new double[,] { { 0.0 } }) {
    }

    /// <summary>
    /// Creates a dataset from the given variable names and values.
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>.</param>
    /// <param name="data">The values; the first dimension is the row, the second the column (variable).</param>
    /// <exception cref="ArgumentNullException">Thrown when an argument is null.</exception>
    /// <exception cref="ArgumentException">Thrown when the number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : base() {
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      Name = "-";
      // materialize once so that a lazy sequence is not enumerated twice
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      this.variableNames = new StringArray(names);
      Data = new DoubleMatrix(data);
    }

    [Storable]
    private StringArray variableNames;
    /// <summary>
    /// The variable (column) names in column order.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
    }

    [Storable]
    private DoubleMatrix data;
    // Replaces the underlying matrix: moves the event subscriptions from the old to the new matrix
    // and announces the change through the Reset event.
    private DoubleMatrix Data {
      get { return data; }
      set {
        if (data != value) {
          if (value == null) throw new ArgumentNullException();
          if (data != null) DeregisterDataEvents();
          this.data = value;
          RegisterDataEvents();
          OnReset(EventArgs.Empty);
        }
      }
    }

    private void RegisterDataEvents() {
      data.Reset += new EventHandler(data_Reset);
      data.ItemChanged += new EventHandler<EventArgs<int, int>>(data_ItemChanged);
    }

    private void DeregisterDataEvents() {
      data.Reset -= new EventHandler(data_Reset);
      data.ItemChanged -= new EventHandler<EventArgs<int, int>>(data_ItemChanged);
    }

    /// <summary>
    /// Element-wise access to the data; <paramref name="rowIndex"/> addresses the data rows directly
    /// (no offset for the variable name row).
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          // NOTE(review): data_ItemChanged also forwards to OnDataChanged; if the matrix raises ItemChanged
          // on assignment, DataChanged fires twice for one write - confirm against DoubleMatrix.
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          // string-matrix coordinates: data rows start at row 1
          OnItemChanged(rowIndex + 1, columnIndex);
        }
      }
    }

    /// <summary>
    /// Returns a copy of all values of the variable with the given name.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, data.Rows); }
    }

    /// <summary>
    /// Copies the values of one variable for the data rows in [<paramref name="start"/>, <paramref name="end"/>).
    /// </summary>
    /// <param name="variableIndex">Column index of the variable.</param>
    /// <param name="start">First data row (inclusive).</param>
    /// <param name="end">Last data row (exclusive); at most the number of data rows.</param>
    /// <returns>A new array of length <c>end - start</c>.</returns>
    /// <exception cref="ArgumentException">Thrown when the row range is invalid.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      // end < start is already excluded by the check above
      if (end > data.Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + data.Rows + ").");

      int length = end - start;
      double[] values = new double[length];
      for (int i = 0; i < length; i++)
        values[i] = data[start + i, variableIndex];
      return values;
    }

    /// <summary>
    /// Copies the values of the named variable for the data rows in [<paramref name="start"/>, <paramref name="end"/>).
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>
    /// Returns the name of the variable in the given column.
    /// </summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the column index of the first variable with the given name.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when no variable has that name.</exception>
    public int GetVariableIndex(string variableName) {
      for (int i = 0; i < variableNames.Length; i++) {
        if (variableNames[i].Equals(variableName)) return i;
      }
      throw new ArgumentException("The variable name " + variableName + " was not found.");
    }

    /// <summary>
    /// Renames the variable in the given column.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when another variable already uses <paramref name="name"/>.</exception>
    public void SetVariableName(int variableIndex, string name) {
      if (variableNames.Contains(name)) {
        // re-assigning the current name of this very column is not a conflict
        bool unchanged = variableIndex >= 0 && variableIndex < variableNames.Length
          && string.Equals(variableNames[variableIndex], name);
        if (unchanged) return;
        throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      }
      variableNames[variableIndex] = name;
      OnItemChanged(0, variableIndex);
    }

    #endregion

    #region variable statistics
    // All statistics operate on a copy of the requested rows [start, end) obtained from GetVariableValues;
    // overloads without a range use all data rows, overloads with a name resolve it via GetVariableIndex.

    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, data.Rows);
    }

    /// <summary>
    /// Arithmetic mean of the variable's values in the data rows [start, end).
    /// </summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, data.Rows);
    }

    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Difference between the largest and the smallest value of the variable in the data rows [start, end).
    /// </summary>
    public double GetRange(int variableIndex, int start, int end) {
      var values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, data.Rows);
    }

    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Largest value of the variable in the data rows [start, end).
    /// </summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, data.Rows);
    }

    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Smallest value of the variable in the data rows [start, end).
    /// </summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, data.Rows);
    }

    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>
    /// Number of NaN entries of the variable in the data rows [start, end).
    /// </summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Creates a copy of this dataset with cloned variable names and a cloned data matrix.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      clone.variableNames = (StringArray)variableNames.Clone(cloner);
      // Assign through the property, not the field: the setter detaches the handlers from whatever matrix
      // the clone held before and attaches them to the copied matrix, so the clone relays its own events.
      clone.Data = (DoubleMatrix)data.Clone(cloner);
      return clone;
    }

    #region events
    /// <summary>
    /// Raised when a data value changed; the arguments are the data row and column (no name row offset).
    /// </summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>
    /// Raised when the underlying matrix was replaced or reports a reset.
    /// </summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }

    private void data_ItemChanged(object sender, EventArgs<int, int> e) {
      OnDataChanged(e);
    }

    private void data_Reset(object sender, EventArgs e) {
      OnReset(e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows of the string matrix: the data rows plus one row for the variable names.
    /// Setting it resizes the data; existing values are kept as far as they fit.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when the value is smaller than one.</exception>
    public int Rows {
      get {
        return data.Rows + 1;
      }
      set {
        // also rejects negative values, which would otherwise fail in the array allocation below
        if (value < 1) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        int newDataRows = value - 1;
        if (newDataRows != data.Rows) {
          var newValues = new double[newDataRows, data.Columns];
          int rowsToCopy = Math.Min(data.Rows, newDataRows);
          for (int row = 0; row < rowsToCopy; row++) {
            for (int column = 0; column < data.Columns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          Data = new DoubleMatrix(newValues);
        }
      }
    }

    /// <summary>
    /// Number of variables. Setting it resizes the data; existing values and names are kept as far as
    /// they fit, new columns are filled with 0.0 and named "Var" followed by the zero-padded column index.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when the value is negative.</exception>
    public int Columns {
      get {
        return data.Columns;
      }
      set {
        if (value < 0) throw new ArgumentException("Number of columns must not be negative");
        if (value != data.Columns) {
          var newValues = new double[data.Rows, value];
          var newVariableNames = new string[value];
          int columnsToCopy = Math.Min(value, data.Columns);
          for (int row = 0; row < data.Rows; row++) {
            for (int column = 0; column < columnsToCopy; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          // one '0' per decimal digit of value, e.g. >= 100 variables => "000";
          // Math.Max keeps Log10 finite when all columns are removed (value == 0)
          string formatString = new string('0', (int)Math.Log10(Math.Max(value, 1)) + 1);
          for (int column = 0; column < value; column++) {
            if (column < data.Columns)
              newVariableNames[column] = variableNames[column];
            else
              newVariableNames[column] = "Var" + column.ToString(formatString);
          }
          variableNames = new StringArray(newVariableNames);
          Data = new DoubleMatrix(newValues);
        }
      }
    }

    /// <summary>
    /// Accepts every string; the actual check happens in <see cref="SetValue"/>.
    /// </summary>
    public bool Validate(string value, out string errorMessage) {
      errorMessage = string.Empty;
      return true;
    }

    /// <summary>
    /// Returns the variable name for row 0 and the formatted data value of data row
    /// <c>rowIndex - 1</c> for every other row.
    /// </summary>
    public string GetValue(int rowIndex, int columnIndex) {
      if (rowIndex == 0) {
        // return variable name
        return variableNames[columnIndex];
      } else {
        return data[rowIndex - 1, columnIndex].ToString();
      }
    }

    /// <summary>
    /// Sets a variable name (row 0) or parses and stores a data value (any other row).
    /// </summary>
    /// <returns>
    /// False when the name is already used by another variable or the value cannot be parsed as double;
    /// true otherwise.
    /// </returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      if (rowIndex == 0) {
        // keeping the current name of the edited column is not a conflict
        if (string.Equals(variableNames[columnIndex], value)) return true;
        // check if the variable name is already used
        if (variableNames.Contains(value)) return false;
        variableNames[columnIndex] = value;
        OnItemChanged(0, columnIndex);
        return true;
      }

      double v;
      if (!double.TryParse(value, out v)) return false;
      data[rowIndex - 1, columnIndex] = v;
      OnItemChanged(rowIndex, columnIndex);
      return true;
    }

    /// <summary>
    /// Raised when a cell of the string matrix changed; row 0 denotes the variable name row.
    /// </summary>
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      var listeners = ItemChanged;
      if (listeners != null) listeners(this, new EventArgs<int, int>(rowIndex, columnIndex));
    }

    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.