Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3839

Last change on this file since 3839 was 3839, checked in by mkommend, 14 years ago

implemented first version of LR (ticket #1012)

File size: 12.7 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
[2285]27using System.Linq;
[3376]28using HeuristicLab.Common;
[3253]29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Data;
[2]32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A named table of <see cref="double"/> values. Rows are addressed by index, columns by
  /// index or by variable name. Modifications are announced through the events declared
  /// on this class.
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates an empty dataset (0 rows, 0 columns, no variable names, empty name).
    /// </summary>
    public Dataset()
      : base() {
      this.Name = string.Empty;
      this.data = new double[0, 0];
      this.variableNames = new string[0];
      this.SortableView = false;
    }

    /// <summary>
    /// Creates a dataset named "-" over the given values.
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>.</param>
    /// <param name="data">The values, indexed as [row, column]. The array is stored as-is, not copied.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">The number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : this() {
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      Name = "-";
      // Materialize once: the sequence may be lazy and must not be enumerated twice.
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      this.data = data;
      this.variableNames = names;
    }

    [Storable]
    private string[] variableNames;
    /// <summary>
    /// The column names in column order. Assigning a different sequence stores a copy of it
    /// and raises <see cref="ColumnNamesChanged"/>.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      private set {
        if (variableNames != value) {
          variableNames = value.ToArray();
          OnColumnNamesChanged();
        }
      }
    }

    [Storable]
    private double[,] data;
    /// <summary>
    /// The backing array, indexed as [row, column]. Assigning a different array raises
    /// <see cref="Reset"/>; assigning null throws <see cref="ArgumentNullException"/>.
    /// </summary>
    private double[,] Data {
      get { return data; }
      set {
        if (data != value) {
          if (value == null) throw new ArgumentNullException();
          this.data = value;
          OnReset(EventArgs.Empty);
        }
      }
    }

    // elementwise access
    /// <summary>
    /// Gets or sets a single cell. Setting a value that differs from the stored one raises
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/>.
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        // double.Equals (unlike ==) treats NaN as equal to NaN, so rewriting a NaN cell
        // with NaN is not reported as a change.
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          OnItemChanged(rowIndex, columnIndex);
        }
      }
    }

    /// <summary>
    /// Gets or sets a single cell addressed by variable name and row.
    /// Behaves like the [row, column] indexer once the name has been resolved.
    /// </summary>
    /// <exception cref="ArgumentException">No variable with the given name exists.</exception>
    public double this[string variableName, int rowIndex] {
      get { return this[rowIndex, GetVariableIndex(variableName)]; }
      set { this[rowIndex, GetVariableIndex(variableName)] = value; }
    }

    // access to full columns
    /// <summary>
    /// Returns a copy of all values of the named variable.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
    }

    /// <summary>
    /// Copies the values of one column for the rows in [start, end).
    /// </summary>
    /// <param name="variableIndex">Column index.</param>
    /// <param name="start">First row (inclusive); must be in [0, end].</param>
    /// <param name="end">Last row (exclusive); must be in [start, Rows].</param>
    /// <returns>A new array of length end - start.</returns>
    /// <exception cref="ArgumentException">The row range is invalid.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      if (end > Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");

      int count = end - start;
      double[] values = new double[count];
      for (int i = 0; i < count; i++)
        values[i] = data[i + start, variableIndex];
      return values;
    }

    /// <summary>
    /// Copies the values of the named column for the rows in [start, end).
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>Returns the name of the column at the given index.</summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the index of the first column whose name equals <paramref name="variableName"/>.
    /// </summary>
    /// <exception cref="ArgumentException">No such variable exists.</exception>
    public int GetVariableIndex(string variableName) {
      for (int i = 0; i < variableNames.Length; i++) {
        if (variableNames[i].Equals(variableName)) return i;
      }
      throw new ArgumentException("The variable name " + variableName + " was not found.");
    }

    /// <summary>
    /// Renames one column and raises <see cref="ColumnNamesChanged"/>. Assigning the name the
    /// column already has is a no-op.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The index is out of range, or another column already uses <paramref name="name"/>.
    /// </exception>
    public void SetVariableName(int variableIndex, string name) {
      if (name == null) throw new ArgumentNullException("name", "Cannot set variable name to null for variable at index " + variableIndex + ".");
      // Validate the index first so the lookup below cannot run out of bounds.
      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
      // Re-assigning the current name must not be reported as a duplicate.
      if (variableNames[variableIndex] == name) return;
      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      variableNames[variableIndex] = name;
      OnColumnNamesChanged();
    }

    #endregion

    #region variable statistics
    /// <summary>Mean of all values of the named variable.</summary>
    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    /// <summary>Mean of the named variable over the rows in [start, end).</summary>
    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Mean of all values of the column at the given index.</summary>
    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, Rows);
    }

    /// <summary>Mean of the column over the rows in [start, end).</summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    /// <summary>Difference between maximum and minimum of the named variable.</summary>
    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    /// <summary>Difference between maximum and minimum of the column at the given index.</summary>
    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, Rows);
    }

    /// <summary>Difference between maximum and minimum of the named variable over the rows in [start, end).</summary>
    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Difference between maximum and minimum of the column over the rows in [start, end).</summary>
    public double GetRange(int variableIndex, int start, int end) {
      var values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    /// <summary>Maximum of the named variable.</summary>
    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    /// <summary>Maximum of the column at the given index.</summary>
    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, Rows);
    }

    /// <summary>Maximum of the named variable over the rows in [start, end).</summary>
    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Maximum of the column over the rows in [start, end).</summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    /// <summary>Minimum of the named variable.</summary>
    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    /// <summary>Minimum of the column at the given index.</summary>
    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, Rows);
    }

    /// <summary>Minimum of the named variable over the rows in [start, end).</summary>
    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Minimum of the column over the rows in [start, end).</summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    /// <summary>Number of NaN entries of the named variable.</summary>
    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }

    /// <summary>Number of NaN entries of the column at the given index.</summary>
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, Rows);
    }

    /// <summary>Number of NaN entries of the named variable over the rows in [start, end).</summary>
    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Number of NaN entries of the column over the rows in [start, end).</summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Clones this dataset. The variable name array is copied; the value array is shared
    /// with the original.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      // NOTE(review): the value array is shared by reference, so cell writes through either
      // instance are visible in both (and only the writing instance raises events).
      // This looks like a deliberate memory trade-off - confirm before changing it.
      clone.data = data;
      clone.variableNames = (string[])variableNames.Clone();
      return clone;
    }

    #region events
    /// <summary>Raised with (row, column) after a cell value was written.</summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>Raised after the whole value array was replaced (e.g. by resizing).</summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows. Setting a different value reallocates the value array, keeps the
    /// overlapping rows, and raises <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is smaller than one.</exception>
    public int Rows {
      get {
        return data.GetLength(0);
      }
      set {
        // Reject negatives here as well instead of letting the array allocation fail.
        if (value < 1) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        if (value != Rows) {
          int columns = Columns;
          int keptRows = Math.Min(Rows, value);
          var newValues = new double[value, columns];
          for (int row = 0; row < keptRows; row++) {
            for (int column = 0; column < columns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          Data = newValues;
        }
      }
    }

    /// <summary>
    /// Number of columns. Setting a different value reallocates the value array, keeps the
    /// overlapping columns and their names, names added columns "Var" + zero-padded index,
    /// and raises <see cref="ColumnNamesChanged"/> followed by <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is negative.</exception>
    public int Columns {
      get {
        return data.GetLength(1);
      }
      set {
        if (value < 0) throw new ArgumentException("Number of columns must not be negative.");
        if (value != Columns) {
          int rows = Rows;
          int oldColumns = Columns;
          int keptColumns = Math.Min(value, oldColumns);
          var newValues = new double[rows, value];
          var newVariableNames = new string[value];
          for (int row = 0; row < rows; row++) {
            for (int column = 0; column < keptColumns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          // One '0' per decimal digit of the column count (>= 100 variables => 000).
          // Counting digits of the string avoids Math.Log10(0), which is -Infinity.
          string formatString = new string('0', value.ToString(CultureInfo.InvariantCulture).Length);
          for (int column = 0; column < value; column++) {
            if (column < oldColumns)
              newVariableNames[column] = variableNames[column];
            else
              newVariableNames[column] = "Var" + column.ToString(formatString);
          }
          // Swap in both fields before raising any event so that listeners never observe
          // a name array whose length differs from the column count.
          variableNames = newVariableNames;
          data = newValues;
          OnColumnNamesChanged();
          OnReset(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private bool sortableView;
    /// <summary>
    /// Flag exposed through <see cref="IStringConvertibleMatrix"/>; changing it raises
    /// <see cref="SortableViewChanged"/>.
    /// </summary>
    public bool SortableView {
      get { return sortableView; }
      set {
        if (value != sortableView) {
          sortableView = value;
          OnSortableViewChanged();
        }
      }
    }

    /// <summary>Always false.</summary>
    public bool ReadOnly {
      get { return false; }
    }

    /// <summary>
    /// Column names as seen through the matrix interface. The setter renames the first
    /// columns to the given names in one step and raises <see cref="ColumnNamesChanged"/>
    /// once; on any validation error no name is changed.
    /// </summary>
    /// <exception cref="ArgumentNullException">The sequence or one of its names is null.</exception>
    /// <exception cref="ArgumentException">
    /// More names than columns were supplied, or the result would contain a duplicate name.
    /// </exception>
    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        string[] names = value.ToArray();
        if (names.Length > variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableNames.Length + ".");

        // Validate against the final state rather than renaming one by one; otherwise
        // writing back unchanged names or swapping two names is rejected as a duplicate.
        var assigned = new HashSet<string>();
        for (int i = 0; i < names.Length; i++) {
          if (names[i] == null) throw new ArgumentNullException("value", "Cannot set variable name to null for variable at index " + i + ".");
          if (!assigned.Add(names[i])) throw new ArgumentException("The data set already contains a variable with name " + names[i] + ".");
        }
        // Columns beyond the supplied names keep their name and must not clash with a new one.
        for (int i = names.Length; i < variableNames.Length; i++) {
          if (assigned.Contains(variableNames[i])) throw new ArgumentException("The data set already contains a variable with name " + variableNames[i] + ".");
        }

        Array.Copy(names, variableNames, names.Length);
        OnColumnNamesChanged();
      }
    }

    /// <summary>
    /// Row names are not supported: the getter returns an empty list, the setter throws.
    /// </summary>
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return new List<string>(); }
      set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Checks whether <paramref name="value"/> can be parsed as a double.
    /// </summary>
    /// <param name="value">Text to check.</param>
    /// <param name="errorMessage">Empty when valid; otherwise a hint containing the expected format.</param>
    /// <returns>True when parsing succeeds.</returns>
    public bool Validate(string value, out string errorMessage) {
      double val;
      bool valid = double.TryParse(value, out val);
      errorMessage = string.Empty;
      if (!valid) {
        StringBuilder sb = new StringBuilder();
        sb.Append("Invalid Value (Valid Value Format: \"");
        sb.Append(FormatPatterns.GetDoubleFormatPattern());
        sb.Append("\")");
        errorMessage = sb.ToString();
      }
      return valid;
    }

    /// <summary>Returns the cell value formatted with <see cref="double.ToString()"/>.</summary>
    public string GetValue(int rowIndex, int columnIndex) {
      return data[rowIndex, columnIndex].ToString();
    }

    /// <summary>
    /// Parses <paramref name="value"/> and stores it in the given cell. On success
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/> are raised, even when the
    /// parsed value equals the stored one.
    /// </summary>
    /// <returns>True when the text could be parsed and was stored; otherwise false.</returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      double v;
      if (double.TryParse(value, out v)) {
        data[rowIndex, columnIndex] = v;
        OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
        OnItemChanged(rowIndex, columnIndex);
        return true;
      } else return false;
    }

    /// <summary>Raised after one or more column names changed.</summary>
    public event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      EventHandler handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Declared for the matrix interface; nothing in this class raises it.</summary>
    public event EventHandler RowNamesChanged;
    private void OnRowNamesChanged() {
      EventHandler handler = RowNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised after <see cref="SortableView"/> changed.</summary>
    public event EventHandler SortableViewChanged;
    private void OnSortableViewChanged() {
      EventHandler handler = SortableViewChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised with (row, column) after a cell value was written.</summary>
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      // Snapshot the delegate like the other raisers do, so a concurrent unsubscribe
      // between the null check and the call cannot cause a NullReferenceException.
      EventHandler<EventArgs<int, int>> handler = ItemChanged;
      if (handler != null)
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      OnToStringChanged();
    }
    #endregion


  }
}
Note: See TracBrowser for help on using the repository browser.