Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3563

Last change on this file since 3563 was 3493, checked in by gkronber, 15 years ago

fixed bugs in cloning. #938 (Data types and operators for regression problems)

File size: 12.2 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
[2285]27using System.Linq;
[3376]28using HeuristicLab.Common;
[3253]29using HeuristicLab.Core;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Data;
[2]32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A named table of double values (rows x columns) with one variable name per column.
  /// Also exposes the table through <see cref="IStringConvertibleMatrix"/>.
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates an empty dataset: empty name, a 0x0 value matrix and no variable names.
    /// </summary>
    public Dataset()
      : base() {
      this.Name = string.Empty;
      this.data = new double[0, 0];
      this.variableNames = new string[0];
      this.SortableView = false;
    }

    /// <summary>
    /// Creates a dataset named "-" over the given value matrix.
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>; copied into a new array.</param>
    /// <param name="data">Value matrix (rows x columns). The array is stored as passed, it is not copied.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">The number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : this() {
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      Name = "-";
      // Materialize once so that a lazy sequence is not enumerated twice (count + copy).
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      this.data = data;
      this.variableNames = names;
    }

    [Storable]
    private string[] variableNames;
    /// <summary>
    /// The column names in column order. Assigning a different sequence stores a copy of it
    /// and raises <see cref="ColumnNamesChanged"/>.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      private set {
        if (variableNames != value) {
          variableNames = value.ToArray();
          OnColumnNamesChanged();
        }
      }
    }

    [Storable]
    private double[,] data;
    /// <summary>
    /// The value matrix. Assigning a different array instance raises <see cref="Reset"/>.
    /// </summary>
    private double[,] Data {
      get { return data; }
      set {
        if (data != value) {
          if (value == null) throw new ArgumentNullException();
          this.data = value;
          OnReset(EventArgs.Empty);
        }
      }
    }

    /// <summary>
    /// Element-wise access. Writing a value that differs from the stored one raises
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/>; writing an equal value does nothing.
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        // double.Equals (unlike ==) treats NaN as equal to NaN, so re-writing NaN raises no events.
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          OnItemChanged(rowIndex, columnIndex);
        }
      }
    }

    /// <summary>
    /// Returns a copy of the full column of the variable with the given name.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
    }

    /// <summary>
    /// Copies the values of one column for the rows in [<paramref name="start"/>, <paramref name="end"/>).
    /// </summary>
    /// <param name="variableIndex">Column index.</param>
    /// <param name="start">First row (inclusive).</param>
    /// <param name="end">Last row (exclusive); may equal <paramref name="start"/>, giving an empty result.</param>
    /// <returns>A new array of length <c>end - start</c>.</returns>
    /// <exception cref="ArgumentException">The row range is not within 0 &lt;= start &lt;= end &lt;= Rows.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      // start <= end is guaranteed here, so only the upper bound is left to check.
      if (end > Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");

      int length = end - start;
      double[] values = new double[length];
      for (int i = 0; i < length; i++)
        values[i] = data[start + i, variableIndex];
      return values;
    }

    /// <summary>
    /// Same as <see cref="GetVariableValues(int, int, int)"/>, with the column given by name.
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>Returns the name of the column at <paramref name="variableIndex"/>.</summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the index of the first column whose name equals <paramref name="variableName"/>.
    /// </summary>
    /// <exception cref="ArgumentException">No column has that name.</exception>
    public int GetVariableIndex(string variableName) {
      int index = Array.IndexOf(variableNames, variableName);
      if (index < 0)
        throw new ArgumentException("The variable name " + variableName + " was not found.");
      return index;
    }

    /// <summary>
    /// Renames one column and raises <see cref="ColumnNamesChanged"/>.
    /// Assigning a column its current name is a no-op.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The index is out of range, or another column already uses <paramref name="name"/>.
    /// </exception>
    public void SetVariableName(int variableIndex, string name) {
      // Two-argument overload: the one-argument constructor would treat the message as the parameter name.
      if (name == null) throw new ArgumentNullException("name", "Cannot set variable name to null for variable at index " + variableIndex + ".");
      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
      // Keeping the current name must not be reported as a duplicate of itself.
      if (variableNames[variableIndex] == name) return;
      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      variableNames[variableIndex] = name;
      OnColumnNamesChanged();
    }

    #endregion

    #region variable statistics
    // All statistics work on a copy of the requested row range [start, end) of one column
    // (see GetVariableValues). Mean, range, max and min use the LINQ aggregates, which throw
    // InvalidOperationException for an empty range.

    /// <summary>Mean of the whole column with the given name.</summary>
    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    /// <summary>Mean of rows [start, end) of the column with the given name.</summary>
    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Mean of the whole column at the given index.</summary>
    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, Rows);
    }

    /// <summary>Mean of rows [start, end) of the column at the given index.</summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    /// <summary>Range (max - min) of the whole column with the given name.</summary>
    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    /// <summary>Range (max - min) of the whole column at the given index.</summary>
    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, Rows);
    }

    /// <summary>Range (max - min) of rows [start, end) of the column with the given name.</summary>
    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Range (max - min) of rows [start, end) of the column at the given index.</summary>
    public double GetRange(int variableIndex, int start, int end) {
      double[] values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    /// <summary>Maximum of the whole column with the given name.</summary>
    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    /// <summary>Maximum of the whole column at the given index.</summary>
    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, Rows);
    }

    /// <summary>Maximum of rows [start, end) of the column with the given name.</summary>
    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Maximum of rows [start, end) of the column at the given index.</summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    /// <summary>Minimum of the whole column with the given name.</summary>
    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    /// <summary>Minimum of the whole column at the given index.</summary>
    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, Rows);
    }

    /// <summary>Minimum of rows [start, end) of the column with the given name.</summary>
    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Minimum of rows [start, end) of the column at the given index.</summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    /// <summary>Number of NaN entries in the whole column with the given name.</summary>
    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }

    /// <summary>Number of NaN entries in the whole column at the given index.</summary>
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, Rows);
    }

    /// <summary>Number of NaN entries in rows [start, end) of the column with the given name.</summary>
    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Number of NaN entries in rows [start, end) of the column at the given index.</summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Clones this dataset. The variable-name array is copied; the value matrix is shared
    /// by reference between the original and the clone.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      // NOTE(review): the matrix is intentionally not copied here, so a write through the
      // indexer or SetValue of either instance is visible in both - confirm callers treat
      // cloned datasets as read-only.
      clone.data = data;
      clone.variableNames = (string[])variableNames.Clone();
      return clone;
    }

    #region events
    /// <summary>Raised with (row, column) after a single value was written.</summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>Raised after the whole value matrix was replaced (rows or columns resized).</summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows. Setting a different value allocates a new matrix, keeps the values of
    /// the rows that exist in both (added rows are 0.0) and raises <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is less than one.</exception>
    public int Rows {
      get {
        return data.GetLength(0);
      }
      set {
        // Negative sizes are rejected here as well instead of failing inside the array allocation.
        if (value <= 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        if (value != Rows) {
          int keptRows = Math.Min(Rows, value);
          var newValues = new double[value, Columns];
          for (int row = 0; row < keptRows; row++) {
            for (int column = 0; column < Columns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          Data = newValues;
        }
      }
    }

    /// <summary>
    /// Number of columns. Setting a different value allocates a new matrix and name array,
    /// keeps the values and names of the columns that exist in both, names added columns
    /// "Var" + zero-padded column index, and raises <see cref="ColumnNamesChanged"/> followed
    /// by <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is negative.</exception>
    public int Columns {
      get {
        return data.GetLength(1);
      }
      set {
        if (value < 0) throw new ArgumentException("Number of columns must not be negative.");
        if (value != Columns) {
          int oldColumns = Columns;
          int keptColumns = Math.Min(value, oldColumns);
          var newValues = new double[Rows, value];
          var newVariableNames = new string[value];
          for (int row = 0; row < Rows; row++) {
            for (int column = 0; column < keptColumns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          Array.Copy(variableNames, newVariableNames, keptColumns);
          if (value > oldColumns) {
            // Only reached for value >= 1, so Log10 is well defined (it is not for 0).
            string formatString = new StringBuilder().Append('0', (int)Math.Log10(value) + 1).ToString(); // >= 100 variables => ###
            for (int column = oldColumns; column < value; column++) {
              newVariableNames[column] = "Var" + column.ToString(formatString);
            }
          }
          // Swap both fields before notifying, so that listeners never observe a name
          // array whose length differs from the number of columns.
          variableNames = newVariableNames;
          data = newValues;
          OnColumnNamesChanged();
          OnReset(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private bool sortableView;
    /// <summary>
    /// Flag stored for views; changing it raises <see cref="SortableViewChanged"/>.
    /// </summary>
    public bool SortableView {
      get { return sortableView; }
      set {
        if (value != sortableView) {
          sortableView = value;
          OnSortableViewChanged();
        }
      }
    }

    /// <summary>Always false.</summary>
    public bool ReadOnly {
      get { return false; }
    }

    /// <summary>
    /// Column names of the matrix (the variable names). The setter renames the leading columns
    /// in the order given; columns beyond the supplied names keep their names. The new names
    /// are validated as a whole and applied only if valid, and
    /// <see cref="ColumnNamesChanged"/> is raised once.
    /// </summary>
    /// <exception cref="ArgumentNullException">The sequence or one of its names is null.</exception>
    /// <exception cref="ArgumentException">
    /// More names than columns were supplied, or the resulting names are not unique.
    /// </exception>
    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        // Work on a copy so that a failed validation leaves the current names untouched.
        string[] updated = (string[])variableNames.Clone();
        int i = 0;
        foreach (string variableName in value) {
          if (variableName == null) throw new ArgumentNullException("value", "Cannot set variable name to null for variable at index " + i + ".");
          if (i >= updated.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + i + ".");
          updated[i] = variableName;
          i++;
        }
        // Check uniqueness on the final result; this permits unchanged names and swaps.
        var seen = new HashSet<string>();
        foreach (string variableName in updated) {
          if (!seen.Add(variableName)) throw new ArgumentException("The data set already contains a variable with name " + variableName + ".");
        }
        variableNames = updated;
        OnColumnNamesChanged();
      }
    }

    /// <summary>
    /// Row names are not supported: the getter returns an empty list, the setter throws
    /// <see cref="NotImplementedException"/>.
    /// </summary>
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return new List<string>(); }
      set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Checks whether <paramref name="value"/> parses as a double with <c>double.TryParse</c>.
    /// </summary>
    /// <param name="value">Text to check.</param>
    /// <param name="errorMessage">
    /// Empty when valid; otherwise a message that includes the pattern returned by
    /// <c>FormatPatterns.GetDoubleFormatPattern()</c>.
    /// </param>
    /// <returns>True if the text can be parsed.</returns>
    public bool Validate(string value, out string errorMessage) {
      double val;
      bool valid = double.TryParse(value, out val);
      errorMessage = string.Empty;
      if (!valid) {
        StringBuilder sb = new StringBuilder();
        sb.Append("Invalid Value (Valid Value Format: \"");
        sb.Append(FormatPatterns.GetDoubleFormatPattern());
        sb.Append("\")");
        errorMessage = sb.ToString();
      }
      return valid;
    }

    /// <summary>Returns the value at the given position formatted with <c>double.ToString()</c>.</summary>
    public string GetValue(int rowIndex, int columnIndex) {
      return data[rowIndex, columnIndex].ToString();
    }

    /// <summary>
    /// Parses <paramref name="value"/> and stores it at the given position. On success
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/> are raised, even when the
    /// parsed value equals the stored one.
    /// </summary>
    /// <returns>True if the text could be parsed and was stored; false otherwise.</returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      double v;
      if (!double.TryParse(value, out v)) return false;
      data[rowIndex, columnIndex] = v;
      OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
      OnItemChanged(rowIndex, columnIndex);
      return true;
    }

    /// <summary>Raised after one or more column (variable) names changed.</summary>
    public event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      EventHandler handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Declared for the matrix interface; nothing in this class raises it.</summary>
    public event EventHandler RowNamesChanged;
    private void OnRowNamesChanged() {
      EventHandler handler = RowNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised after <see cref="SortableView"/> changed.</summary>
    public event EventHandler SortableViewChanged;
    private void OnSortableViewChanged() {
      EventHandler handler = SortableViewChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised with (row, column) after a single value was written.</summary>
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      // Local copy, like the other raisers: the field could become null between check and call.
      EventHandler<EventArgs<int, int>> handler = ItemChanged;
      if (handler != null)
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      OnToStringChanged();
    }
    #endregion

  }
}
Note: See TracBrowser for help on using the repository browser.