Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3330

Last change on this file since 3330 was 3330, checked in by mkommend, 14 years ago

removed warning from Dataset (ticket #938)

File size: 12.1 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Xml;
using System.Globalization;
using System.Text;
using System.Linq;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Data;
using HeuristicLab.Common;

namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A named item that stores a two-dimensional array of doubles (rows x columns) together with
  /// one variable name per column. Besides element and column access it offers simple column
  /// statistics and implements <see cref="IStringConvertibleMatrix"/> so that it can be edited
  /// through string values.
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates a dataset with a single variable "x" and a single row containing 0.0.
    /// </summary>
    public Dataset()
      : this(new string[] { "x" }, new double[,] { { 0.0 } }) {
    }

    /// <summary>
    /// Creates a dataset from the given variable names and values.
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>; the names are copied.</param>
    /// <param name="data">The values (rows x columns). The array is stored by reference, not copied.</param>
    /// <exception cref="ArgumentNullException">Thrown when one of the arguments is null.</exception>
    /// <exception cref="ArgumentException">Thrown when the number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : base() {
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      // Materialize once: a lazy sequence must not be enumerated twice (count check + copy).
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      Name = "-";
      Data = data;
      this.VariableNames = names;
      this.sortableView = false;
    }

    [Storable]
    private string[] variableNames;
    /// <summary>
    /// The names of the variables (columns). Assigning a new sequence copies it into an array
    /// and raises <see cref="ColumnNamesChanged"/>.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      private set {
        if (value == null) throw new ArgumentNullException("value");
        if (variableNames != value) {
          variableNames = value.ToArray();
          OnColumnNamesChanged();
        }
      }
    }

    [Storable]
    private double[,] data;
    /// <summary>
    /// The underlying value matrix. Replacing the matrix raises <see cref="Reset"/>.
    /// </summary>
    private double[,] Data {
      get { return data; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        if (data != value) {
          this.data = value;
          OnReset(EventArgs.Empty);
        }
      }
    }

    /// <summary>
    /// Element-wise access. Setting a value that differs from the stored one raises
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/>.
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        // Equals (instead of ==) treats NaN as equal to NaN, so re-assigning NaN raises no events.
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          OnItemChanged(rowIndex, columnIndex);
        }
      }
    }

    /// <summary>
    /// Returns a copy of all values of the variable with the given name.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
    }

    /// <summary>
    /// Returns a copy of the values of one variable in the row range [start, end).
    /// </summary>
    /// <param name="variableIndex">Column index of the variable.</param>
    /// <param name="start">First row (inclusive).</param>
    /// <param name="end">Last row (exclusive).</param>
    /// <exception cref="ArgumentException">Thrown when the range is not within 0 &lt;= start &lt;= end &lt;= Rows.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      // end >= start is already guaranteed by the check above.
      if (end > Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");

      double[] values = new double[end - start];
      for (int i = 0; i < values.Length; i++)
        values[i] = data[i + start, variableIndex];
      return values;
    }

    /// <summary>
    /// Returns a copy of the values of the named variable in the row range [start, end).
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>
    /// Returns the name of the variable at the given column index.
    /// </summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the column index of the first variable with the given name.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when no variable has that name.</exception>
    public int GetVariableIndex(string variableName) {
      for (int i = 0; i < variableNames.Length; i++) {
        if (variableNames[i].Equals(variableName)) return i;
      }
      throw new ArgumentException("The variable name " + variableName + " was not found.");
    }

    /// <summary>
    /// Renames the variable at the given index and raises <see cref="ColumnNamesChanged"/>.
    /// Assigning the name the variable already has is a no-op.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="name"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Thrown when the index does not exist or another variable already uses the name.
    /// </exception>
    public void SetVariableName(int variableIndex, string name) {
      // The (paramName, message) overload is required; the single-string overload would treat the message as a parameter name.
      if (name == null) throw new ArgumentNullException("name", "Cannot set variable name to null for variable at index " + variableIndex + ".");
      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
      // Keeping the current name is not a conflict with "another" variable.
      if (name.Equals(variableNames[variableIndex])) return;
      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      variableNames[variableIndex] = name;
      OnColumnNamesChanged();
    }

    #endregion

    #region variable statistics
    /// <summary>Mean of all values of the named variable.</summary>
    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    /// <summary>Mean of the named variable in the row range [start, end).</summary>
    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Mean of all values of the variable at the given index.</summary>
    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, Rows);
    }

    /// <summary>Mean of the variable at the given index in the row range [start, end).</summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    /// <summary>Difference between maximum and minimum of the named variable.</summary>
    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    /// <summary>Difference between maximum and minimum of the variable at the given index.</summary>
    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, Rows);
    }

    /// <summary>Difference between maximum and minimum of the named variable in the row range [start, end).</summary>
    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Difference between maximum and minimum of the variable in the row range [start, end).</summary>
    public double GetRange(int variableIndex, int start, int end) {
      var values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    /// <summary>Maximum of all values of the named variable.</summary>
    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    /// <summary>Maximum of all values of the variable at the given index.</summary>
    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, Rows);
    }

    /// <summary>Maximum of the named variable in the row range [start, end).</summary>
    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Maximum of the variable at the given index in the row range [start, end).</summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    /// <summary>Minimum of all values of the named variable.</summary>
    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    /// <summary>Minimum of all values of the variable at the given index.</summary>
    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, Rows);
    }

    /// <summary>Minimum of the named variable in the row range [start, end).</summary>
    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Minimum of the variable at the given index in the row range [start, end).</summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    /// <summary>Number of NaN entries of the named variable.</summary>
    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }

    /// <summary>Number of NaN entries of the variable at the given index.</summary>
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, Rows);
    }

    /// <summary>Number of NaN entries of the named variable in the row range [start, end).</summary>
    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Number of NaN entries of the variable at the given index in the row range [start, end).</summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Creates a clone with its own copies of the value matrix and the variable names.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      clone.data = (double[,])data.Clone();
      clone.variableNames = (string[])variableNames.Clone();
      // Copy the view flag explicitly so the clone does not silently fall back to the default.
      clone.sortableView = sortableView;
      return clone;
    }

    #region events
    /// <summary>Raised after a single value was changed; the arguments are (row, column).</summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>Raised after the whole value matrix was replaced (e.g. on resize).</summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows. Setting a different value allocates a new matrix, keeps the values of the
    /// rows that still exist, and raises <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when the value is smaller than one.</exception>
    public int Rows {
      get {
        return data.GetLength(0);
      }
      set {
        // Negative values are rejected here as well instead of failing inside the array allocation.
        if (value <= 0) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        if (value != Rows) {
          int columns = Columns;
          int keptRows = Math.Min(Rows, value);
          var newValues = new double[value, columns];
          // Both matrices have the same column count, so the kept rows form one contiguous
          // row-major prefix that Array.Copy can transfer in a single call.
          Array.Copy(data, newValues, keptRows * columns);
          Data = newValues;
        }
      }
    }

    /// <summary>
    /// Number of columns (variables). Setting a different value allocates a new matrix, keeps the
    /// values and names of the columns that still exist, names added columns "Var" followed by
    /// the zero-padded column index, and raises <see cref="ColumnNamesChanged"/> and
    /// <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when the value is negative.</exception>
    public int Columns {
      get {
        return data.GetLength(1);
      }
      set {
        if (value < 0) throw new ArgumentException("Number of columns must not be negative.");
        if (value != Columns) {
          int rows = Rows;
          int keptColumns = Math.Min(value, Columns);
          var newValues = new double[rows, value];
          var newVariableNames = new string[value];
          for (int row = 0; row < rows; row++) {
            for (int column = 0; column < keptColumns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          // One '0' per decimal digit of the column count (>= 100 variables => "000").
          // Counting digits of the number itself avoids Math.Log10(0), which is -Infinity.
          string formatString = new string('0', value.ToString(CultureInfo.InvariantCulture).Length);
          for (int column = 0; column < value; column++) {
            if (column < keptColumns)
              newVariableNames[column] = variableNames[column];
            else
              newVariableNames[column] = "Var" + column.ToString(formatString);
          }
          // Update both fields before notifying so listeners never observe names and data
          // with different column counts.
          variableNames = newVariableNames;
          data = newValues;
          OnColumnNamesChanged();
          OnReset(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private bool sortableView;
    /// <summary>
    /// Flag exposed through <see cref="IStringConvertibleMatrix"/>; changing it raises
    /// <see cref="SortableViewChanged"/>.
    /// </summary>
    public bool SortableView {
      get { return sortableView; }
      set {
        if (value != sortableView) {
          sortableView = value;
          OnSortableViewChanged();
        }
      }
    }

    /// <summary>
    /// Column names are the variable names. The setter renames the leading variables with the
    /// given names in one step (all or nothing) and raises <see cref="ColumnNamesChanged"/> once.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when the sequence or one of its names is null.</exception>
    /// <exception cref="ArgumentException">
    /// Thrown when more names than columns are given or a changed name would occur more than once.
    /// </exception>
    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        string[] names = value.ToArray();
        if (names.Length > variableNames.Length)
          throw new ArgumentException(" Cannot set name of not existent variable at index " + variableNames.Length + ".");

        // Build the complete new name list first so that a failing validation leaves the
        // dataset untouched and so that names may be swapped between columns.
        string[] updated = (string[])variableNames.Clone();
        for (int i = 0; i < names.Length; i++) {
          if (names[i] == null)
            throw new ArgumentNullException("value", "Cannot set variable name to null for variable at index " + i + ".");
          updated[i] = names[i];
        }

        var occurrences = new Dictionary<string, int>();
        foreach (string name in updated) {
          if (name == null) continue;
          int count;
          occurrences.TryGetValue(name, out count);
          occurrences[name] = count + 1;
        }
        for (int i = 0; i < names.Length; i++) {
          // Only names that actually change are checked, unchanged columns are never rejected.
          bool changed = !names[i].Equals(variableNames[i]);
          if (changed && occurrences[names[i]] > 1)
            throw new ArgumentException("The data set already contains a variable with name " + names[i] + ".");
        }

        variableNames = updated;
        OnColumnNamesChanged();
      }
    }

    /// <summary>
    /// The dataset has no row names: the getter returns an empty list, the setter is not implemented.
    /// </summary>
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return new List<string>(); }
      set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Checks whether the string can be parsed as a double.
    /// </summary>
    /// <param name="value">The string to check.</param>
    /// <param name="errorMessage">Empty when valid, otherwise a hint containing the expected format pattern.</param>
    /// <returns>True when <paramref name="value"/> parses as a double.</returns>
    public bool Validate(string value, out string errorMessage) {
      double val;
      bool valid = double.TryParse(value, out val);
      errorMessage = string.Empty;
      if (!valid) {
        StringBuilder sb = new StringBuilder();
        sb.Append("Invalid Value (Valid Value Format: \"");
        sb.Append(FormatPatterns.GetDoubleFormatPattern());
        sb.Append("\")");
        errorMessage = sb.ToString();
      }
      return valid;
    }

    /// <summary>
    /// Returns the string representation of the value at the given position.
    /// </summary>
    public string GetValue(int rowIndex, int columnIndex) {
      return data[rowIndex, columnIndex].ToString();
    }

    /// <summary>
    /// Parses the string and stores the result at the given position. On success
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/> are raised.
    /// </summary>
    /// <returns>True when the string could be parsed, false otherwise (nothing is changed then).</returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      double v;
      if (double.TryParse(value, out v)) {
        data[rowIndex, columnIndex] = v;
        OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
        OnItemChanged(rowIndex, columnIndex);
        return true;
      } else return false;
    }

    /// <summary>Raised after one or more variable names changed.</summary>
    public event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      EventHandler handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Row names are not supported; nothing in this class raises this event.</summary>
    public event EventHandler RowNamesChanged;
    private void OnRowNamesChanged() {
      EventHandler handler = RowNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised after <see cref="SortableView"/> changed.</summary>
    public event EventHandler SortableViewChanged;
    private void OnSortableViewChanged() {
      EventHandler handler = SortableViewChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    /// <summary>Raised after a single value was changed; the arguments are (row, column).</summary>
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      // Local copy, like the other On* methods, so a concurrent unsubscribe cannot null the delegate between check and call.
      EventHandler<EventArgs<int, int>> handler = ItemChanged;
      if (handler != null)
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      OnToStringChanged();
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.