Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs @ 3334

Last change on this file since 3334 was 3334, checked in by mkommend, 14 years ago

made dataset readonly (ticket #638)

File size: 12.1 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using System.Globalization;
26using System.Text;
[2285]27using System.Linq;
[3253]28using HeuristicLab.Core;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Data;
31using HeuristicLab.Common;
[2]32
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// A rectangular table of <see cref="double"/> values with one name per column.
  /// Rows are addressed by index, columns by index or by variable name.
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableClass]
  public sealed class Dataset : NamedItem, IStringConvertibleMatrix {
    /// <summary>
    /// Creates a dataset without variables (a matrix with one row and zero columns).
    /// </summary>
    public Dataset()
      : this(new string[0], new double[,] { { } }) {
    }

    /// <summary>
    /// Creates a dataset from the given column names and values.
    /// </summary>
    /// <param name="variableNames">One name per column of <paramref name="data"/>.</param>
    /// <param name="data">The values; the array is stored by reference, not copied.</param>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The number of names differs from the number of columns.</exception>
    public Dataset(IEnumerable<string> variableNames, double[,] data)
      : base() {
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (data == null) throw new ArgumentNullException("data");
      Name = "-";
      // Materialize once so that a lazily evaluated sequence is not enumerated twice.
      string[] names = variableNames.ToArray();
      if (names.Length != data.GetLength(1)) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of data");
      }
      Data = data;
      this.VariableNames = names;
      this.SortableView = false;
      this.ReadOnlyView = true;
    }

    [Storable]
    private string[] variableNames;
    /// <summary>
    /// The column names in column order. Assigning stores a copy of the sequence
    /// and raises <see cref="ColumnNamesChanged"/>.
    /// </summary>
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      private set {
        if (variableNames != value) {
          variableNames = value.ToArray();
          OnColumnNamesChanged();
        }
      }
    }

    [Storable]
    private double[,] data;
    /// <summary>
    /// The backing matrix. Replacing it raises <see cref="Reset"/>.
    /// </summary>
    private double[,] Data {
      get { return data; }
      set {
        if (data != value) {
          if (value == null) throw new ArgumentNullException();
          this.data = value;
          OnReset(EventArgs.Empty);
        }
      }
    }

    /// <summary>
    /// Element-wise access. Writing a value that differs from the stored one raises
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/>.
    /// </summary>
    public double this[int rowIndex, int columnIndex] {
      get { return data[rowIndex, columnIndex]; }
      set {
        // Equals (not ==) so that overwriting NaN with NaN is not reported as a change.
        if (!value.Equals(data[rowIndex, columnIndex])) {
          data[rowIndex, columnIndex] = value;
          OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
          OnItemChanged(rowIndex, columnIndex);
        }
      }
    }

    /// <summary>
    /// Returns a copy of the complete column of the variable with the given name.
    /// </summary>
    public double[] this[string variableName] {
      get { return GetVariableValues(GetVariableIndex(variableName), 0, Rows); }
    }

    /// <summary>
    /// Returns a copy of the values of one column in the row range [start, end).
    /// </summary>
    /// <param name="variableIndex">Column index.</param>
    /// <param name="start">First row (inclusive).</param>
    /// <param name="end">Last row (exclusive).</param>
    /// <exception cref="ArgumentException">The row range is invalid.</exception>
    public double[] GetVariableValues(int variableIndex, int start, int end) {
      if (start < 0 || start > end)
        throw new ArgumentException("Start must be between 0 and end (" + end + ").");
      if (end > Rows)
        throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ").");

      int count = end - start;
      double[] values = new double[count];
      for (int i = 0; i < count; i++)
        values[i] = data[start + i, variableIndex];
      return values;
    }

    /// <summary>
    /// Returns a copy of the values of the named column in the row range [start, end).
    /// </summary>
    public double[] GetVariableValues(string variableName, int start, int end) {
      return GetVariableValues(GetVariableIndex(variableName), start, end);
    }

    #region Variable name methods
    /// <summary>Returns the name of the column at the given index.</summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>
    /// Returns the index of the first column with the given name.
    /// </summary>
    /// <exception cref="ArgumentException">No column has that name.</exception>
    public int GetVariableIndex(string variableName) {
      int index = Array.IndexOf(variableNames, variableName);
      if (index < 0)
        throw new ArgumentException("The variable name " + variableName + " was not found.");
      return index;
    }

    /// <summary>
    /// Renames the column at <paramref name="variableIndex"/> and raises
    /// <see cref="ColumnNamesChanged"/>. Assigning the name the column already has is a no-op.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// The index is out of range, or another column already uses <paramref name="name"/>.
    /// </exception>
    public void SetVariableName(int variableIndex, string name) {
      // The single-string ArgumentNullException constructor takes the parameter name, not a message.
      if (name == null) throw new ArgumentNullException("name", "Cannot set variable name to null for variable at index " + variableIndex + ".");
      // Range check first, so that a bad index is never reported as a duplicate name.
      if (variableIndex < 0 || variableIndex >= variableNames.Length) throw new ArgumentException(" Cannot set name of not existent variable at index " + variableIndex + ".");
      if (variableNames[variableIndex] == name) return;
      if (variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + ".");
      variableNames[variableIndex] = name;
      OnColumnNamesChanged();
    }

    #endregion

    #region variable statistics
    // All statistics work on a copy of the requested rows [start, end) of one column.
    // Mean, range, max and min throw InvalidOperationException (from LINQ) on an empty range.

    /// <summary>Arithmetic mean of the whole named column.</summary>
    public double GetMean(string variableName) {
      return GetMean(GetVariableIndex(variableName));
    }

    /// <summary>Arithmetic mean of the named column over rows [start, end).</summary>
    public double GetMean(string variableName, int start, int end) {
      return GetMean(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Arithmetic mean of the whole column.</summary>
    public double GetMean(int variableIndex) {
      return GetMean(variableIndex, 0, Rows);
    }

    /// <summary>Arithmetic mean of the column over rows [start, end).</summary>
    public double GetMean(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Average();
    }

    /// <summary>Difference between maximum and minimum of the whole named column.</summary>
    public double GetRange(string variableName) {
      return GetRange(GetVariableIndex(variableName));
    }

    /// <summary>Difference between maximum and minimum of the whole column.</summary>
    public double GetRange(int variableIndex) {
      return GetRange(variableIndex, 0, Rows);
    }

    /// <summary>Difference between maximum and minimum of the named column over rows [start, end).</summary>
    public double GetRange(string variableName, int start, int end) {
      return GetRange(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Difference between maximum and minimum of the column over rows [start, end).</summary>
    public double GetRange(int variableIndex, int start, int end) {
      var values = GetVariableValues(variableIndex, start, end);
      return values.Max() - values.Min();
    }

    /// <summary>Maximum of the whole named column.</summary>
    public double GetMax(string variableName) {
      return GetMax(GetVariableIndex(variableName));
    }

    /// <summary>Maximum of the whole column.</summary>
    public double GetMax(int variableIndex) {
      return GetMax(variableIndex, 0, Rows);
    }

    /// <summary>Maximum of the named column over rows [start, end).</summary>
    public double GetMax(string variableName, int start, int end) {
      return GetMax(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Maximum of the column over rows [start, end).</summary>
    public double GetMax(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Max();
    }

    /// <summary>Minimum of the whole named column.</summary>
    public double GetMin(string variableName) {
      return GetMin(GetVariableIndex(variableName));
    }

    /// <summary>Minimum of the whole column.</summary>
    public double GetMin(int variableIndex) {
      return GetMin(variableIndex, 0, Rows);
    }

    /// <summary>Minimum of the named column over rows [start, end).</summary>
    public double GetMin(string variableName, int start, int end) {
      return GetMin(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Minimum of the column over rows [start, end).</summary>
    public double GetMin(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Min();
    }

    /// <summary>Number of NaN entries in the whole named column.</summary>
    public int GetMissingValues(string variableName) {
      return GetMissingValues(GetVariableIndex(variableName));
    }

    /// <summary>Number of NaN entries in the whole column.</summary>
    public int GetMissingValues(int variableIndex) {
      return GetMissingValues(variableIndex, 0, Rows);
    }

    /// <summary>Number of NaN entries in the named column over rows [start, end).</summary>
    public int GetMissingValues(string variableName, int start, int end) {
      return GetMissingValues(GetVariableIndex(variableName), start, end);
    }

    /// <summary>Number of NaN entries in the column over rows [start, end).</summary>
    public int GetMissingValues(int variableIndex, int start, int end) {
      return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x));
    }

    #endregion

    /// <summary>
    /// Creates a clone that owns independent copies of the values and of the variable names.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      Dataset clone = (Dataset)base.Clone(cloner);
      clone.data = (double[,])data.Clone();
      clone.variableNames = (string[])variableNames.Clone();
      // sortableView is persisted state as well, so the clone has to carry it over.
      clone.sortableView = sortableView;
      return clone;
    }

    #region events
    /// <summary>Raised when a single cell is changed; the arguments are row and column index.</summary>
    public event EventHandler<EventArgs<int, int>> DataChanged;
    private void OnDataChanged(EventArgs<int, int> e) {
      var listeners = DataChanged;
      if (listeners != null) listeners(this, e);
    }
    /// <summary>Raised when the backing matrix is replaced as a whole.</summary>
    public event EventHandler Reset;
    private void OnReset(EventArgs e) {
      var listeners = Reset;
      if (listeners != null) listeners(this, e);
    }
    #endregion

    #region IStringConvertibleMatrix Members

    /// <summary>
    /// Number of rows. Setting it resizes the matrix: the overlapping rows are kept,
    /// new rows are zero, and <see cref="Reset"/> is raised.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is smaller than one.</exception>
    public int Rows {
      get {
        return data.GetLength(0);
      }
      set {
        // Negative values are rejected here instead of failing inside the array allocation.
        if (value < 1) throw new ArgumentException("Number of rows must be at least one (for variable names)");
        if (value != Rows) {
          int columns = Columns;
          int keptRows = Math.Min(Rows, value);
          var newValues = new double[value, columns];
          for (int row = 0; row < keptRows; row++) {
            for (int column = 0; column < columns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          Data = newValues;
        }
      }
    }

    /// <summary>
    /// Number of columns. Setting it resizes the matrix: the overlapping columns and their
    /// names are kept, new columns are zero and get generated names ("Var" + zero-padded index).
    /// Raises <see cref="ColumnNamesChanged"/> and then <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="ArgumentException">The new value is negative.</exception>
    public int Columns {
      get {
        return data.GetLength(1);
      }
      set {
        if (value < 0) throw new ArgumentException("Number of columns must not be negative.");
        if (value != Columns) {
          int oldColumns = Columns;
          int rowCount = Rows;
          int keptColumns = Math.Min(value, oldColumns);
          var newValues = new double[rowCount, value];
          var newVariableNames = new string[value];
          for (int row = 0; row < rowCount; row++) {
            for (int column = 0; column < keptColumns; column++) {
              newValues[row, column] = data[row, column];
            }
          }
          Array.Copy(variableNames, newVariableNames, keptColumns);
          if (value > oldColumns) {
            // Only reached for value >= 1, so Log10 is well defined. >= 100 variables => 000
            string formatString = new string('0', (int)Math.Log10(value) + 1);
            for (int column = oldColumns; column < value; column++) {
              newVariableNames[column] = "Var" + column.ToString(formatString);
            }
          }
          // Update both fields before notifying, so that listeners never observe
          // names and values with different column counts.
          variableNames = newVariableNames;
          data = newValues;
          OnColumnNamesChanged();
          OnReset(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private bool sortableView;
    /// <summary>
    /// Flag of the matrix interface; changing it raises <see cref="SortableViewChanged"/>.
    /// </summary>
    public bool SortableView {
      get { return sortableView; }
      set {
        if (value != sortableView) {
          sortableView = value;
          OnSortableViewChanged();
        }
      }
    }

    /// <summary>
    /// The column names. The setter renames the leading columns to the given names.
    /// It validates everything first, so the names are left untouched when it throws,
    /// and raises <see cref="ColumnNamesChanged"/> once.
    /// </summary>
    /// <exception cref="ArgumentNullException">The sequence or one of its names is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// More names than columns are given, or the result would contain one of the given names twice.
    /// </exception>
    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set {
        if (value == null) throw new ArgumentNullException("value");
        string[] newNames = value.ToArray();
        if (newNames.Length > variableNames.Length)
          throw new ArgumentException(" Cannot set name of not existent variable at index " + variableNames.Length + ".");

        var assigned = new HashSet<string>();
        for (int i = 0; i < newNames.Length; i++) {
          if (newNames[i] == null)
            throw new ArgumentNullException("value", "Cannot set variable name to null for variable at index " + i + ".");
          if (!assigned.Add(newNames[i]))
            throw new ArgumentException("The data set already contains a variable with name " + newNames[i] + ".");
        }
        // Columns beyond the given names keep their name and must not clash with a new one.
        for (int i = newNames.Length; i < variableNames.Length; i++) {
          if (assigned.Contains(variableNames[i]))
            throw new ArgumentException("The data set already contains a variable with name " + variableNames[i] + ".");
        }

        Array.Copy(newNames, variableNames, newNames.Length);
        OnColumnNamesChanged();
      }
    }

    /// <summary>
    /// Rows have no names: the getter returns an empty list and the setter is not implemented.
    /// </summary>
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return new List<string>(); }
      set { throw new NotImplementedException(); }
    }

    /// <summary>
    /// Checks whether <paramref name="value"/> can be parsed as a double.
    /// </summary>
    /// <param name="value">The text to check.</param>
    /// <param name="errorMessage">Empty when valid, otherwise a hint about the expected format.</param>
    /// <returns><c>true</c> if the text can be parsed.</returns>
    public bool Validate(string value, out string errorMessage) {
      double val;
      bool valid = double.TryParse(value, out val);
      errorMessage = string.Empty;
      if (!valid) {
        StringBuilder sb = new StringBuilder();
        sb.Append("Invalid Value (Valid Value Format: \"");
        sb.Append(FormatPatterns.GetDoubleFormatPattern());
        sb.Append("\")");
        errorMessage = sb.ToString();
      }
      return valid;
    }

    /// <summary>Returns the textual representation of one cell.</summary>
    public string GetValue(int rowIndex, int columnIndex) {
      return data[rowIndex, columnIndex].ToString();
    }

    /// <summary>
    /// Parses <paramref name="value"/> and stores it in the given cell. On success
    /// <see cref="DataChanged"/> and <see cref="ItemChanged"/> are raised, even if the
    /// stored value is unchanged.
    /// </summary>
    /// <returns><c>false</c> if the text cannot be parsed; the cell is left untouched then.</returns>
    public bool SetValue(string value, int rowIndex, int columnIndex) {
      double v;
      if (!double.TryParse(value, out v)) return false;
      data[rowIndex, columnIndex] = v;
      OnDataChanged(new EventArgs<int, int>(rowIndex, columnIndex));
      OnItemChanged(rowIndex, columnIndex);
      return true;
    }

    public event EventHandler ColumnNamesChanged;
    private void OnColumnNamesChanged() {
      EventHandler handler = ColumnNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler RowNamesChanged;
    private void OnRowNamesChanged() {
      EventHandler handler = RowNamesChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler SortableViewChanged;
    private void OnSortableViewChanged() {
      EventHandler handler = SortableViewChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    public event EventHandler<EventArgs<int, int>> ItemChanged;
    private void OnItemChanged(int rowIndex, int columnIndex) {
      // Copy to a local first (like the other raisers) so that a concurrent
      // unsubscribe between the null check and the call cannot cause a NullReferenceException.
      EventHandler<EventArgs<int, int>> handler = ItemChanged;
      if (handler != null)
        handler(this, new EventArgs<int, int>(rowIndex, columnIndex));
      OnToStringChanged();
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.